diff --git a/.github/scripts/merge-yaml.py b/.github/scripts/merge-yaml.py
new file mode 100644
index 0000000000..1cc1e10979
--- /dev/null
+++ b/.github/scripts/merge-yaml.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+# Copyright NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import pathlib
+
+import yaml
+
+
+def load_yaml(path):
+    """Parse the YAML file at ``path`` and return its top-level mapping.
+
+    An empty document yields ``{}``.
+
+    Raises:
+        ValueError: if the top level of the document is not a mapping;
+            ``merge_yaml`` can only combine mappings.
+    """
+    with path.open(encoding="utf-8") as f:
+        data = yaml.safe_load(f) or {}
+    if not isinstance(data, dict):
+        raise ValueError(
+            f"{path}: expected a YAML mapping at the top level, "
+            f"got {type(data).__name__}"
+        )
+    return data
+
+
+def merge_yaml(base, override):
+    """Recursively merge ``override`` into ``base`` in place; return ``base``.
+
+    Where both sides hold a mapping under the same key, the mappings are
+    merged key by key. In every other case the override value replaces
+    the base value outright (lists and scalars are not combined).
+    """
+    for key, value in override.items():
+        if isinstance(value, dict) and isinstance(base.get(key), dict):
+            merge_yaml(base[key], value)
+        else:
+            base[key] = value
+    return base
+
+
+def main():
+    """Merge --override into --base and write the result to --output."""
+    parser = argparse.ArgumentParser(
+        description="Merge one YAML file into another and write the merged result."
+    )
+    parser.add_argument("--base", required=True, type=pathlib.Path)
+    parser.add_argument("--override", required=True, type=pathlib.Path)
+    parser.add_argument("--output", required=True, type=pathlib.Path)
+    args = parser.parse_args()
+
+    # Both inputs are fully parsed before the output is opened for writing,
+    # so --output may name the same file as --base.
+    merged = merge_yaml(load_yaml(args.base), load_yaml(args.override))
+    with args.output.open("w", encoding="utf-8") as f:
+        yaml.safe_dump(merged, f, default_flow_style=False, sort_keys=False)
+
+    print(f"Merged {args.override} into {args.output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/update-csv-images.py b/.github/scripts/update-csv-images.py
new file mode 100644
index 0000000000..dedf7286a5
--- /dev/null
+++ b/.github/scripts/update-csv-images.py
@@ -0,0 +1,390 @@
+#!/usr/bin/env python3
+
+# Copyright NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import pathlib
+import re
+import subprocess
+import sys
+
+import yaml
+
+
+# Name of a CSV spec.relatedImages entry -> dotted path of the component in
+# the Helm values file.
+RELATED_IMAGE_COMPONENTS = {
+    "gpu-operator-image": "operator",
+    "gpu-operator-validator-image": "validator",
+    "dcgm-exporter-image": "dcgmExporter",
+    "dcgm-image": "dcgm",
+    "container-toolkit-image": "toolkit",
+    "device-plugin-image": "devicePlugin",
+    "gpu-feature-discovery-image": "gfd",
+    "mig-manager-image": "migManager",
+    "k8s-driver-manager-image": "driver.manager",
+    "vfio-manager-image": "vfioManager",
+    "cc-manager-image": "ccManager",
+    "sandbox-device-plugin-image": "sandboxDevicePlugin",
+    "kata-sandbox-device-plugin-image": "kataSandboxDevicePlugin",
+    "vgpu-device-manager-image": "vgpuDeviceManager",
+    "gdrcopy-image": "gdrcopy",
+}
+
+# Environment variable on the gpu-operator container -> dotted path of the
+# component in the Helm values file.
+ENV_IMAGE_COMPONENTS = {
+    "VALIDATOR_IMAGE": "validator",
+    "GFD_IMAGE": "gfd",
+    "CONTAINER_TOOLKIT_IMAGE": "toolkit",
+    "DCGM_IMAGE": "dcgm",
+    "DCGM_EXPORTER_IMAGE": "dcgmExporter",
+    "DEVICE_PLUGIN_IMAGE": "devicePlugin",
+    "DRIVER_MANAGER_IMAGE": "driver.manager",
+    "MIG_MANAGER_IMAGE": "migManager",
+    "VFIO_MANAGER_IMAGE": "vfioManager",
+    "CC_MANAGER_IMAGE": "ccManager",
+    "SANDBOX_DEVICE_PLUGIN_IMAGE": "sandboxDevicePlugin",
+    "KATA_SANDBOX_DEVICE_PLUGIN_IMAGE": "kataSandboxDevicePlugin",
+    "VGPU_DEVICE_MANAGER_IMAGE": "vgpuDeviceManager",
+    "GDRCOPY_IMAGE": "gdrcopy",
+}
+
+# Components whose image tag carries the OS suffix.
+OS_SPECIFIC_COMPONENTS = {"driver", "gdrcopy"}
+# Result of driver_image_slot() -> relatedImages entry name / env var name.
+DRIVER_RELATED_IMAGES = {
+    "default": "driver-image",
+    "535": "driver-image-535",
+    "580": "driver-image-580",
+}
+DRIVER_ENV_IMAGES = {
+    "default": "DRIVER_IMAGE",
+    "535": "DRIVER_IMAGE-535",
+    "580": "DRIVER_IMAGE-580",
+}
+# Accepts tags such as v26.3.2 and v26.3.2-rc.1; the leading "v" is optional.
+# The group names are read by release_metadata_from_tag().
+RELEASE_TAG_PATTERN = re.compile(
+    r"^v?(?P<major>[0-9]+)\.(?P<minor>[0-9]+)\.(?P<patch>[0-9]+)"
+    r"(?:-rc\.(?P<rc>[0-9]+))?$"
+)
+
+
+def load_yaml(path):
+    """Parse the YAML file at ``path``; an empty document yields ``{}``."""
+    with path.open(encoding="utf-8") as f:
+        return yaml.safe_load(f) or {}
+
+
+def component(values, path):
+    """Return the mapping found at the dotted ``path`` inside ``values``.
+
+    A missing key, or a key whose value is not a mapping (for example an
+    explicit YAML null), yields ``{}`` so callers can chain ``.get()``.
+    """
+    current = values
+    for part in path.split("."):
+        if not isinstance(current, dict):
+            return {}
+        current = current.get(part, {})
+    return current if isinstance(current, dict) else {}
+
+
+def driver_image_slot(values):
+    """Return the driver slot key derived from ``driver.version``.
+
+    The result is "580" or "535" when the version starts with that prefix,
+    otherwise "default". It indexes DRIVER_RELATED_IMAGES and
+    DRIVER_ENV_IMAGES.
+    """
+    version = str(component(values, "driver").get("version") or "").strip()
+    if version.startswith("580"):
+        return "580"
+    if version.startswith("535"):
+        return "535"
+    return "default"
+
+
+def os_image_ref_candidates(
+    values,
+    component_path,
+    olm_bundle_os_suffix,
+):
+    """Build the candidate image references for an OS-specific component.
+
+    Returns a list holding at most one reference:
+
+    * without a repository, the ``image`` field itself when it looks fully
+      qualified (contains "/" or "@"), otherwise nothing;
+    * with a ``sha256:`` version, ``repository/image@version``;
+    * otherwise ``repository/image:tag``, where ``-<olm_bundle_os_suffix>``
+      is appended to the version unless the suffix is empty or the version
+      already ends with it.
+    """
+    data = component(values, component_path)
+    repository = (data.get("repository") or "").strip()
+    image = (data.get("image") or "").strip()
+    version = (data.get("version") or "").strip()
+
+    if not repository:
+        if image and ("/" in image or "@" in image):
+            return [image]
+        # Always hand back a list so every path has the same return type.
+        return []
+
+    if not image or not version:
+        return []
+
+    if version.startswith("sha256:"):
+        return [f"{repository}/{image}@{version}"]
+
+    tag = version
+    if olm_bundle_os_suffix and not tag.endswith(f"-{olm_bundle_os_suffix}"):
+        tag = f"{tag}-{olm_bundle_os_suffix}"
+
+    return [f"{repository}/{image}:{tag}"]
+
+
+def build_image_ref_candidates(
+    values,
+    chart,
+    component_path,
+    olm_bundle_os_suffix,
+):
+    """Build the candidate image references for the component at ``component_path``.
+
+    OS-specific components are delegated to os_image_ref_candidates(). For
+    the rest the reference is assembled from repository, image and version;
+    an empty list means the values do not describe a usable image.
+
+    ``chart`` is accepted for interface compatibility and is not read here.
+    """
+    if component_path in OS_SPECIFIC_COMPONENTS:
+        return os_image_ref_candidates(
+            values,
+            component_path,
+            olm_bundle_os_suffix,
+        )
+
+    data = component(values, component_path)
+    repository = (data.get("repository") or "").strip()
+    image = (data.get("image") or "").strip()
+    version = data.get("version")
+
+    if repository:
+        version = (version or "").strip()
+        if not image or not version:
+            return []
+        if version.startswith("sha256:"):
+            # A digest is attached with "@", never ":"; this mirrors the
+            # handling in os_image_ref_candidates().
+            return [f"{repository}/{image}@{version}"]
+        return [f"{repository}/{image}:{version}"]
+
+    # Some override files provide a fully-qualified image in the image field.
+    if image and ("/" in image or "@" in image):
+        return [image]
+
+    return []
+
+
+def resolve_digest(image_ref, skip_digest):
+    """Pin a tagged image reference to its digest using ``regctl``.
+
+    The reference is returned unchanged when it is empty, when
+    ``skip_digest`` is true, or when its last path segment carries no tag.
+    Otherwise any existing ``@...`` suffix is dropped and the digest
+    reported by ``regctl image digest`` is appended to ``name:tag``.
+
+    Raises:
+        subprocess.CalledProcessError: if ``regctl`` exits non-zero.
+    """
+    if not image_ref or skip_digest:
+        return image_ref
+
+    image_ref_without_digest = image_ref.split("@", 1)[0]
+    # Only the last path segment can hold a tag; a colon earlier in the
+    # reference belongs to a registry port.
+    image_name = image_ref_without_digest.rsplit("/", 1)[-1]
+    if ":" not in image_name:
+        return image_ref
+
+    digest = subprocess.check_output(
+        ["regctl", "image", "digest", image_ref_without_digest],
+        text=True,
+    ).strip()
+    return f"{image_ref_without_digest}@{digest}"
+
+
+def resolve_digest_candidates(image_refs, skip_digest):
+    """Return the first candidate reference that resolves successfully.
+
+    A warning is printed to stderr for each candidate that fails. When
+    every candidate fails, the last CalledProcessError is re-raised; an
+    empty candidate list yields ``None``.
+    """
+    last_error = None
+    for image_ref in image_refs:
+        try:
+            return resolve_digest(image_ref, skip_digest)
+        except subprocess.CalledProcessError as exc:
+            last_error = exc
+            print(f"Warning: failed to resolve digest for {image_ref}.", file=sys.stderr)
+
+    if last_error:
+        raise last_error
+    return None
+
+
+def strip_tag_from_digest_ref(image_ref):
+    """Turn ``name:tag@sha256:...`` into ``name@sha256:...``.
+
+    References without an ``@sha256:`` digest are returned unchanged.
+    """
+    if not image_ref or "@sha256:" not in image_ref:
+        return image_ref
+
+    image_ref_without_digest, digest = image_ref.split("@", 1)
+    last_slash = image_ref_without_digest.rfind("/")
+    last_colon = image_ref_without_digest.rfind(":")
+    # A colon before the last slash is a registry port, not a tag.
+    if last_colon > last_slash:
+        image_ref_without_digest = image_ref_without_digest[:last_colon]
+
+    return f"{image_ref_without_digest}@{digest}"
+
+
+def split_repository_image_version(image_ref):
+    """Split an image reference into ``(repository, image, version)``.
+
+    ``version`` is the tag, the digest, or ``tag@digest`` when both are
+    present; it is empty when the reference has neither. ``repository`` is
+    empty when the reference contains no "/".
+    """
+    if "@sha256:" in image_ref:
+        image_ref_without_digest, digest = image_ref.split("@", 1)
+    else:
+        image_ref_without_digest = image_ref
+        digest = ""
+
+    repository_and_image, separator, tag = image_ref_without_digest.rpartition(":")
+    if not separator or "/" in tag:
+        # Either there is no colon at all (rpartition then puts the whole
+        # string in ``tag``) or the last colon is a registry port.
+        image_path = image_ref_without_digest
+        tag = ""
+    else:
+        image_path = repository_and_image
+
+    version = f"{tag}@{digest}" if tag and digest else digest or tag
+    if "/" not in image_path:
+        return "", image_path, version
+
+    repository, image = image_path.rsplit("/", 1)
+    return repository, image, version
+
+
+def update_alm_examples(csv, image_refs):
+    """Point every NVIDIADriver example in ``alm-examples`` at the driver image.
+
+    Does nothing when the CSV has no ``alm-examples`` annotation or when
+    ``image_refs`` holds no "driver" entry. The annotation is rewritten in
+    place as JSON indented by two spaces.
+    """
+    annotations = csv.get("metadata", {}).get("annotations", {})
+    alm_examples = annotations.get("alm-examples")
+    driver_ref = image_refs.get("driver")
+    if not alm_examples or not driver_ref:
+        return
+
+    examples = json.loads(alm_examples)
+    driver_repository, driver_image, driver_version = split_repository_image_version(driver_ref)
+
+    for example in examples:
+        if example.get("kind") != "NVIDIADriver":
+            continue
+        spec = example.setdefault("spec", {})
+        spec["repository"] = driver_repository
+        spec["image"] = driver_image
+        spec["version"] = driver_version
+
+    annotations["alm-examples"] = json.dumps(examples, indent=2)
+
+
+def update_operator_deployment(csv, operator_ref, env_refs):
+    """Update the ``gpu-operator`` container in every CSV deployment.
+
+    The container image is set to ``operator_ref`` when that is non-empty,
+    and each existing env entry whose name appears in ``env_refs`` gets the
+    mapped value. Env entries are never added, only updated.
+    """
+    install = csv.get("spec", {}).get("install", {})
+    for deployment in install.get("spec", {}).get("deployments", []):
+        spec = deployment.get("spec", {}).get("template", {}).get("spec", {})
+        for container in spec.get("containers", []):
+            if container.get("name") != "gpu-operator":
+                continue
+            if operator_ref:
+                container["image"] = operator_ref
+            for env in container.get("env", []):
+                value = env_refs.get(env.get("name"))
+                if value:
+                    env["value"] = value
+
+
+def csv_name_for_version(version):
+    """Return the CSV ``metadata.name`` for ``version``."""
+    return f"gpu-operator-certified.v{version}"
+
+
+def release_metadata_from_tag(release_tag):
+    """Derive CSV release metadata from a release tag.
+
+    Returns a dict with:
+
+    * ``version``: ``X.Y.Z`` or ``X.Y.Z-rc.N``;
+    * ``replaces``: CSV name of the previous release candidate when N > 1,
+      else of the previous patch release when Z > 0, else ``None``;
+    * ``skip_range``: ``>=1.9.0 <X.Y.Z``.
+
+    Raises:
+        ValueError: if ``release_tag`` does not match RELEASE_TAG_PATTERN.
+    """
+    match = RELEASE_TAG_PATTERN.match((release_tag or "").strip())
+    if not match:
+        raise ValueError(
+            f"release tag must look like v26.3.2 or v26.3.2-rc.1: {release_tag}"
+        )
+
+    major = int(match.group("major"))
+    minor = int(match.group("minor"))
+    patch = int(match.group("patch"))
+    rc_text = match.group("rc")
+    # Normalise through int() like the other components, so "rc.01" and
+    # "rc.1" produce the same CSV version.
+    rc = int(rc_text) if rc_text is not None else None
+
+    final_version = f"{major}.{minor}.{patch}"
+    csv_version = f"{final_version}-rc.{rc}" if rc is not None else final_version
+
+    if rc is not None and rc > 1:
+        replaces_version = f"{final_version}-rc.{rc - 1}"
+    elif patch > 0:
+        replaces_version = f"{major}.{minor}.{patch - 1}"
+    else:
+        replaces_version = None
+
+    return {
+        "version": csv_version,
+        "replaces": csv_name_for_version(replaces_version) if replaces_version else None,
+        "skip_range": f">=1.9.0 <{final_version}",
+    }
+
+
+def update_release_metadata(csv, release_tag):
+    """Write name, version, ``olm.skipRange`` and ``replaces`` into the CSV.
+
+    Does nothing when ``release_tag`` is empty. An existing
+    ``spec.replaces`` is left untouched when no predecessor can be derived
+    from the tag.
+    """
+    if not release_tag:
+        return
+
+    metadata = release_metadata_from_tag(release_tag)
+    csv_metadata = csv.setdefault("metadata", {})
+    csv_metadata["name"] = csv_name_for_version(metadata["version"])
+    annotations = csv_metadata.setdefault("annotations", {})
+    annotations["olm.skipRange"] = metadata["skip_range"]
+
+    spec = csv.setdefault("spec", {})
+    spec["version"] = metadata["version"]
+    if metadata["replaces"]:
+        spec["replaces"] = metadata["replaces"]
+
+
+def main():
+    """Rewrite the CSV given by --csv with images taken from --values.
+
+    Image references are built per component, optionally pinned to digests,
+    and written to the containerImage annotation, spec.relatedImages, the
+    gpu-operator container, and the alm-examples annotation. Release
+    metadata is updated when --release-tag is given.
+    """
+    parser = argparse.ArgumentParser(
+        description="Update OLM CSV image references from Helm values."
+    )
+    parser.add_argument("--values", required=True, type=pathlib.Path)
+    parser.add_argument("--chart", required=True, type=pathlib.Path)
+    parser.add_argument("--csv", required=True, type=pathlib.Path)
+    parser.add_argument(
+        "--olm-bundle-os-suffix",
+        dest="olm_bundle_os_suffix",
+        default="rhel9.6",
+    )
+    parser.add_argument(
+        "--release-tag",
+        default="",
+        help="Release tag used to update CSV version, replaces, and olm.skipRange.",
+    )
+    parser.add_argument("--skip-digest", action="store_true")
+    args = parser.parse_args()
+
+    values = load_yaml(args.values)
+    chart = load_yaml(args.chart)
+    csv = load_yaml(args.csv)
+    os_suffix = args.olm_bundle_os_suffix.strip()
+
+    image_refs = {}
+    component_names = set(RELATED_IMAGE_COMPONENTS.values()) | set(ENV_IMAGE_COMPONENTS.values())
+    component_names.add("operator")
+    component_names.add("driver")
+
+    # Sorted so digests are resolved in the same order on every run.
+    for component_name in sorted(component_names):
+        image_ref_candidates = build_image_ref_candidates(
+            values,
+            chart,
+            component_name,
+            os_suffix,
+        )
+        if image_ref_candidates:
+            image_ref = resolve_digest_candidates(
+                image_ref_candidates,
+                args.skip_digest,
+            )
+            if component_name in OS_SPECIFIC_COMPONENTS:
+                image_ref = strip_tag_from_digest_ref(image_ref)
+            image_refs[component_name] = image_ref
+
+    operator_ref = image_refs.get("operator")
+    if operator_ref:
+        csv.setdefault("metadata", {}).setdefault("annotations", {})["containerImage"] = operator_ref
+
+    related_images = csv.get("spec", {}).get("relatedImages", [])
+    driver_slot = driver_image_slot(values)
+    driver_related_image_name = DRIVER_RELATED_IMAGES[driver_slot]
+    for item in related_images:
+        item_name = item.get("name")
+        # Only the driver entry matching the current slot is updated.
+        if item_name == driver_related_image_name and "driver" in image_refs:
+            item["image"] = image_refs["driver"]
+            continue
+
+        component_name = RELATED_IMAGE_COMPONENTS.get(item_name)
+        if component_name and component_name in image_refs:
+            item["image"] = image_refs[component_name]
+
+    env_refs = {
+        env_name: image_refs[component_name]
+        for env_name, component_name in ENV_IMAGE_COMPONENTS.items()
+        if component_name in image_refs
+    }
+    driver_env_name = DRIVER_ENV_IMAGES[driver_slot]
+    if "driver" in image_refs:
+        env_refs[driver_env_name] = image_refs["driver"]
+
+    update_operator_deployment(csv, operator_ref, env_refs)
+    update_alm_examples(csv, image_refs)
+    update_release_metadata(csv, args.release_tag)
+
+    with args.csv.open("w", encoding="utf-8") as f:
+        yaml.safe_dump(csv, f, default_flow_style=False, sort_keys=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/release-image-list.yaml b/.github/workflows/release-image-list.yaml
index 3b707967e3..2f5ed6cd8c 100644
--- a/.github/workflows/release-image-list.yaml
+++ b/.github/workflows/release-image-list.yaml
@@ -23,6 +23,7 @@ permissions:
 
 jobs:
   generate-image-list:
+    if: ${{ !github.event.release.prerelease && !contains(github.event.release.tag_name, '-rc.') }}
     runs-on: ubuntu-latest
     timeout-minutes: 5
     permissions:
diff --git a/.github/workflows/release-rc-assets.yaml b/.github/workflows/release-rc-assets.yaml
new file mode 100644
index 0000000000..ae969d9211
--- /dev/null
+++ b/.github/workflows/release-rc-assets.yaml
@@ -0,0 +1,398
@@
+# Copyright NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Manual usage:
+# 1. In GitHub Actions, choose this workflow and select the branch that should
+#    produce the RC artifacts. The workflow runs on the selected branch head.
+# 2. Set release_tag to the artifact version, for example v26.7.0-rc.1.
+#    No Git tag is required or created.
+# 3. Optionally set helm_values_override_b64 to base64-encoded Helm values YAML
+#    for image overrides. Include operator, validator, and nodeStatusExporter
+#    when overriding the gpu-operator image location or tag.
+#    Example values-overrides.yaml:
+#      operator:
+#        repository: ghcr.io/nvidia
+#        image: gpu-operator
+#        version: 4d29f656
+#      validator:
+#        repository: ghcr.io/nvidia
+#        image: gpu-operator
+#        version: 4d29f656
+#      nodeStatusExporter:
+#        repository: ghcr.io/nvidia
+#        image: gpu-operator
+#        version: 4d29f656
+#    Encode it with:
+#      base64 -w0 values-overrides.yaml
+#    On macOS/BSD base64, use:
+#      base64 < values-overrides.yaml | tr -d '\n'
+# 4. Optional input:
+#    - olm_bundle_os_suffix defaults to rhel9.6.
+# 5. Start the manual workflow. The Helm OCI chart and OLM bundle image are
+#    pushed to GHCR with release_tag, and dist/ is uploaded as a workflow
+#    artifact.
+#
+name: Release RC Assets
+
+on:
+  workflow_dispatch:
+    inputs:
+      release_tag:
+        description: "RC release tag, for example v26.7.0-rc.1"
+        required: true
+        type: string
+      helm_values_override_b64:
+        description: "Base64-encoded Helm values override YAML"
+        required: false
+        type: string
+      olm_bundle_os_suffix:
+        description: "OS suffix used for OLM bundle driver and GDRCopy images"
+        required: false
+        default: "rhel9.6"
+        type: string
+
+permissions: {}
+
+jobs:
+  package-rc-assets:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+      packages: write
+    outputs:
+      artifact_name: ${{ steps.package.outputs.artifact_name }}
+      release_tag: ${{ steps.validate.outputs.release_tag }}
+      helm_values_override_asset: ${{ steps.package.outputs.helm_values_override_asset }}
+      image_list_asset: ${{ steps.package.outputs.image_list_asset }}
+    steps:
+      - uses: actions/checkout@v7
+        name: Check out code
+        with:
+          persist-credentials: false
+
+      - name: Validate inputs
+        id: validate
+        env:
+          RELEASE_TAG: ${{ inputs.release_tag }}
+        run: |
+          set -euo pipefail
+
+          if [[ ! "${RELEASE_TAG}" =~ ^v?[0-9]+\.[0-9]+\.[0-9]+-rc\.[0-9]+$ ]]; then
+            echo "::error::release_tag must look like v26.3.2-rc.1"
+            exit 1
+          fi
+
+          echo "release_tag=${RELEASE_TAG}" >> "${GITHUB_OUTPUT}"
+          echo "::notice::Release tag: ${RELEASE_TAG}"
+          echo "::notice::Source commit: ${GITHUB_SHA}"
+
+      - name: Validate package inputs
+        id: package-inputs
+        env:
+          HELM_VALUES_OVERRIDE_B64: ${{ inputs.helm_values_override_b64 }}
+        run: |
+          set -euo pipefail
+
+          OVERRIDE_FILE=""
+          if [[ -n "${HELM_VALUES_OVERRIDE_B64}" ]]; then
+            OVERRIDE_FILE="${RUNNER_TEMP}/helm-values-overrides.yaml"
+            printf '%s' "${HELM_VALUES_OVERRIDE_B64}" | base64 --decode > "${OVERRIDE_FILE}"
+          fi
+
+          if [[ ! -d bundle ]]; then
+            echo "::error::OLM bundle path does not exist or is not a directory: bundle"
+            exit 1
+          fi
+
+          echo "helm_values_override=${OVERRIDE_FILE}" >> "${GITHUB_OUTPUT}"
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Get Golang version
+        run: |
+          GOLANG_VERSION=$(grep "GOLANG_VERSION ?=" versions.mk)
+          echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION ?= }" >> "${GITHUB_ENV}"
+
+      - name: Install Go
+        uses: actions/setup-go@v6
+        with:
+          go-version: ${{ env.GOLANG_VERSION }}
+
+      - name: Install PyYAML
+        run: pip install --quiet pyyaml
+
+      - name: Install regctl
+        uses: regclient/actions/regctl-installer@148669fe4b19151fcab6e00c6df2db43b9e2b097
+        with:
+          release: v0.11.5
+
+      - name: Install Operator SDK
+        run: |
+          set -euo pipefail
+
+          OPERATOR_SDK_VERSION="$(awk -F': ' '/operators.operatorframework.io.metrics.builder:/ { print $2 }' bundle/metadata/annotations.yaml | sed 's/^operator-sdk-//')"
+          if [[ -z "${OPERATOR_SDK_VERSION}" ]]; then
+            echo "::error::OPERATOR_SDK_VERSION not found in bundle metadata"
+            exit 1
+          fi
+
+          # -f makes curl exit non-zero on an HTTP error instead of saving
+          # the error page as the operator-sdk binary.
+          curl -fsSLo operator-sdk \
+            "https://github.com/operator-framework/operator-sdk/releases/download/${OPERATOR_SDK_VERSION}/operator-sdk_linux_amd64"
+          chmod +x operator-sdk
+          sudo mv operator-sdk /usr/local/bin/operator-sdk
+          operator-sdk version
+
+      - name: Login to GitHub Container Registry
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: regctl registry login ghcr.io -u "${GITHUB_ACTOR}" -p "${GH_TOKEN}"
+
+      - name: Login to GitHub Container Registry for Helm
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: echo "${GH_TOKEN}" | helm registry login ghcr.io -u "${GITHUB_ACTOR}" --password-stdin
+
+      - name: Login to GitHub Container Registry for Docker
+        uses: docker/login-action@v4
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Apply Helm values override
+        if: ${{ steps.package-inputs.outputs.helm_values_override != '' }}
+        env:
+          HELM_VALUES_OVERRIDE: ${{ steps.package-inputs.outputs.helm_values_override }}
+        run: |
+          python3 .github/scripts/merge-yaml.py \
+            --base deployments/gpu-operator/values.yaml \
+            --override "${HELM_VALUES_OVERRIDE}" \
+            --output deployments/gpu-operator/values.yaml
+
+      - name: Lint Helm chart
+        run: helm lint deployments/gpu-operator
+
+      - name: Update OLM bundle images and release metadata
+        env:
+          RELEASE_TAG: ${{ steps.validate.outputs.release_tag }}
+          OLM_BUNDLE_OS_SUFFIX: ${{ inputs.olm_bundle_os_suffix }}
+        run: |
+          python3 .github/scripts/update-csv-images.py \
+            --release-tag "${RELEASE_TAG}" \
+            --olm-bundle-os-suffix "${OLM_BUNDLE_OS_SUFFIX}" \
+            --values deployments/gpu-operator/values.yaml \
+            --chart deployments/gpu-operator/Chart.yaml \
+            --csv bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
+
+      - name: Validate updated OLM bundle
+        env:
+          RELEASE_TAG: ${{ steps.validate.outputs.release_tag }}
+          CSV: bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
+        run: |
+          set -euo pipefail
+
+          python3 - <<'PY'
+          import os
+          import re
+          import sys
+
+          import yaml
+
+          release_tag = os.environ["RELEASE_TAG"]
+          csv_path = os.environ["CSV"]
+          match = re.fullmatch(r"v?([0-9]+)\.([0-9]+)\.([0-9]+)-rc\.([0-9]+)", release_tag)
+          if not match:
+              print(f"::error::release tag must look like v26.3.2-rc.1: {release_tag}")
+              sys.exit(1)
+
+          major, minor, patch, rc = (int(part) for part in match.groups())
+          final_version = f"{major}.{minor}.{patch}"
+          expected_version = f"{final_version}-rc.{rc}"
+          expected_name = f"gpu-operator-certified.v{expected_version}"
+          expected_skip_range = f">=1.9.0 <{final_version}"
+
+          with open(csv_path) as f:
+              csv = yaml.safe_load(f)
+
+          annotations = csv.get("metadata", {}).get("annotations", {})
+          spec = csv.get("spec", {})
+          checks = {
+              "metadata.name": (
+                  csv.get("metadata", {}).get("name"),
+                  expected_name,
+              ),
+              "metadata.annotations.olm.skipRange": (
+                  annotations.get("olm.skipRange"),
+                  expected_skip_range,
+              ),
+              "spec.version": (str(spec.get("version")), expected_version),
+          }
+
+          failed = False
+          for field, (actual, expected) in checks.items():
+              if actual != expected:
+                  print(f"::error::{field} is {actual!r}; expected {expected!r}")
+                  failed = True
+
+          if failed:
+              sys.exit(1)
+
+          print(
+              "::notice::Verified OLM release metadata: "
+              f"name={expected_name}, version={expected_version}, "
+              f"replaces={spec.get('replaces')}, "
+              f"olm.skipRange='{expected_skip_range}'"
+          )
+          PY
+
+          operator-sdk bundle validate ./bundle
+          make validate-csv
+
+      - name: Build and push OLM bundle image
+        env:
+          RELEASE_TAG: ${{ steps.validate.outputs.release_tag }}
+          GH_REPOSITORY: ${{ github.repository }}
+          VERSION: ""
+          DEFAULT_CHANNEL: "stable"
+          CHANNELS: "stable"
+          BUNDLE_IMAGE_BUILD_PLATFORM_OPTIONS: "--platform=linux/amd64,linux/arm64 --push"
+        run: |
+          REPOSITORY="$(echo "${GH_REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
+          BUNDLE_IMAGE_BASE="ghcr.io/${REPOSITORY}/gpu-operator-bundle"
+          make build-bundle-image DOCKER="docker buildx" BUNDLE_IMAGE="${BUNDLE_IMAGE_BASE}:${RELEASE_TAG}"
+
+      - name: Package RC assets
+        id: package
+        env:
+          RELEASE_TAG: ${{ steps.validate.outputs.release_tag }}
+          HELM_VALUES_OVERRIDE: ${{ steps.package-inputs.outputs.helm_values_override }}
+        run: |
+          set -euo pipefail
+
+          ARTIFACT_NAME="gpu-operator-rc-assets-${RELEASE_TAG}"
+          DIST_DIR="dist"
+          mkdir -p "${DIST_DIR}"
+
+          IMAGE_LIST_ASSET="gpu-operator-images.txt"
+          VALUES_ASSET="values-${RELEASE_TAG}.yaml"
+          CSV_ASSET="gpu-operator-certified-${RELEASE_TAG}.clusterserviceversion.yaml"
+          VALUES_OVERRIDE_ASSET=""
+          if [[ -n "${HELM_VALUES_OVERRIDE}" ]]; then
+            VALUES_OVERRIDE_ASSET="helm-values-overrides-${RELEASE_TAG}.yaml"
+            cp "${HELM_VALUES_OVERRIDE}" "${DIST_DIR}/${VALUES_OVERRIDE_ASSET}"
+          fi
+
+          cp deployments/gpu-operator/values.yaml "${DIST_DIR}/${VALUES_ASSET}"
+          cp bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml "${DIST_DIR}/${CSV_ASSET}"
+
+          python3 .github/scripts/generate-image-list.py \
+            --gpu-operator-version "${RELEASE_TAG}" \
+            --values deployments/gpu-operator/values.yaml \
+            --chart deployments/gpu-operator/Chart.yaml \
+            --nfd-values deployments/gpu-operator/charts/node-feature-discovery/values.yaml \
+            --nfd-chart deployments/gpu-operator/charts/node-feature-discovery/Chart.yaml \
+            --output "${DIST_DIR}/${IMAGE_LIST_ASSET}"
+
+          helm package \
+            --version "${RELEASE_TAG}" \
+            --app-version "${RELEASE_TAG}" \
+            --destination "${DIST_DIR}" \
+            deployments/gpu-operator
+
+          (
+            cd "${DIST_DIR}"
+            sha256sum * > SHA256SUMS
+          )
+
+          echo "artifact_name=${ARTIFACT_NAME}" >> "${GITHUB_OUTPUT}"
+          echo "helm_values_override_asset=${VALUES_OVERRIDE_ASSET}" >> "${GITHUB_OUTPUT}"
+          echo "image_list_asset=${IMAGE_LIST_ASSET}" >> "${GITHUB_OUTPUT}"
+          echo "::notice::Packaged assets:"
+          ls -lah "${DIST_DIR}"
+
+      - name: Publish Helm OCI chart
+        env:
+          RELEASE_TAG: ${{ steps.validate.outputs.release_tag }}
+          GH_REPOSITORY: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+
+          REPOSITORY="$(echo "${GH_REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
+          CHART_PACKAGE="dist/gpu-operator-${RELEASE_TAG}.tgz"
+          CHART_REPOSITORY="oci://ghcr.io/${REPOSITORY}/charts"
+
+          if [[ ! -f "${CHART_PACKAGE}" ]]; then
+            echo "::error::Helm chart package does not exist: ${CHART_PACKAGE}"
+            exit 1
+          fi
+
+          helm push "${CHART_PACKAGE}" "${CHART_REPOSITORY}"
+          echo "::notice::Published Helm chart to ${CHART_REPOSITORY}/gpu-operator:${RELEASE_TAG}"
+
+      - name: Verify expected images are available
+        env:
+          RELEASE_TAG: ${{ steps.validate.outputs.release_tag }}
+          GH_REPOSITORY: ${{ github.repository }}
+          IMAGE_LIST_ASSET: ${{ steps.package.outputs.image_list_asset }}
+        run: |
+          set -euo pipefail
+
+          IMAGE_LIST="dist/${IMAGE_LIST_ASSET}"
+          if [[ ! -s "${IMAGE_LIST}" ]]; then
+            echo "::error::Image list asset does not exist or is empty: ${IMAGE_LIST}"
+            exit 1
+          fi
+
+          REPOSITORY="$(echo "${GH_REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
+          BUNDLE_IMAGE="ghcr.io/${REPOSITORY}/gpu-operator-bundle:${RELEASE_TAG}"
+
+          missing_images=()
+          while IFS= read -r image || [[ -n "${image}" ]]; do
+            [[ -z "${image}" ]] && continue
+            if ! regctl manifest head "${image}" >/dev/null; then
+              missing_images+=("${image}")
+            fi
+          done < "${IMAGE_LIST}"
+
+          if ! regctl manifest head "${BUNDLE_IMAGE}" >/dev/null; then
+            missing_images+=("${BUNDLE_IMAGE}")
+          fi
+
+          if (( ${#missing_images[@]} > 0 )); then
+            echo "::error::The following expected images are not available:"
+            printf ' %s\n' "${missing_images[@]}"
+            exit 1
+          fi
+
+          echo "::notice::Verified all expected images are available, including ${BUNDLE_IMAGE}."
+
+      - name: Upload packaged assets artifact
+        uses: actions/upload-artifact@v7
+        with:
+          name: ${{ steps.package.outputs.artifact_name }}
+          path: dist/*
+          if-no-files-found: error