Skip to content

Generate SBOM for CNCF Projects #17

Generate SBOM for CNCF Projects

Generate SBOM for CNCF Projects #17

Workflow file for this run

name: Generate SBOM for CNCF Projects

# Triggers: a weekly cron schedule, plus manual dispatch with tuning inputs.
on:
  schedule:
    # Run weekly on Sunday at 02:00 UTC
    - cron: '0 2 * * 0'
  workflow_dispatch:
    inputs:
      # Restrict the run to a single owner/repo pair; empty means no filter.
      project_filter:
        description: 'Filter by owner/repo (e.g., "kubernetes/kubernetes"), leave empty for all'
        default: ''
        required: false
      # When true, SBOM files that already exist are generated again.
      force_regenerate:
        description: 'Force regenerate existing SBOMs'
        type: boolean
        default: false
        required: false
      # Selects how releases are picked for each repository.
      releases_mode:
        description: 'Which releases to process'
        type: choice
        default: 'recent'
        required: false
        options:
          - recent  # Only releases from the past week
          - latest  # Latest N releases regardless of date
      # Upper bound on releases per repository; choices are strings on purpose.
      max_releases:
        description: 'Maximum number of releases to process per repo (only used with "latest" mode)'
        type: choice
        default: '3'
        required: false
        options:
          - '1'
          - '2'
          - '3'
          - '4'
          - '5'
      # Chooses which repository lists feed the matrix jobs.
      source:
        description: 'Which repositories to process'
        type: choice
        default: 'all'
        required: false
        options:
          - all  # Both CNCF projects and discovered repos
          - cncf  # Only official CNCF projects
          - discovered  # Only discovered repos from CNCF orgs

# Release tag of the mikebom binary downloaded by the SBOM generation job.
env:
  MIKEBOM_VERSION: v0.1.0-alpha.9

# The workflow token only needs read access to repository contents.
permissions:
  contents: read
jobs:
# ===========================================
# Prepare matrix for official CNCF projects
# ===========================================
prepare-cncf-matrix:
  runs-on: ubuntu-latest
  # Runs unless a manual dispatch selected only discovered repos. On scheduled
  # runs the input is empty, so the comparison is true and the job runs.
  if: ${{ github.event.inputs.source != 'discovered' }}
  outputs:
    # JSON object of the form {"include": [...]} consumed via fromJson().
    matrix: ${{ steps.set-matrix.outputs.matrix }}
    # 'true' when the matrix contains at least one repository, else 'false'.
    has_repos: ${{ steps.set-matrix.outputs.has_repos }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v5
    - name: Setup yq
      # NOTE(review): installs whatever the "latest" yq release is at run time;
      # consider pinning a version for reproducible runs.
      run: |
        sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
        sudo chmod +x /usr/local/bin/yq
    - name: Generate CNCF projects matrix
      id: set-matrix
      env:
        # The user-supplied filter is handed over as an environment variable
        # instead of being expanded into the script text, so its contents are
        # never parsed as shell code.
        FILTER: ${{ github.event.inputs.project_filter }}
      run: |
        PROJECTS_FILE="util/data/cncf-projects.yaml"
        if [ ! -f "$PROJECTS_FILE" ]; then
          echo "Error: $PROJECTS_FILE not found. Run the sync-cncf-projects workflow first."
          exit 1
        fi
        if [ -n "$FILTER" ]; then
          # Split "owner/repo" and export both halves so yq reads them with
          # strenv() rather than having them spliced into the yq expression.
          OWNER=$(echo "$FILTER" | cut -d'/' -f1)
          REPO=$(echo "$FILTER" | cut -d'/' -f2)
          export OWNER REPO
          MATRIX=$(yq -o=json '.repositories | map(select(.owner == strenv(OWNER) and .repo == strenv(REPO))) | {"include": .}' "$PROJECTS_FILE" | jq -c .)
        else
          MATRIX=$(yq -o=json '{"include": .repositories}' "$PROJECTS_FILE" | jq -c .)
        fi
        REPO_COUNT=$(echo "$MATRIX" | jq '.include | length')
        echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
        # Downstream jobs gate on this flag because an empty matrix cannot run.
        if [ "$REPO_COUNT" -gt 0 ]; then
          echo "has_repos=true" >> "$GITHUB_OUTPUT"
        else
          echo "has_repos=false" >> "$GITHUB_OUTPUT"
        fi
        echo "Generated CNCF matrix with $REPO_COUNT repositories"
# ===========================================
# Prepare matrix for discovered repos (split into batches because a matrix can generate at most 256 jobs per workflow run)
# ===========================================
prepare-discovered-matrix:
  runs-on: ubuntu-latest
  # Runs unless a manual dispatch selected only official CNCF projects. On
  # scheduled runs the input is empty, so the job runs.
  if: ${{ github.event.inputs.source != 'cncf' }}
  # One matrix / has_repos pair per batch; the count must match MAX_BATCHES
  # in the script below.
  outputs:
    matrix1: ${{ steps.set-matrix.outputs.matrix1 }}
    matrix2: ${{ steps.set-matrix.outputs.matrix2 }}
    matrix3: ${{ steps.set-matrix.outputs.matrix3 }}
    matrix4: ${{ steps.set-matrix.outputs.matrix4 }}
    matrix5: ${{ steps.set-matrix.outputs.matrix5 }}
    matrix6: ${{ steps.set-matrix.outputs.matrix6 }}
    matrix7: ${{ steps.set-matrix.outputs.matrix7 }}
    matrix8: ${{ steps.set-matrix.outputs.matrix8 }}
    has_repos1: ${{ steps.set-matrix.outputs.has_repos1 }}
    has_repos2: ${{ steps.set-matrix.outputs.has_repos2 }}
    has_repos3: ${{ steps.set-matrix.outputs.has_repos3 }}
    has_repos4: ${{ steps.set-matrix.outputs.has_repos4 }}
    has_repos5: ${{ steps.set-matrix.outputs.has_repos5 }}
    has_repos6: ${{ steps.set-matrix.outputs.has_repos6 }}
    has_repos7: ${{ steps.set-matrix.outputs.has_repos7 }}
    has_repos8: ${{ steps.set-matrix.outputs.has_repos8 }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v5
    - name: Setup yq
      # NOTE(review): installs whatever the "latest" yq release is at run time;
      # consider pinning a version for reproducible runs.
      run: |
        sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
        sudo chmod +x /usr/local/bin/yq
    - name: Generate discovered repos matrix (batched)
      id: set-matrix
      env:
        # The user-supplied filter is handed over as an environment variable
        # instead of being expanded into the script text, so its contents are
        # never parsed as shell code.
        FILTER: ${{ github.event.inputs.project_filter }}
      run: |
        DISCOVERED_FILE="util/data/discovered-repos.yaml"
        BATCH_SIZE=250
        MAX_BATCHES=8
        if [ ! -f "$DISCOVERED_FILE" ]; then
          # A missing file is not an error: emit empty batches so every
          # downstream batch job is skipped.
          echo "No discovered-repos.yaml found, skipping discovered repos"
          for i in $(seq 1 "$MAX_BATCHES"); do
            echo "matrix${i}={\"include\":[]}" >> "$GITHUB_OUTPUT"
            echo "has_repos${i}=false" >> "$GITHUB_OUTPUT"
          done
          exit 0
        fi
        if [ -n "$FILTER" ]; then
          # Split "owner/repo" and export both halves so yq reads them with
          # strenv() rather than having them spliced into the yq expression.
          OWNER=$(echo "$FILTER" | cut -d'/' -f1)
          REPO=$(echo "$FILTER" | cut -d'/' -f2)
          export OWNER REPO
          ALL_REPOS=$(yq -o=json '.repositories | map(select(.owner == strenv(OWNER) and .repo == strenv(REPO)))' "$DISCOVERED_FILE")
        else
          ALL_REPOS=$(yq -o=json '.repositories' "$DISCOVERED_FILE")
        fi
        TOTAL=$(echo "$ALL_REPOS" | jq 'length')
        echo "Total discovered repos: $TOTAL"
        MAX_SUPPORTED=$((BATCH_SIZE * MAX_BATCHES))
        if [ "$TOTAL" -gt "$MAX_SUPPORTED" ]; then
          echo "::warning::Found $TOTAL repos but only $MAX_SUPPORTED can be processed (max $MAX_BATCHES batches of $BATCH_SIZE). Consider increasing MAX_BATCHES."
        fi
        # Split into batches; slices past the end of the list come out empty.
        for i in $(seq 1 "$MAX_BATCHES"); do
          START=$(( (i-1) * BATCH_SIZE ))
          BATCH=$(echo "$ALL_REPOS" | jq -c --argjson start "$START" --argjson size "$BATCH_SIZE" '.[$start:$start+$size] | {"include": .}')
          COUNT=$(echo "$BATCH" | jq '.include | length')
          echo "matrix${i}=$BATCH" >> "$GITHUB_OUTPUT"
          if [ "$COUNT" -gt 0 ]; then
            echo "has_repos${i}=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_repos${i}=false" >> "$GITHUB_OUTPUT"
          fi
          echo "Batch $i: $COUNT repositories"
        done
# ===========================================
# Generate SBOMs for official CNCF projects
# ===========================================
generate-sbom-cncf:
  needs: prepare-cncf-matrix
  # An empty matrix cannot be expanded, so only run when there are repos.
  if: ${{ needs.prepare-cncf-matrix.outputs.has_repos == 'true' }}
  runs-on: oracle-vm-2cpu-8gb-x86-64
  strategy:
    # One failing repository must not cancel the others.
    fail-fast: false
    max-parallel: 5
    matrix: ${{ fromJson(needs.prepare-cncf-matrix.outputs.matrix) }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v5
    - name: Install mikebom
      run: |
        # -f makes an HTTP error fail here instead of at the tar step.
        curl -fsSL "https://github.com/kusari-sandbox/mikebom/releases/download/${MIKEBOM_VERSION}/mikebom-${MIKEBOM_VERSION}-x86_64-unknown-linux-gnu.tar.gz" -o /tmp/mikebom.tar.gz
        tar xzf /tmp/mikebom.tar.gz -C /tmp
        sudo install -m 755 "/tmp/mikebom-${MIKEBOM_VERSION}-x86_64-unknown-linux-gnu/mikebom" /usr/local/bin/mikebom
        mikebom --version
    - name: Install AWS CLI
      run: |
        # Only install when the runner image does not already provide it.
        if ! command -v aws &>/dev/null; then
          curl -sSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip
          unzip -q /tmp/awscliv2.zip -d /tmp
          sudo /tmp/aws/install
        fi
    - name: Get stable releases and generate SBOMs
      id: generate
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        FORCE_REGENERATE: ${{ github.event.inputs.force_regenerate }}
        RELEASES_MODE: ${{ github.event.inputs.releases_mode || 'recent' }}
        MAX_RELEASES: ${{ github.event.inputs.max_releases || '3' }}
        # Matrix values reach the script as environment variables so their
        # contents are never parsed as shell code.
        OWNER: ${{ matrix.owner }}
        REPO: ${{ matrix.repo }}
        PROJECT_NAME: ${{ matrix.name }}
      run: |
        set -e
        # Lower-case the project name, turn spaces into dashes and drop every
        # other non-alphanumeric character to get a path-safe slug.
        SANITIZED_PROJECT=$(echo "$PROJECT_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | tr -cd '[:alnum:]-')
        SBOM_BASE_DIR="sbom/${SANITIZED_PROJECT}/${REPO}"
        PROCESSED=0
        # GNU date first, BSD date as a fallback.
        ONE_WEEK_AGO=$(date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-7d +%Y-%m-%dT%H:%M:%SZ)
        echo "::group::Processing $PROJECT_NAME ($OWNER/$REPO)"
        echo "Releases mode: $RELEASES_MODE"

        # generate_sbom TAG
        # Clones OWNER/REPO at TAG and writes an SPDX SBOM under SBOM_BASE_DIR.
        # Returns 0 only when a new SBOM was produced; returns 1 when the SBOM
        # already exists (and regeneration is not forced), the clone fails, or
        # the scan fails.
        generate_sbom() {
          local TAG="$1"
          local VERSION="${TAG#v}"
          local VERSION_SLUG="${VERSION//./_}"
          local SBOM_DIR="${SBOM_BASE_DIR}/${VERSION}"
          local SBOM_FILE="${SBOM_DIR}/${SANITIZED_PROJECT}_${VERSION_SLUG}_spdx.json"
          if [ -f "$SBOM_FILE" ] && [ "$FORCE_REGENERATE" != "true" ]; then
            echo "SBOM already exists: $SBOM_FILE, skipping..."
            return 1
          fi
          echo "Generating SBOM for $OWNER/$REPO@$TAG..."
          local TEMP_DIR SCAN_LOG SCAN_STATUS=0
          TEMP_DIR=$(mktemp -d)
          if ! git clone --depth 1 --branch "$TAG" "https://github.com/${OWNER}/${REPO}.git" "$TEMP_DIR" 2>/dev/null; then
            echo "Failed to clone $OWNER/$REPO@$TAG, skipping..."
            rm -rf "$TEMP_DIR"
            return 1
          fi
          mkdir -p "$SBOM_DIR"
          # Generate SBOM with mikebom (SPDX 2.3 + deps.dev enrichment)
          # The scan output goes to a log file instead of being piped into
          # tail: the default shell has no pipefail, so a pipeline would
          # report tail's exit status and hide scan failures.
          SCAN_LOG=$(mktemp)
          mikebom sbom scan \
            --path "$TEMP_DIR" \
            --format spdx-2.3-json \
            --output "$SBOM_FILE" >"$SCAN_LOG" 2>&1 || SCAN_STATUS=$?
          tail -5 "$SCAN_LOG"
          rm -rf "$TEMP_DIR" "$SCAN_LOG"
          if [ "$SCAN_STATUS" -eq 0 ]; then
            echo "Successfully generated SBOM: $SBOM_FILE"
            return 0
          fi
          echo "Failed to generate SBOM for $OWNER/$REPO@$TAG"
          # Drop any partial output so the upload step does not publish it.
          rm -f "$SBOM_FILE"
          return 1
        }

        # Fetch one page of the newest releases. --paginate is deliberately
        # not used: its jq filter runs once per page, so a slice there does
        # not cap the total. On failure fall back to an empty list instead of
        # appending to whatever error body was printed.
        if ! RELEASES=$(gh api "repos/${OWNER}/${REPO}/releases?per_page=50" -q '.' 2>/dev/null); then
          RELEASES="[]"
        fi
        if [ "$RELEASES" == "[]" ] || [ -z "$RELEASES" ]; then
          echo "No releases found, trying tags..."
          if ! TAGS=$(gh api "repos/${OWNER}/${REPO}/tags?per_page=20" -q '.[].name' 2>/dev/null); then
            TAGS=""
          fi
          if [ -z "$TAGS" ]; then
            echo "No tags found, skipping..."
            echo "::endgroup::"
            exit 0
          fi
          # Tags carry no publish date, so "recent" mode uses a fixed cap of 3.
          if [ "$RELEASES_MODE" == "recent" ]; then
            EFFECTIVE_MAX=3
          else
            EFFECTIVE_MAX="$MAX_RELEASES"
          fi
          for TAG in $TAGS; do
            # Skip pre-release style tags.
            if echo "$TAG" | grep -qiE '[-\.](alpha|beta|rc|pre|dev|snapshot|nightly|canary|test|draft|wip)[0-9]*'; then
              continue
            fi
            # Only accept tags that look like versions (optional leading "v").
            if ! echo "$TAG" | grep -qE '^v?[0-9]+\.[0-9]+'; then
              continue
            fi
            if generate_sbom "$TAG"; then
              PROCESSED=$((PROCESSED + 1))
            fi
            if [ "$PROCESSED" -ge "$EFFECTIVE_MAX" ]; then
              break
            fi
          done
        else
          # Keep published, non-prerelease releases; "recent" mode additionally
          # requires a publish date within the past week.
          if [ "$RELEASES_MODE" == "recent" ]; then
            readarray -t RELEASE_DATA < <(echo "$RELEASES" | jq -r --arg since "$ONE_WEEK_AGO" '.[] | select(.draft == false and .prerelease == false and .published_at >= $since) | "\(.tag_name)|\(.published_at)"')
          else
            readarray -t RELEASE_DATA < <(echo "$RELEASES" | jq -r '.[] | select(.draft == false and .prerelease == false) | "\(.tag_name)|\(.published_at)"')
          fi
          for RELEASE_INFO in "${RELEASE_DATA[@]}"; do
            TAG=$(echo "$RELEASE_INFO" | cut -d'|' -f1)
            if echo "$TAG" | grep -qiE '[-\.](alpha|beta|rc|pre|dev|snapshot|nightly|canary|test|draft|wip)[0-9]*'; then
              continue
            fi
            if generate_sbom "$TAG"; then
              PROCESSED=$((PROCESSED + 1))
            fi
            # The per-repo cap only applies in "latest" mode.
            if [ "$RELEASES_MODE" == "latest" ] && [ "$PROCESSED" -ge "$MAX_RELEASES" ]; then
              break
            fi
          done
        fi
        echo "::endgroup::"
    - name: Upload SBOMs to S3
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.OCI_S3_ACCESS_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.OCI_S3_SECRET_KEY }}
        AWS_SESSION_TOKEN: ""
        AWS_REQUEST_CHECKSUM_CALCULATION: when_required
        AWS_RESPONSE_CHECKSUM_VALIDATION: when_required
        S3_ENDPOINT: ${{ vars.OCI_S3_ENDPOINT }}
        S3_REGION: ${{ vars.OCI_S3_REGION }}
        PROJECT_BUCKET: ${{ vars.OCI_PROJECT_BUCKET }}
        # Matrix values reach the script as environment variables so their
        # contents are never parsed as shell code.
        REPO: ${{ matrix.repo }}
        PROJECT_NAME: ${{ matrix.name }}
      run: |
        # Must produce the same slug and directory as the generation step.
        SANITIZED_PROJECT=$(echo "$PROJECT_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | tr -cd '[:alnum:]-')
        SBOM_BASE_DIR="sbom/${SANITIZED_PROJECT}/${REPO}"
        if [ ! -d "$SBOM_BASE_DIR" ]; then
          echo "No SBOMs generated, nothing to upload"
          exit 0
        fi
        UPLOADED=0
        while IFS= read -r file; do
          # The directory below SBOM_BASE_DIR is the version the SBOM is for.
          rel="${file#"${SBOM_BASE_DIR}"/}"
          VERSION=$(dirname "$rel")
          VERSION_SLUG=$(echo "$VERSION" | tr '.' '_')
          S3_KEY="${SANITIZED_PROJECT}/${VERSION}/${SANITIZED_PROJECT}_${VERSION_SLUG}_spdx.json"
          echo "Uploading: s3://${PROJECT_BUCKET}/${S3_KEY}"
          # A failed upload is reported as a warning and does not fail the job.
          if aws s3api put-object \
            --endpoint-url "$S3_ENDPOINT" \
            --region "$S3_REGION" \
            --bucket "$PROJECT_BUCKET" \
            --key "$S3_KEY" \
            --body "$file" >/dev/null; then
            UPLOADED=$((UPLOADED + 1))
          else
            echo "::warning::Upload failed for ${file}"
          fi
        done < <(find "$SBOM_BASE_DIR" -type f -name '*.json' | sort)
        echo "Uploaded ${UPLOADED} SBOMs to s3://${PROJECT_BUCKET}/"
# ===========================================
# Generate SBOMs for subproject repos (Batch 1)
# ===========================================
generate-sbom-subprojects-1:
  # Hands batch 1 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos1 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-1
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix1 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 2)
# ===========================================
generate-sbom-subprojects-2:
  # Hands batch 2 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos2 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-2
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix2 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 3)
# ===========================================
generate-sbom-subprojects-3:
  # Hands batch 3 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos3 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-3
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix3 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 4)
# ===========================================
generate-sbom-subprojects-4:
  # Hands batch 4 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos4 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-4
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix4 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 5)
# ===========================================
generate-sbom-subprojects-5:
  # Hands batch 5 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos5 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-5
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix5 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 6)
# ===========================================
generate-sbom-subprojects-6:
  # Hands batch 6 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos6 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-6
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix6 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 7)
# ===========================================
generate-sbom-subprojects-7:
  # Hands batch 7 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos7 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-7
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix7 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}
# ===========================================
# Generate SBOMs for subproject repos (Batch 8)
# ===========================================
generate-sbom-subprojects-8:
  # Hands batch 8 of the discovered repositories to the reusable SBOM
  # workflow; skipped when that batch is empty.
  needs: prepare-discovered-matrix
  if: needs.prepare-discovered-matrix.outputs.has_repos8 == 'true'
  uses: ./.github/workflows/reusable-generate-sbom.yml
  secrets: inherit
  with:
    job_name: subprojects-batch-8
    sbom_path_prefix: subprojects
    matrix: ${{ needs.prepare-discovered-matrix.outputs.matrix8 }}
    releases_mode: ${{ github.event.inputs.releases_mode || 'recent' }}
    max_releases: ${{ github.event.inputs.max_releases || '3' }}
    force_regenerate: ${{ github.event.inputs.force_regenerate || 'false' }}