feat: auto-trigger garbage collection after manifest cleanup

- Added automatic GC to prune script after deleting manifests
- CronJob now uses python:3.12-slim and installs kubectl at container startup
- Added serviceAccountName: registry-gc-runner for permissions
- GC scales down registry, runs garbage-collect, scales back up
- Deletes unreferenced blob layers to actually free disk space
This commit is contained in:
Ashwin Kumar Sivakumar 2026-06-12 04:50:02 +05:30
parent b6b7d62bad
commit 4eed905fb6
2 changed files with 89 additions and 2 deletions

View file

@ -13,11 +13,20 @@ spec:
backoffLimit: 1
template:
spec:
serviceAccountName: registry-gc-runner
restartPolicy: Never
containers:
- name: prune
image: python:3.12-alpine
command: ["python", "/scripts/prune.py"]
image: python:3.12-slim
command: ["sh", "-c"]
args:
- |
# Install kubectl
apt-get update && apt-get install -y curl --no-install-recommends && rm -rf /var/lib/apt/lists/*
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
# Run the prune script
python3 /scripts/prune.py
volumeMounts:
- name: script
mountPath: /scripts

View file

@ -104,3 +104,81 @@ data:
print(f' delete failed {repo}:{t} err={e}')
print(f'deleted_manifests={deleted}')
# Trigger garbage collection to delete unreferenced blob layers.
#
# Deleting manifests only removes references, so the blobs stay on disk until
# `registry garbage-collect` runs. The sequence below is:
#   1. scale the docker-registry deployment to 0 and wait until it reports
#      no replicas,
#   2. run a one-shot Job that executes garbage-collect against the registry
#      volume,
#   3. always scale the deployment back to 1, whatever happened in between.
# The whole step is best-effort: failures are printed, never raised.
if deleted > 0:
    import subprocess
    import time

    print('\n=== Triggering Garbage Collection ===')

    gc_namespace = 'registry'
    gc_deployment = 'docker-registry'
    gc_job_name = 'registry-gc-once'

    def _kubectl(*args, check=True, **run_kwargs):
        """Run ``kubectl <args> -n registry`` and return the CompletedProcess.

        Raises subprocess.CalledProcessError on a non-zero exit when *check*
        is true, and OSError when the kubectl binary cannot be executed.
        """
        return subprocess.run(
            ['kubectl', *args, '-n', gc_namespace],
            check=check,
            **run_kwargs,
        )

    gc_job = {
        'apiVersion': 'batch/v1',
        'kind': 'Job',
        'metadata': {'name': gc_job_name, 'namespace': gc_namespace},
        'spec': {
            # A failed GC run is not retried automatically.
            'backoffLimit': 0,
            'template': {
                'spec': {
                    'restartPolicy': 'Never',
                    'containers': [{
                        'name': 'gc',
                        'image': 'registry:3',
                        'command': [
                            'registry', 'garbage-collect', '--delete-untagged',
                            '/etc/distribution/config.yml',
                        ],
                        'volumeMounts': [
                            {'name': 'storage', 'mountPath': '/var/lib/registry'},
                            {'name': 'config', 'mountPath': '/etc/distribution'},
                        ],
                    }],
                    'volumes': [
                        {'name': 'storage', 'persistentVolumeClaim': {'claimName': 'registry-pvc'}},
                        {'name': 'config', 'configMap': {'name': 'registry-config'}},
                    ],
                },
            },
        },
    }

    try:
        # Scale down registry to run GC.
        _kubectl('scale', 'deployment', gc_deployment, '--replicas=0')
        print('Scaled down docker-registry deployment')

        # Wait (up to ~2 minutes) until the deployment reports zero replicas
        # instead of assuming a fixed delay is enough. kubectl prints an empty
        # string when .status.replicas is absent.
        for _attempt in range(60):
            probe = _kubectl(
                'get', 'deployment', gc_deployment,
                '-o', 'jsonpath={.status.replicas}',
                check=False, capture_output=True, text=True,
            )
            if probe.returncode == 0 and probe.stdout.strip() in ('', '0'):
                break
            time.sleep(2)
        else:
            # Running GC next to a live registry is unsafe, so give up here;
            # the finally clause below still restores the deployment.
            raise RuntimeError('docker-registry did not scale down in time')

        # NOTE(review): the replica count may reach 0 while the container is
        # still shutting down -- keep the original 5 s grace period.
        time.sleep(5)

        # Delete old GC job if it exists, so the fixed job name can be reused.
        _kubectl('delete', 'job', gc_job_name, '--ignore-not-found=true', check=False)
        time.sleep(2)

        # Create the GC job. The manifest is piped on stdin, so no temporary
        # file is left behind in the container.
        _kubectl('apply', '-f', '-', input=json.dumps(gc_job), text=True)
        print('GC job created, waiting for completion...')

        # Wait up to 10 minutes (120 polls, 5 s apart) for GC to complete.
        for _attempt in range(120):
            probe = _kubectl(
                'get', 'job', gc_job_name,
                '-o', 'jsonpath={.status.succeeded},{.status.failed}',
                check=False, capture_output=True, text=True,
            )
            succeeded, _sep, failed = probe.stdout.strip().partition(',')
            if succeeded == '1':
                print('Garbage collection completed successfully')
                break
            if failed == '1':
                print('GC job failed')
                break
            time.sleep(5)
        else:
            print('GC job did not finish within 10 minutes')
            # Remove the job (and, with foreground cascading, its pod) so that
            # garbage-collect is not still running when the registry returns.
            _kubectl(
                'delete', 'job', gc_job_name,
                '--ignore-not-found=true', '--cascade=foreground',
                check=False,
            )
    except Exception as e:
        print(f'GC trigger failed: {e}')
    finally:
        # Ensure registry is scaled back up even if GC failed or was
        # interrupted; report a failed restore instead of hiding it.
        try:
            restore = _kubectl('scale', 'deployment', gc_deployment, '--replicas=1', check=False)
        except OSError as e:
            print(f'WARNING: could not scale docker-registry back up: {e}')
        else:
            if restore.returncode == 0:
                print('Scaled up docker-registry deployment')
            else:
                print(f'WARNING: scaling docker-registry back up failed (exit code {restore.returncode})')