解决方案代码逻辑:
- 查询待清理镜像:从数据库获取所有已标记为软删除(`is_deleted = 1`)且创建时间超过指定天数的镜像记录,生成待清理清单。
- 安全检查:对于每个待清理镜像,通过 Registry API 获取其 manifest digest,并检查该 digest 是否被多个 tag 引用。只有当引用数为 1(即该 manifest 仅被当前 tag 使用)时,才执行删除操作,避免误删仍被其他 tag 依赖的镜像。
- 删除 manifest:调用 Registry API 的 `DELETE /v2/<name>/manifests/<digest>` 接口,删除镜像的 manifest 文件。
- 释放存储空间:删除 manifest 后,镜像的各层(blob)并不会立即被删除。需要手动运行 Registry 自带的垃圾回收(GC)命令(例如 `registry garbage-collect /etc/docker/registry/config.yml`),根据引用关系清理不再被任何 manifest 引用的 blob,从而真正释放磁盘空间。
- 共享层保护:如果多个镜像共享相同的基础层,删除其中一个镜像的 manifest 不会影响其他镜像对该基础层的引用。GC 执行时会保留引用计数大于 0 的 blob,确保共享层不被误删。
总结
- 删除操作删除的是 manifest 文件(相当于镜像的目录清单),而不是直接删除层(blob)。
- 手动 GC 才会真正删除不再被任何 manifest 引用的 blob。
- Registry 维护引用计数:每个 blob 被哪些 manifest 引用。共享层(如基础层 L)只要还有至少一个 manifest 引用它,GC 就不会删除它。
- 每层有唯一的内容摘要(digest)。
- 整个镜像也有一个唯一的 digest,即 manifest digest。
完整代码
import pymysql
from datetime import datetime, timedelta
import requests
import logging
import argparse
import os
# Registry settings (adjust these to match your environment).
# Base URL used for every Registry HTTP API request.
registry_url = "http://your-registry-host:port"
# Host prefix used when writing full image references to the listing file.
registry_host = "your-registry-host:port"
def setup_logger():
    """Create (or reconfigure) the ``image_cleanup`` logger.

    The logger writes DEBUG and above to ``image_cleanup.log`` (UTF-8) and
    INFO and above to the console, both with the same timestamped format.
    Calling this function again replaces the previously attached handlers.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger("image_cleanup")
    logger.setLevel(logging.DEBUG)

    # Remove handlers left over from an earlier call so records are not
    # emitted twice. Each one is closed as well: merely clearing the list
    # would leave the old log file handle open.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    file_handler = logging.FileHandler("image_cleanup.log", encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger


# Module-wide logger shared by every function in this script.
logger = setup_logger()
def connect_to_db():
    """Open a MySQL connection using the settings hard-coded below.

    Returns:
        An open ``pymysql`` connection.

    Raises:
        SystemExit: With status 1 when the connection cannot be
            established; the error is logged first.
    """
    try:
        return pymysql.connect(
            host='your-db-host',
            user='your-db-user',
            password='your-db-password',
            database='your-db-name',
            charset='utf8mb4',
        )
    except Exception as e:
        logger.error(f"Failed to connect to the database: {e}")
        # The bare ``exit`` helper is injected by the ``site`` module for
        # interactive use and is not guaranteed to exist; raising SystemExit
        # terminates with the same status everywhere.
        raise SystemExit(1)
def query_images(days):
    """Fetch soft-deleted image rows, optionally restricted by age.

    Args:
        days: Minimum age in days, compared against ``create_time``. A value
            of 0 or less disables the age filter, so every soft-deleted row
            is returned.

    Returns:
        The rows from ``cursor.fetchall()``; each row is
        ``(name, project_name, adjusted_create_time, user_name, real_tag)``.
        An empty list is returned when the query fails (the error is logged).
    """
    query = """
        SELECT name, project_name,
               DATE_ADD(create_time, INTERVAL 8 HOUR) as adjusted_create_time,
               user_name, real_tag
        FROM image
        WHERE is_deleted = 1
    """
    params = []
    if days > 0:
        # NOTE(review): the threshold is the script host's local time, while
        # the SELECT shifts create_time by 8 hours for display -- confirm
        # both use the time zone the column is stored in.
        date_threshold = datetime.now() - timedelta(days=days)
        query += " AND create_time < %s"
        params.append(date_threshold.strftime('%Y-%m-%d %H:%M:%S'))

    conn = connect_to_db()
    try:
        # The cursor is created inside the try block so the connection is
        # closed even if cursor creation itself fails.
        with conn.cursor() as cursor:
            cursor.execute(query, params)
            return cursor.fetchall()
    except Exception as e:
        logger.error(f"Failed to execute query: {e}")
        return []
    finally:
        conn.close()
def get_tag_digest(repo, tag):
    """Look up the manifest digest that a tag currently resolves to.

    Args:
        repo: Repository path inside the registry (e.g. ``project/app``).
        tag: Tag name to resolve.

    Returns:
        The value of the ``Docker-Content-Digest`` response header, or
        ``None`` when the request fails, the status is not 200, or the
        header is absent.
    """
    url = f"{registry_url}/v2/{repo}/manifests/{tag}"
    # Advertise the multi-arch index types as well as the single-image ones.
    # Without them the registry may serve a per-platform manifest for a
    # multi-arch tag, and the digest reported here would not be the one the
    # tag actually points at.
    accepted_types = (
        "application/vnd.docker.distribution.manifest.v2+json",
        "application/vnd.docker.distribution.manifest.list.v2+json",
        "application/vnd.oci.image.manifest.v1+json",
        "application/vnd.oci.image.index.v1+json",
    )
    headers = {"Accept": ", ".join(accepted_types)}
    try:
        resp = requests.get(url, headers=headers, timeout=5)
    except Exception as e:
        logger.error(f"Request error for {repo}:{tag}: {e}")
        return None

    if resp.status_code == 200:
        return resp.headers.get("Docker-Content-Digest")
    logger.debug(f"Get digest failed for {repo}:{tag}, status: {resp.status_code}")
    return None
def get_all_tags(repo):
    """List the tags of a repository via ``/v2/<repo>/tags/list``.

    Args:
        repo: Repository path inside the registry.

    Returns:
        A list of tag names. An empty list is returned when the request
        fails, the status is not 200, or the repository has no tags.
    """
    url = f"{registry_url}/v2/{repo}/tags/list"
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code == 200:
            # A repository whose tags were all removed is reported as
            # "tags": null; normalise that to an empty list so callers can
            # always iterate the result.
            return resp.json().get("tags") or []
        logger.debug(f"List tags failed for {repo}, status: {resp.status_code}")
    except Exception as e:
        logger.error(f"Failed to get tags for {repo}: {e}")
    # NOTE(review): only a single response page is read; if the registry
    # paginates this endpoint, later pages are not fetched -- confirm.
    return []
def get_digest_reference_count(repo, digest):
    """Count how many tags of ``repo`` resolve to ``digest``.

    Args:
        repo: Repository path inside the registry.
        digest: Manifest digest to look for; a falsy value yields 0.

    Returns:
        The number of tags whose resolved digest equals ``digest``. Tags
        whose digest cannot be resolved are not counted, so the result is 0
        when the tag list itself cannot be retrieved.
    """
    if not digest:
        return 0
    # Guard against a missing tag list so the count never raises.
    tags = get_all_tags(repo) or []
    # One digest lookup per tag.
    return sum(1 for tag in tags if get_tag_digest(repo, tag) == digest)
def safe_delete_image(repo, tag):
    """Delete the manifest behind ``repo:tag`` if no other tag shares it.

    The manifest is deleted by digest, which removes it for every tag that
    resolves to it. Deletion therefore proceeds only when exactly one tag
    references the digest.

    Args:
        repo: Repository path inside the registry.
        tag: Tag whose manifest should be removed.

    Returns:
        ``True`` when the registry answered the DELETE with 200 or 202,
        ``False`` when the deletion was skipped or failed.
    """
    logger.info(f"Attempting to delete: {repo}:{tag}")
    digest = get_tag_digest(repo, tag)
    if not digest:
        logger.warning(f"Cannot get digest for {repo}:{tag}, skipping.")
        return False

    # Check whether other tags point at the same manifest.
    ref_count = get_digest_reference_count(repo, digest)
    if ref_count > 1:
        logger.warning(f"Digest {digest} is referenced by {ref_count} tags, skipping deletion of {repo}:{tag}")
        return False
    if ref_count < 1:
        # A count of 0 means the tag listing or digest lookups failed, so
        # exclusive ownership could not be confirmed; deleting anyway could
        # remove a manifest that other tags still use.
        logger.warning(f"Could not verify tag references for digest {digest}, skipping deletion of {repo}:{tag}")
        return False

    # Perform the deletion.
    delete_url = f"{registry_url}/v2/{repo}/manifests/{digest}"
    logger.debug(f"Delete URL: {delete_url}")
    try:
        # A timeout keeps one unresponsive request from stalling the whole
        # cleanup run.
        resp = requests.delete(delete_url, timeout=30)
    except Exception as e:
        logger.error(f"Delete request failed: {e}")
        return False

    if resp.status_code in (200, 202):
        return True
    logger.error(f"Delete API returned status {resp.status_code}: {resp.text}")
    return False
def parse_image_string(full_image_string):
    """Split a full image reference into repository path and tag.

    Example: ``your-registry-host:port/my-nginx:v1.0`` yields
    ``("my-nginx", "v1.0")``.

    Args:
        full_image_string: Image reference of the form
            ``[http(s)://]host[:port]/repo[:tag]``.

    Returns:
        ``(repo, tag)``; the tag defaults to ``"latest"`` when the path has
        none. ``(None, None)`` is returned when the input is not a string
        or has no ``/`` separating the host from the repository path.
    """
    if not isinstance(full_image_string, str):
        logger.error(f"Failed to parse image string '{full_image_string}': expected a string")
        return None, None

    # 1. Drop an explicit URL scheme. Only a real "scheme://" prefix is
    #    stripped, so host names that merely begin with "http"
    #    (e.g. httpd.example.com) are left intact.
    reference = full_image_string
    for scheme in ("http://", "https://"):
        if reference.startswith(scheme):
            reference = reference[len(scheme):]
            break

    # 2. Separate the registry host from the repository path.
    _host, slash, path_part = reference.partition('/')
    if not slash:
        return None, None

    # 3. Separate the repository name from the tag (last colon wins).
    repo, colon, tag = path_part.rpartition(':')
    if not colon:
        return path_part, 'latest'
    return repo, tag
def _write_image_listing(days):
    """Query cleanup candidates and write them to ``output.txt``, one per line."""
    results = query_images(days)
    with open('output.txt', 'w', encoding='utf-8') as file:
        for name, project_name, adjusted_create_time, user_name, real_tag in results:
            name_with_prefix = f"{registry_host}/{name}:{real_tag}"
            line = '\t'.join([name_with_prefix, str(project_name), str(adjusted_create_time), str(user_name)])
            file.write(line + '\n')
            logger.info(f"Listed image: {name_with_prefix}")


def _delete_listed_images(file_path):
    """Delete every image named in the first tab-separated column of ``file_path``."""
    if not os.path.isfile(file_path):
        logger.error(f"File not found: {file_path}")
        return
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            # Only the first column (the full image reference) is needed.
            full_image = line.split('\t')[0]
            logger.debug(f"Processing line: {line}")
            logger.debug(f"Extracted image: {full_image}")
            repository, tag = parse_image_string(full_image)
            if not repository or not tag:
                logger.warning(f"Invalid image format in line: {line}")
                continue
            if safe_delete_image(repository, tag):
                logger.info(f"Successfully deleted: {full_image}")
            else:
                logger.warning(f"Failed to delete: {full_image}")


def main():
    """Command-line entry point.

    ``list <days>`` writes the soft-deleted images older than ``days`` to
    ``output.txt``; ``rm <file>`` deletes the images listed in such a file
    from the registry.
    """
    parser = argparse.ArgumentParser(description="Image Cleanup Script")
    parser.add_argument("action", choices=["list", "rm"], help="Action to perform: list or rm")
    parser.add_argument("param", help="Days for 'list' or file path for 'rm'")
    args = parser.parse_args()

    if args.action == "list":
        try:
            days = int(args.param)
        except ValueError:
            # Report bad input as a usage error instead of a traceback.
            parser.error(f"'list' expects an integer number of days, got {args.param!r}")
        _write_image_listing(days)
    elif args.action == "rm":
        _delete_listed_images(args.param)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()