任务: 对other_face_summary根据area进行分bin, 每个bin 1000个样本, 间距500, 将图片从image_path复制到/areas, 分开放文件夹, 在图片上绘制置信框, 左上角(det_1_x,det_1_y), 右下角(det_2_x,det_2_y), 多进程处理
GPT-5.1-Codex-Mini 在ask模式, 短问题下会出现奇怪的问题, 返回代码重复了两次, 同时bin_size使用错误, 应该使用step才对, 他把取1000个样本和间隔500弄混了
Gemini 3 Flash: 注释位置经常用错误注释方法, 但是功能正确
返回结果:
GPT-5.1
python
# ...existing code...
from PIL import ImageDraw
from multiprocessing import Pool
base_target = '/areas'
os.makedirs(base_target, exist_ok=True)
records = other_face_summary.sort('area').select([
'image_path', 'det_1_x', 'det_1_y', 'det_2_x', 'det_2_y', 'area'
]).to_dicts()
total = len(records)
bin_size = 1000
step = 500
def paint_and_copy(task):
image_path, x1, y1, x2, y2, target_dir = task
target_path = os.path.join(target_dir, os.path.basename(image_path))
try:
with Image.open(image_path) as img:
draw = ImageDraw.Draw(img)
draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
img.save(target_path)
except Exception as exc:
return image_path, False, str(exc)
return image_path, True, None
tasks = []
for start in range(0, total, step):
end = min(start + bin_size, total)
if start >= end:
break
folder = os.path.join(base_target, f'bin_{start}_{end}')
os.makedirs(folder, exist_ok=True)
for rec in records[start:end]:
tasks.append((
rec['image_path'],
rec['det_1_x'],
rec['det_1_y'],
rec['det_2_x'],
rec['det_2_y'],
folder,
))
with Pool() as pool:
pool.map(paint_and_copy, tasks)
```// filepath: \jupyter\0.ipynb
# ...existing code...
from PIL import ImageDraw
from multiprocessing import Pool
base_target = '/areas'
os.makedirs(base_target, exist_ok=True)
records = other_face_summary.sort('area').select([
'image_path', 'det_1_x', 'det_1_y', 'det_2_x', 'det_2_y', 'area'
]).to_dicts()
total = len(records)
bin_size = 1000
step = 500
def paint_and_copy(task):
image_path, x1, y1, x2, y2, target_dir = task
target_path = os.path.join(target_dir, os.path.basename(image_path))
try:
with Image.open(image_path) as img:
draw = ImageDraw.Draw(img)
draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
img.save(target_path)
except Exception as exc:
return image_path, False, str(exc)
return image_path, True, None
tasks = []
for start in range(0, total, step):
end = min(start + bin_size, total)
if start >= end:
break
folder = os.path.join(base_target, f'bin_{start}_{end}')
os.makedirs(folder, exist_ok=True)
for rec in records[start:end]:
tasks.append((
rec['image_path'],
rec['det_1_x'],
rec['det_1_y'],
rec['det_2_x'],
rec['det_2_y'],
folder,
))
with Pool() as pool:
pool.map(paint_and_copy, tasks)
Gemini 3 Flash
python
// ...existing code...
import os
from PIL import Image, ImageDraw
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
def process_item(row):
"""处理单张图片:绘制检测框并保存"""
try:
img_path = row['image_path']
save_dir = Path(f"/areas/{row['bin_name']}")
save_dir.mkdir(parents=True, exist_ok=True)
with Image.open(img_path).convert("RGB") as img:
draw = ImageDraw.Draw(img)
# 绘制置信框 (det_1_x, det_1_y) 为左上角, (det_2_x, det_2_y) 为右下角
draw.rectangle(
[row['det_1_x'], row['det_1_y'], row['det_2_x'], row['det_2_y']],
outline="red",
width=3
)
img.save(save_dir / row['basename'])
except Exception as e:
return f"Error {row['basename']}: {e}"
# 1. 根据 area 分 bin (间距 500),并每组提取 1000 个样本
df_tasks = (
other_face_summary
.with_columns([
((pl.col("area") // 500) * 500).cast(pl.Int64).alias("bin_start")
])
.with_columns([
(pl.col("bin_start").cast(pl.Utf8) + "_" + (pl.col("bin_start") + 500).cast(pl.Utf8)).alias("bin_name")
])
.group_by("bin_name")
.head(1000)
)
# 2. 多进程执行
if __name__ == "__main__":
tasks = df_tasks.to_dicts()
print(f"Total images to process: {len(tasks)}")
with ProcessPoolExecutor(max_workers=min(os.cpu_count(), 16)) as executor:
executor.map(process_item, tasks)