项目背景
https://blog.csdn.net/x1131230123/article/details/140606459
似乎这个任务是简单的,利用目标检测是否可以完成得好呢?
生成数据集
利用这个代码产生数据集:
为了将标签转换为YOLOv5格式,需要将左上角和右下角的坐标转换为YOLO格式的中心点坐标和宽高。YOLOv5标签格式是这样的:
$$\text{class} \, \text{x\_center} \, \text{y\_center} \, \text{width} \, \text{height}$$
其中,$\text{x\_center}$、$\text{y\_center}$、$\text{width}$ 和 $\text{height}$ 都是归一化值:$\text{x\_center}$ 和 $\text{width}$ 除以图片宽度,$\text{y\_center}$ 和 $\text{height}$ 除以图片高度。以下是修改后的代码:
python
import os
import random
from PIL import Image
def list_path_all_files(dirname, extensions=('.jpg',)):
    """Recursively collect image file paths below ``dirname``.

    Args:
        dirname: Root directory to walk.
        extensions: A suffix or an iterable of suffixes to accept. The
            comparison is case-insensitive. Defaults to ``('.jpg',)``, which
            selects the same files as before this parameter existed.

    Returns:
        A sorted list with one path per matching file, each built by joining
        the directory reported by ``os.walk`` with the file name.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    result = []
    for maindir, _subdirs, file_name_list in os.walk(dirname):
        result.extend(
            os.path.join(maindir, filename)
            for filename in file_name_list
            if filename.lower().endswith(suffixes)
        )

    # os.walk reports entries in an arbitrary order; sorting keeps the listing
    # reproducible between runs and machines.
    result.sort()
    return result
def resize_image(image, target_size, resize_by='height'):
    """Scale ``image`` so that one side equals ``target_size``.

    The other side is scaled by the same ratio (truncated to an integer), so
    the aspect ratio is preserved up to rounding.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height), resample)`` method, i.e. a PIL image.
        target_size: Desired length in pixels of the side named by
            ``resize_by``.
        resize_by: ``'height'`` or ``'width'``. Any other value leaves the
            image untouched.

    Returns:
        The resized image, or the input object itself when the selected side
        already equals ``target_size`` or ``resize_by`` is not recognised.
    """
    w, h = image.size
    if resize_by == 'height':
        current, other = h, w
    elif resize_by == 'width':
        current, other = w, h
    else:
        return image

    if current == target_size:
        return image

    ratio = target_size / current
    # Clamp to one pixel: a very elongated source would otherwise truncate to
    # a zero-length side.
    scaled = max(1, int(other * ratio))
    if resize_by == 'height':
        new_size = (scaled, target_size)
    else:
        new_size = (target_size, scaled)

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    return image.resize(new_size, Image.LANCZOS)
def create_2x2_image(images, cell_size=640):
    """Arrange up to four images in a 2x2 grid on one RGB canvas.

    Every image is resized to ``cell_size`` x ``cell_size`` without keeping
    its aspect ratio, then pasted in row-major order: top-left, top-right,
    bottom-left, bottom-right.

    Args:
        images: Iterable of PIL images. Only the first four are placed; if
            fewer are given, the remaining cells are left unpainted.
        cell_size: Side length in pixels of each grid cell. The canvas is
            ``2 * cell_size`` on each side. Defaults to 640, giving the
            1280x1280 canvas this function always produced.

    Returns:
        ``(new_image, coords)`` where ``coords`` holds one
        ``(x_min, y_min, x_max, y_max)`` pixel box per pasted image.
    """
    canvas_side = 2 * cell_size
    new_image = Image.new('RGB', (canvas_side, canvas_side))

    # Top-left corner of each cell, in row-major order.
    origins = [(0, 0), (cell_size, 0), (0, cell_size), (cell_size, cell_size)]

    coords = []
    # zip stops at the shorter input, so images beyond the fourth are ignored.
    for img, (left, top) in zip(images, origins):
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under its current name.
        tile = img.resize((cell_size, cell_size), Image.LANCZOS)
        new_image.paste(tile, (left, top))
        coords.append((left, top, left + cell_size, top + cell_size))
    return new_image, coords
def concatenate_images(image_list, mode='horizontal', target_size=768):
    """Join images into a single strip and report where each one landed.

    In ``'horizontal'`` mode every image is resized to a height of
    ``target_size`` and the images are placed left to right. In
    ``'vertical'`` mode every image is resized to a width of ``target_size``
    and the images are stacked top to bottom.

    Args:
        image_list: PIL images to join, in placement order.
        mode: ``'horizontal'`` or ``'vertical'``.
        target_size: Shared height (horizontal) or width (vertical) in pixels.

    Returns:
        ``(new_image, coords)`` where ``coords`` holds one
        ``(x_min, y_min, x_max, y_max)`` pixel box per input image.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    # Reject bad modes up front; previously this fell through to the return
    # statement and failed with an UnboundLocalError.
    if mode not in ('horizontal', 'vertical'):
        raise ValueError(
            f"mode must be 'horizontal' or 'vertical', got {mode!r}"
        )

    horizontal = mode == 'horizontal'
    resize_by = 'height' if horizontal else 'width'
    # Index into image.size of the axis along which images are laid out.
    axis = 0 if horizontal else 1

    resized_images = [
        resize_image(image, target_size, resize_by) for image in image_list
    ]
    total_length = sum(image.size[axis] for image in resized_images)
    if horizontal:
        canvas_size = (total_length, target_size)
    else:
        canvas_size = (target_size, total_length)
    new_image = Image.new('RGB', canvas_size)

    coords = []
    offset = 0
    for image in resized_images:
        extent = image.size[axis]
        if horizontal:
            new_image.paste(image, (offset, 0))
            coords.append((offset, 0, offset + extent, target_size))
        else:
            new_image.paste(image, (0, offset))
            coords.append((0, offset, target_size, offset + extent))
        offset += extent
    return new_image, coords
def generate_labels(coords, image_size, class_id=0):
    """Convert pixel corner boxes into YOLO-format label lines.

    Args:
        coords: Iterable of ``(x_min, y_min, x_max, y_max)`` pixel boxes.
        image_size: ``(width, height)`` of the image the boxes refer to.
        class_id: Class index written at the start of every line. Defaults to
            0, the only class this script previously emitted.

    Returns:
        A list of strings ``"<class_id> <x_center> <y_center> <width>
        <height>"``. Horizontal values are divided by the image width and
        vertical values by the image height.
    """
    width, height = image_size
    labels = []
    for x_min, y_min, x_max, y_max in coords:
        x_center = (x_min + x_max) / 2.0 / width
        y_center = (y_min + y_max) / 2.0 / height
        box_width = (x_max - x_min) / width
        box_height = (y_max - y_min) / height
        labels.append(
            f"{class_id} {x_center} {y_center} {box_width} {box_height}"
        )
    return labels
def generate_dataset(image_folder, output_folder, label_folder, num_images):
    """Generate composite images and matching YOLO label files.

    For each sample one of six layouts is drawn at random: 2 or 3 images side
    by side, 2 or 3 images stacked, a 2x2 grid of 4 images, or a single image.
    Source images are drawn at random (with replacement) from
    ``image_folder``. Each sample is written as
    ``composite_image_<NNNNNN>.jpg`` in ``output_folder``, with a ``.txt``
    file of the same stem in ``label_folder`` holding one label line per
    source image.

    Args:
        image_folder: Directory searched recursively for source images.
        output_folder: Directory for the composite JPEGs; created if missing.
        label_folder: Directory for the label files; created if missing.
        num_images: Number of samples to generate.

    Raises:
        ValueError: If no source image is found under ``image_folder``.
    """
    image_paths = list_path_all_files(image_folder)
    if not image_paths:
        # random.choice on an empty list would fail with an opaque IndexError.
        raise ValueError(f"no source images found under {image_folder!r}")

    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(label_folder, exist_ok=True)

    # Layout id -> (number of source images, strip direction). A direction of
    # None means the layout does not use concatenate_images.
    layouts = {
        1: (2, 'horizontal'),
        2: (3, 'horizontal'),
        3: (2, 'vertical'),
        4: (3, 'vertical'),
        5: (4, None),  # 2x2 grid
        6: (1, None),  # single image
    }

    for i in range(num_images):
        count, mode = layouts[random.randint(1, 6)]
        selected_images = [
            Image.open(random.choice(image_paths)) for _ in range(count)
        ]
        try:
            if mode is not None:
                new_image, coords = concatenate_images(selected_images, mode=mode)
            elif count == 4:
                new_image, coords = create_2x2_image(selected_images)
            else:
                # Convert to RGB so this layout produces the same image mode
                # as the pasted composites, and so a non-RGB source cannot
                # break the JPEG save.
                new_image = selected_images[0].convert('RGB')
                coords = [(0, 0, new_image.size[0], new_image.size[1])]

            stem = f'composite_image_{i + 1:06d}'
            new_image.save(os.path.join(output_folder, stem + '.jpg'), 'JPEG')

            labels = generate_labels(coords, new_image.size)
            with open(os.path.join(label_folder, stem + '.txt'), 'w') as label_file:
                label_file.writelines(label + '\n' for label in labels)
        finally:
            # Release the source images explicitly instead of relying on
            # garbage collection across thousands of iterations.
            for source in selected_images:
                source.close()
# Example usage: build the training split of the composite dataset.
image_folder = '/ssd/xiedong/datasets/multilabelsTask/multilabels_new/'
output_folder = '/ssd/xiedong/yolov8detdir/composite_images/train/images/'
label_folder = '/ssd/xiedong/yolov8detdir/composite_images/train/labels/'
num_images = 8000

generate_dataset(
    image_folder=image_folder,
    output_folder=output_folder,
    label_folder=label_folder,
    num_images=num_images,
)
修改后的 `generate_labels` 函数将左上角/右下角坐标转换为YOLOv5标签格式。生成的标签文件中,每个子图对应一行标签,格式为 `0 x_center y_center width height`。
训练
data_det.yaml:
yaml
path: /ssd/xiedong/yolov8detdir/composite_images
train: train/images # train images (relative to 'path'), 8000 generated images
val: val/images # val images (relative to 'path')
test: # test images (optional)
# Classes
names:
  0: paper
x03train_det.py:
python
from ultralytics import YOLO

# Start from the pretrained yolov8m.pt checkpoint (recommended for training).
model = YOLO("yolov8m.pt")

# Passed to train() as the project directory for this experiment.
project = "/ssd/xiedong/yolov8detdir/paperdet"

# Train on two GPUs (device indices 2 and 3).
results = model.train(
    data="data_det.yaml",
    epochs=50,
    imgsz=640,
    device=[2, 3],
    batch=180,
    project=project,
)
启动容器:
bash
docker run -it --gpus all --shm-size=8g -v /ssd/xiedong/yolov8detdir:/ssd/xiedong/yolov8detdir ultralytics/ultralytics:8.2.62 bash
启动训练:
bash
cd /ssd/xiedong/yolov8detdir
python -m torch.distributed.run --nproc_per_node 2 x03train_det.py
推理
bash
/ssd/xiedong/yolov8detdir/paperdet/train2/weights/best.pt
python
from ultralytics import YOLO

# Load the custom-trained detector (best checkpoint of run "train2"), not a
# stock pretrained model.
weights = "/ssd/xiedong/yolov8detdir/paperdet/train2/weights/best.pt"
model = YOLO(weights)

# Run batched inference; the call returns one Results object per input image.
sources = ["composite_image_000006.jpg"]
results = model(sources)

# Walk the results and pull out each kind of prediction output.
for result in results:
    boxes = result.boxes  # bounding-box outputs
    masks = result.masks  # segmentation-mask outputs
    keypoints = result.keypoints  # pose keypoint outputs
    probs = result.probs  # classification outputs
    obb = result.obb  # oriented-bounding-box outputs
    # NOTE: every result is written to the same file name, so with more than
    # one input image only the last annotated image remains on disk.
    result.save(filename="result.jpg")
推理效果是很好的: