【深度学习】yolov8-det目标检测训练,拼接图的分割复原

项目背景

https://blog.csdn.net/x1131230123/article/details/140606459

似乎这个任务是简单的,利用目标检测是否可以完成得好呢?

生成数据集

利用这个代码产生数据集:

为了将标签转换为YOLO格式(YOLOv5与YOLOv8的检测标签格式相同),需要将左上角和右下角的坐标转换为YOLO格式的中心点坐标和宽高。YOLO标签格式是这样的:

$$\text{class} \quad \text{x\_center} \quad \text{y\_center} \quad \text{width} \quad \text{height}$$

其中,$\text{x\_center}$、$\text{y\_center}$、$\text{width}$ 和 $\text{height}$ 都是相对于图片宽度和高度的归一化值。以下是修改后的代码:

python 复制代码
import os
import random
from PIL import Image


def list_path_all_files(dirname):
    """Recursively collect the paths of every ``.jpg`` file under *dirname*.

    The extension test is case-insensitive (``.JPG`` matches too). Paths are
    returned in the order ``os.walk`` visits them; a directory that does not
    exist simply yields an empty list.
    """
    return [
        os.path.join(root, name)
        for root, _subdirs, names in os.walk(dirname)
        for name in names
        if name.lower().endswith('.jpg')
    ]


def resize_image(image, target_size, resize_by='height'):
    """Scale *image* so that one side equals *target_size*, keeping aspect ratio.

    Args:
        image: A PIL image (anything exposing ``size`` and ``resize``).
        target_size: Desired length in pixels of the side named by *resize_by*.
        resize_by: ``'height'`` to fix the height, ``'width'`` to fix the width.
            Any other value leaves the image untouched.

    Returns:
        The resized image, or the original object when the chosen side already
        has the requested length (or *resize_by* is not recognised).
    """
    w, h = image.size
    if resize_by == 'height':
        if h != target_size:
            ratio = target_size / h
            # Clamp to 1 so an extremely tall image never truncates to width 0.
            new_width = max(1, int(w * ratio))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            image = image.resize((new_width, target_size), Image.LANCZOS)
    elif resize_by == 'width':
        if w != target_size:
            ratio = target_size / w
            # Clamp to 1 so an extremely wide image never truncates to height 0.
            new_height = max(1, int(h * ratio))
            image = image.resize((target_size, new_height), Image.LANCZOS)
    return image


def create_2x2_image(images):
    """Tile up to four images into a 1280x1280 grid of 640x640 cells.

    Images are placed in row-major order: top-left, top-right, bottom-left,
    bottom-right. Each one is stretched to exactly 640x640 (aspect ratio is
    not preserved). Images beyond the fourth are ignored.

    Args:
        images: Iterable of PIL images.

    Returns:
        ``(new_image, coords)`` where ``coords`` holds one
        ``(x_min, y_min, x_max, y_max)`` pixel box per pasted image.
    """
    tile_w, tile_h = 640, 640
    new_image = Image.new('RGB', (2 * tile_w, 2 * tile_h))
    # Top-left corner of each grid cell, in row-major order.
    origins = [(0, 0), (tile_w, 0), (0, tile_h), (tile_w, tile_h)]
    coords = []
    for img, (x, y) in zip(images, origins):
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        tile = img.resize((tile_w, tile_h), Image.LANCZOS)
        new_image.paste(tile, (x, y))
        coords.append((x, y, x + tile_w, y + tile_h))
    return new_image, coords


def concatenate_images(image_list, mode='horizontal', target_size=768):
    """Join images side by side or top to bottom on a single RGB canvas.

    In ``'horizontal'`` mode every image is scaled to height *target_size* and
    the images are laid out left to right; in ``'vertical'`` mode every image
    is scaled to width *target_size* and stacked top to bottom.

    Args:
        image_list: PIL images to join, in layout order.
        mode: ``'horizontal'`` or ``'vertical'``.
        target_size: Shared height (horizontal) or width (vertical) in pixels.

    Returns:
        ``(new_image, coords)`` where ``coords`` holds one
        ``(x_min, y_min, x_max, y_max)`` pixel box per input image.

    Raises:
        ValueError: If *mode* is not one of the two supported values.
    """
    # Validate first: previously an unknown mode fell through to the return
    # statement and failed with an UnboundLocalError.
    if mode not in ('horizontal', 'vertical'):
        raise ValueError(
            f"mode must be 'horizontal' or 'vertical', got {mode!r}"
        )

    horizontal = mode == 'horizontal'
    fixed_side = 'height' if horizontal else 'width'
    axis = 0 if horizontal else 1  # index into .size along the joining direction

    resized_images = [
        resize_image(image, target_size, fixed_side) for image in image_list
    ]
    total_length = sum(image.size[axis] for image in resized_images)
    canvas_size = (
        (total_length, target_size) if horizontal else (target_size, total_length)
    )
    new_image = Image.new('RGB', canvas_size)

    coords = []
    offset = 0
    for image in resized_images:
        length = image.size[axis]
        if horizontal:
            new_image.paste(image, (offset, 0))
            coords.append((offset, 0, offset + length, target_size))
        else:
            new_image.paste(image, (0, offset))
            coords.append((0, offset, target_size, offset + length))
        offset += length
    return new_image, coords


def generate_labels(coords, image_size):
    """Convert pixel boxes into YOLO label lines for class ``0``.

    Args:
        coords: Iterable of ``(x_min, y_min, x_max, y_max)`` pixel boxes.
        image_size: ``(width, height)`` of the image the boxes belong to.

    Returns:
        One string per box, formatted as
        ``"0 x_center y_center width height"`` with every value divided by
        the image width or height.
    """
    img_w, img_h = image_size

    def to_yolo_line(box):
        # Corner format -> normalised centre/size format.
        left, top, right, bottom = box
        cx = (left + right) / 2.0 / img_w
        cy = (top + bottom) / 2.0 / img_h
        bw = (right - left) / img_w
        bh = (bottom - top) / img_h
        return f"0 {cx} {cy} {bw} {bh}"

    return [to_yolo_line(box) for box in coords]


def generate_dataset(image_folder, output_folder, label_folder, num_images):
    """Generate synthetic composite images together with YOLO label files.

    For each sample one of six layouts is drawn uniformly at random:
    2 or 3 images joined horizontally, 2 or 3 images joined vertically,
    a 2x2 grid of 4 images, or a single unmodified image. Source images are
    drawn at random (with replacement) from the ``.jpg`` files found under
    *image_folder*. Sample ``i`` is written as ``composite_image_{i+1:06d}.jpg``
    in *output_folder* with a matching ``.txt`` file in *label_folder* holding
    one label line per constituent image.

    Args:
        image_folder: Directory searched recursively for source ``.jpg`` files.
        output_folder: Directory for the generated JPEG images (created if missing).
        label_folder: Directory for the label files (created if missing).
        num_images: Number of samples to generate.

    Raises:
        ValueError: If samples are requested but no source image was found.
    """
    image_paths = list_path_all_files(image_folder)
    # Fail early with a clear message instead of the IndexError that
    # random.choice raises on an empty sequence.
    if num_images > 0 and not image_paths:
        raise ValueError(f"No .jpg images found under {image_folder!r}")

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(label_folder, exist_ok=True)

    # Layout id -> (number of source images, concatenation mode).
    strip_layouts = {
        1: (2, 'horizontal'),
        2: (3, 'horizontal'),
        3: (2, 'vertical'),
        4: (3, 'vertical'),
    }

    def pick_images(count):
        # Sample with replacement: the same source image may appear twice.
        return [Image.open(random.choice(image_paths)) for _ in range(count)]

    for i in range(num_images):
        random_choice = random.randint(1, 6)
        if random_choice in strip_layouts:
            count, mode = strip_layouts[random_choice]
            new_image, coords = concatenate_images(pick_images(count), mode=mode)
        elif random_choice == 5:
            new_image, coords = create_2x2_image(pick_images(4))
        else:
            # Single image case: one box covering the whole image.
            new_image = pick_images(1)[0]
            coords = [(0, 0, new_image.size[0], new_image.size[1])]

        stem = f'composite_image_{i + 1:06d}'
        new_image.save(os.path.join(output_folder, f'{stem}.jpg'), 'JPEG')

        labels = generate_labels(coords, new_image.size)
        with open(os.path.join(label_folder, f'{stem}.txt'), 'w') as label_file:
            label_file.writelines(label + '\n' for label in labels)


# Example usage: read source photos from image_folder and write 8000
# composite images plus their label files into the train split.
image_folder = '/ssd/xiedong/datasets/multilabelsTask/multilabels_new/'
output_folder = '/ssd/xiedong/yolov8detdir/composite_images/train/images/'
label_folder = '/ssd/xiedong/yolov8detdir/composite_images/train/labels/'
num_images = 8000
generate_dataset(
    image_folder=image_folder,
    output_folder=output_folder,
    label_folder=label_folder,
    num_images=num_images,
)

修改后的 generate_labels 函数将坐标转换为YOLOv5标签格式。生成的标签文件会包含每个图像中的标签,格式为 "0 x_center y_center width height"。

训练

data_det.yaml:

yaml 复制代码
# Dataset root; the train/val/test entries below are relative to this directory.
path: /ssd/xiedong/yolov8detdir/composite_images
train: train/images # train images (relative to 'path')
val: val/images # val images (relative to 'path')
# NOTE(review): the generation script in this article only writes train/images
# and train/labels; val/images presumably has to be produced by a second run
# with different output folders — confirm.
test: # test images (optional)

# Classes
names:
  0: paper

x03train_det.py:

python 复制代码
from ultralytics import YOLO

# Start from the pretrained yolov8m.pt checkpoint (recommended for training).
model = YOLO("yolov8m.pt")

# Directory passed to the trainer as `project`.
project = "/ssd/xiedong/yolov8detdir/paperdet"

# Training settings: two GPUs (device indices 2 and 3), 50 epochs at 640 px.
train_args = dict(
    data="data_det.yaml",
    epochs=50,
    imgsz=640,
    device=[2, 3],
    batch=180,
    project=project,
)
results = model.train(**train_args)

启动容器:

bash 复制代码
# Start an interactive shell in the ultralytics 8.2.62 image with all GPUs
# visible, 8g of shared memory, and the working directory bind-mounted at the
# same path inside the container.
docker run -it --gpus all   --shm-size=8g -v /ssd/xiedong/yolov8detdir:/ssd/xiedong/yolov8detdir ultralytics/ultralytics:8.2.62  bash

启动训练:

bash 复制代码
# Run from the mounted working directory.
cd /ssd/xiedong/yolov8detdir
# Launch the training script through torch.distributed.run with 2 processes on this node.
python -m torch.distributed.run --nproc_per_node 2 x03train_det.py

推理

bash 复制代码
/ssd/xiedong/yolov8detdir/paperdet/train2/weights/best.pt
python 复制代码
from ultralytics import YOLO

# Load the custom-trained detector weights (best.pt from the paperdet run),
# not a stock pretrained checkpoint.
weights_path = "/ssd/xiedong/yolov8detdir/paperdet/train2/weights/best.pt"
model = YOLO(weights_path)

# Run batched inference on a list of images; returns a list of Results objects.
image_list = ["composite_image_000006.jpg"]
results = model(image_list)

# Walk over the per-image results.
for result in results:
    boxes = result.boxes  # bounding-box outputs
    # The remaining attributes belong to other task types; presumably None for
    # a detection-only model — verify against the Ultralytics Results docs.
    masks = result.masks  # segmentation masks
    keypoints = result.keypoints  # pose keypoints
    probs = result.probs  # classification probabilities
    obb = result.obb  # oriented bounding boxes
    result.save(filename="result.jpg")  # write the annotated image to disk

推理效果是很好的:

相关推荐
Power202466625 分钟前
NLP论文速读|LongReward:基于AI反馈来提升长上下文大语言模型
人工智能·深度学习·机器学习·自然语言处理·nlp
YRr YRr1 小时前
深度学习:循环神经网络(RNN)详解
人工智能·rnn·深度学习
sp_fyf_20241 小时前
计算机前沿技术-人工智能算法-大语言模型-最新研究进展-2024-11-01
人工智能·深度学习·神经网络·算法·机器学习·语言模型·数据挖掘
红客5971 小时前
Transformer和BERT的区别
深度学习·bert·transformer
多吃轻食1 小时前
大模型微调技术 --> 脉络
人工智能·深度学习·神经网络·自然语言处理·embedding
charles_vaez2 小时前
开源模型应用落地-glm模型小试-glm-4-9b-chat-快速体验(一)
深度学习·语言模型·自然语言处理
YRr YRr2 小时前
深度学习:Transformer Decoder详解
人工智能·深度学习·transformer
Shy9604182 小时前
Bert完形填空
python·深度学习·bert
老艾的AI世界2 小时前
新一代AI换脸更自然,DeepLiveCam下载介绍(可直播)
图像处理·人工智能·深度学习·神经网络·目标检测·机器学习·ai换脸·视频换脸·直播换脸·图片换脸
浊酒南街3 小时前
吴恩达深度学习笔记:卷积神经网络(Foundations of Convolutional Neural Networks)4.9-4.10
人工智能·深度学习·神经网络·cnn