自定义数据集 实例分割 MaskFormer+SOLOV2

facebookresearch/Mask2Former: Code release for "Masked-attention Mask Transformer for Universal Image Segmentation" (github.com

facebookresearch/Mask2Former: Code release for "Masked-attention Mask Transformer for Universal Image Segmentation" (github.com

WXinlong/SOLO:用于实例分割的 SOLO 和 SOLOv2,ECCV 2020 和 NeurIPS 2020。 (github.com)

1.MaskFormer

1.1 环境配置 Ubuntu20.04+cuda11.1

conda create --name mask2former python=3.8 -y

conda activate mask2former

conda install pytorch==1.9.0 torchvision==0.10.0 cudatoolkit=11.1 -c pytorch -c nvidia

pip install -U opencv-python

git clone git@github.com:facebookresearch/detectron2.git

上面这个git不好用的话就手动下载这个代码

cd detectron2

pip install -e .

pip install git+https://github.com/cocodataset/panopticapi.git

cd ..

git clone git@github.com:facebookresearch/Mask2Former.git

同理,可以手动下载这个源码

cd Mask2Former

pip install -r requirements.txt

cd mask2former/modeling/pixel_decoder/ops

sh make.sh

上述内容就是完整的配置环境的过程

1.2 数据集预处理

1.2.1 划分为训练集和验证集

import os
import json
import shutil
from sklearn.model_selection import train_test_split

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_dir = '/hy-tmp/output'

# 创建输出文件夹
train_img_folder = os.path.join(output_dir, 'train2017')
train_mask_folder = os.path.join(output_dir, 'train2017_masks')
val_img_folder = os.path.join(output_dir, 'val2017')
val_mask_folder = os.path.join(output_dir, 'val2017_masks')

os.makedirs(train_img_folder, exist_ok=True)
os.makedirs(train_mask_folder, exist_ok=True)
os.makedirs(val_img_folder, exist_ok=True)
os.makedirs(val_mask_folder, exist_ok=True)

# 读取所有图像文件名
img_files = [f for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))]

# 使用 train_test_split 划分数据集
train_files, val_files = train_test_split(img_files, test_size=0.2, random_state=42)

# 复制文件到相应的文件夹
for img_filename in train_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(train_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(train_mask_folder, img_name + '.png'))

for img_filename in val_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(val_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(val_mask_folder, img_name + '.png'))

print("Dataset split completed.")

1.2.2 转换为json格式

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_dir = '/hy-tmp/1'
train_img_folder = '/hy-tmp/output/train2017'
train_mask_folder = '/hy-tmp/output/train2017_masks'
val_img_folder = '/hy-tmp/output/val2017'
val_mask_folder = '/hy-tmp/output/val2017_masks'

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}]
    }

def create_annotations(img_folder, mask_folder, output_json_path):
    coco_format = init_coco_format()
    annotation_id = 1
    image_id = 1

    for img_filename in os.listdir(img_folder):
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            annotation = {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "segmentation": rle,
                "area": float(coco_mask.area(encoded_mask)),
                "bbox": bbox,
                "iscrowd": 0
            }

            coco_format["annotations"].append(annotation)
            annotation_id += 1

        image_id += 1

    # 保存到JSON文件
    with open(output_json_path, 'w') as f:
        json.dump(coco_format, f)

    print(f"COCO format annotation file created at {output_json_path}.")

# 创建注释文件
create_annotations(train_img_folder, train_mask_folder, os.path.join(output_dir, 'annotations/instances_train2017.json'))
create_annotations(val_img_folder, val_mask_folder, os.path.join(output_dir, 'annotations/instances_val2017.json'))

1.2.3 实例分割数据集

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image
from sklearn.model_selection import train_test_split
import shutil

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_dir = '/hy-tmp/json'

# 创建输出文件夹
train_img_folder = os.path.join(output_dir, 'train2017')
train_mask_folder = os.path.join(output_dir, 'train2017_masks')
val_img_folder = os.path.join(output_dir, 'val2017')
val_mask_folder = os.path.join(output_dir, 'val2017_masks')

os.makedirs(train_img_folder, exist_ok=True)
os.makedirs(train_mask_folder, exist_ok=True)
os.makedirs(val_img_folder, exist_ok=True)
os.makedirs(val_mask_folder, exist_ok=True)

# 读取所有图像文件名
img_files = [f for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))]

# 使用 train_test_split 划分数据集
train_files, val_files = train_test_split(img_files, test_size=0.2, random_state=42)

# 复制文件到相应的文件夹
for img_filename in train_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(train_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(train_mask_folder, img_name + '.png'))

for img_filename in val_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(val_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(val_mask_folder, img_name + '.png'))

print("Dataset split completed.")

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}]
    }

def create_instance_annotations(img_folder, mask_folder, output_json_path):
    coco_format = init_coco_format()
    annotation_id = 1
    image_id = 1

    for img_filename in os.listdir(img_folder):
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            annotation = {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "segmentation": rle,
                "area": float(coco_mask.area(encoded_mask)),
                "bbox": bbox,
                "iscrowd": 0
            }

            coco_format["annotations"].append(annotation)
            annotation_id += 1

        image_id += 1

    # 保存到JSON文件
    with open(output_json_path, 'w') as f:
        json.dump(coco_format, f)

    print(f"COCO format instance annotation file created at {output_json_path}.")

# 创建实例分割注释文件
create_instance_annotations(train_img_folder, train_mask_folder, os.path.join(output_dir, 'annotations/instances_train2017.json'))
create_instance_annotations(val_img_folder, val_mask_folder, os.path.join(output_dir, 'annotations/instances_val2017.json'))

1.2.4 全景分割数据集

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}],
        "licenses": [],
        "info": {
            "year": 2023,
            "version": "1.0",
            "description": "COCO Panoptic Dataset",
            "contributor": "",
            "url": "",
            "date_created": "2023-01-01"
        }
    }

def create_panoptic_annotations(img_folder, mask_folder, output_json_path):
    coco_format = init_coco_format()
    panoptic_annotations = []
    annotation_id = 1
    image_id = 1

    for img_filename in os.listdir(img_folder):
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 生成全景注释信息
        panoptic_annotation = {
            "image_id": image_id,
            "file_name": img_filename.replace('.jpg', '.png'),
            "segments_info": []
        }

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            segment_info = {
                "id": annotation_id,
                "category_id": 1,
                "iscrowd": 0,
                "bbox": bbox,
                "area": float(coco_mask.area(encoded_mask)),
                "segmentation": rle
            }

            panoptic_annotation["segments_info"].append(segment_info)
            annotation_id += 1

        panoptic_annotations.append(panoptic_annotation)
        image_id += 1

    # 保存到JSON文件
    os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
    with open(output_json_path, 'w') as f:
        json.dump({
            "images": coco_format["images"],
            "annotations": panoptic_annotations,
            "categories": coco_format["categories"],
            "licenses": coco_format["licenses"],
            "info": coco_format["info"]
        }, f)

    print(f"COCO format panoptic annotation file created at {output_json_path}.")

# 创建全景注释文件
create_panoptic_annotations(train_img_folder, train_mask_folder, os.path.join(output_dir, 'annotations/panoptic_train2017.json'))
create_panoptic_annotations(val_img_folder, val_mask_folder, os.path.join(output_dir, 'annotations/panoptic_val2017.json'))

1.2.5 全景分割数据集找到对应的图片(.png格式)

import json
import os
import shutil

# 定义文件路径
json_path = 'path/to/panoptic_train2017.json'
img_dir = 'path/to/img'
output_dir = 'path/to/output_folder'

# 如果输出文件夹不存在,则创建
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 加载 JSON 文件
with open(json_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 假设 JSON 文件中有一个键 "images",其中包含每个图像的相关信息
image_info_list = data.get('images', [])

# 遍历每个图像信息并复制对应的图片到输出文件夹
for image_info in image_info_list:
    # 假设每个图像信息包含 "file_name" 键
    file_name = image_info.get('file_name')
    
    if file_name:
        # 构建原始图片路径
        img_path = os.path.join(img_dir, file_name)
        
        # 构建目标图片路径
        output_path = os.path.join(output_dir, file_name)
        
        # 检查文件是否存在并复制
        if os.path.isfile(img_path):
            shutil.copy(img_path, output_path)
            print(f"图片 {file_name} 已复制到 {output_path}")
        else:
            print(f"图片 {file_name} 不存在于路径 {img_path}")
    else:
        print("图像信息中缺少 'file_name' 键")

1.3训练过程

单卡训练太慢了,需要好几天,选择多卡训练

单卡训练的代码

python train_net.py \
  --config-file configs/coco/panoptic-segmentation/maskformer2_R50_bs16_50ep.yaml \
  --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.00025

下图为正式运行的过程

多卡训练的代码

遇到的小插曲

报错

TypeError: init() got an unexpected keyword argument 'dtype'

修改events.py文件以及

from packaging.version import parse as LooseVersion

1.4测试过程

2. SOLOV2

2.1 环境配置

2.1.1选择pytorch版本

torch 1.5.0+cu101 pypi_0 pypi

2.1.2 编译

git clone https://github.com/WXinlong/SOLO.git

cd SOLO

pip install -r requirements/build.txt

pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"

pip install -v -e . # or "python setup.py develop"

2.2 数据集预处理

将数据集转换为coco数据集格式

2.2.1 把img以及对应的mask转换为train_2017.json以及val_2017.json格式

代码如下

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image
from sklearn.model_selection import train_test_split

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_train_json_path = '/hy-tmp/coco/annotations/instances_train2017.json'
output_val_json_path = '/hy-tmp/coco/annotations/instances_val2017.json'

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}]
    }

# 读取文件名
img_files = [f for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))]
train_files, val_files = train_test_split(img_files, test_size=0.2, random_state=42)

def create_annotations(files, coco_format, start_image_id=1, start_annotation_id=1):
    image_id = start_image_id
    annotation_id = start_annotation_id

    for img_filename in files:
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')  # 假设掩码是 .png 格式

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            annotation = {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "segmentation": rle,
                "area": float(coco_mask.area(encoded_mask)),
                "bbox": bbox,
                "iscrowd": 0
            }

            coco_format["annotations"].append(annotation)
            annotation_id += 1

        image_id += 1

    return coco_format, image_id, annotation_id

# 初始化 COCO 格式字典
coco_train_format = init_coco_format()
coco_val_format = init_coco_format()

# 创建训练集注释
coco_train_format, train_image_id, train_annotation_id = create_annotations(train_files, coco_train_format)

# 创建验证集注释
coco_val_format, _, _ = create_annotations(val_files, coco_val_format, start_image_id=train_image_id, start_annotation_id=train_annotation_id)

# 保存训练集注释到JSON文件
with open(output_train_json_path, 'w') as f:
    json.dump(coco_train_format, f)

# 保存验证集注释到JSON文件
with open(output_val_json_path, 'w') as f:
    json.dump(coco_val_format, f)

print("COCO format annotation files created.")

2.2.2 把数据集划分为train和val

import os
import json
import shutil

# 文件夹路径
img_folder = '/hy-tmp/pic'
val_img_folder = '/hy-tmp/coco/train2017'
val_json_path = '/hy-tmp/coco/annotations/instances_train2017.json'

# 创建 val2017 文件夹,如果不存在
os.makedirs(val_img_folder, exist_ok=True)

# 读取 val2017.json 文件
with open(val_json_path, 'r') as f:
    val_data = json.load(f)

# 获取 val2017.json 中所有图片文件名
val_img_files = [img_info['file_name'] for img_info in val_data['images']]

# 复制图片到 val2017 文件夹
for img_filename in val_img_files:
    src_path = os.path.join(img_folder, img_filename)
    dst_path = os.path.join(val_img_folder, img_filename)
    if os.path.exists(src_path):
        shutil.copy2(src_path, dst_path)
        print(f"Copied: {src_path} to {dst_path}")
    else:
        print(f"File not found: {src_path}")

print("All val2017 images have been copied.")

2.2.3 如果img和mask不是成对数据

首先运行下面的代码,然后在运行2.2.1以及2.2.2

import os

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'

# 获取文件夹中所有文件名(不包括扩展名)
img_files = {os.path.splitext(f)[0] for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))}
mask_files = {os.path.splitext(f)[0] for f in os.listdir(mask_folder) if os.path.isfile(os.path.join(mask_folder, f))}

# 找出在img文件夹中但不在mask文件夹中的文件
img_files_to_delete = img_files - mask_files
# 找出在mask文件夹中但不在img文件夹中的文件
mask_files_to_delete = mask_files - img_files

# 删除img文件夹中的不对应文件
for file in img_files_to_delete:
    file_path = os.path.join(img_folder, file + '.jpg')  # 假设文件扩展名是.jpg,根据实际情况修改
    if os.path.exists(file_path):
        os.remove(file_path)
        print(f"Deleted from img: {file_path}")

# 删除mask文件夹中的不对应文件
for file in mask_files_to_delete:
    file_path = os.path.join(mask_folder, file + '.png')  # 假设文件扩展名是.png,根据实际情况修改
    if os.path.exists(file_path):
        os.remove(file_path)
        print(f"Deleted from mask: {file_path}")

print("Deletion completed.")

2.3 使用单卡进行训练

python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py 

成功运行界面如下图所示,目前官方给的epoch是12个,未修改

2.4 使用单卡进行测试

python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py /hy-tmp/SOLO/work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show --out results_solo.pkl --eval segm

结果如下图

2.5 使用单卡进行可视化

python tools/test_ins_vis.py configs/solov2/solov2_r50_fpn_8gpu_1x.py  /hy-tmp/SOLO/work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show --save_dir  work_dirs/vis_solo

结果在work_dirs/vis_solo文件夹下面

相关推荐
赛男丨木子丿小喵4 分钟前
服务器raid5坏盘-换盘-修复阵列过程
运维·服务器
天下·第二9 分钟前
使用【docker】简单部署打包构建好的镜像并运行python项目
python·docker·eureka
来自于狂人18 分钟前
ansible常用命令
linux·运维·ansible
念晚91720 分钟前
Linux各种命令——tac命令,more 命令, less命令,head命令,tail命令,file 命令, stat 命令
linux·运维·服务器·前端·网络·数据库·less
左手の明天24 分钟前
【Python网络爬虫案例】python爬虫之模拟登录
开发语言·爬虫·python·模拟登录
诸葛悠闲38 分钟前
【数据结构(邓俊辉)学习笔记】二叉搜索树04——AVL树
数据结构·笔记·学习
铁蛋Q40 分钟前
Linux基础指令
linux·运维·服务器
2401_8554507942 分钟前
Pc端多功能视频混剪工具/便携版打开即用
经验分享·笔记·学习
攻城狮方叔1 小时前
共有5部手机,如果通过服务器让1号手机执行打开 “闲鱼.js“ 脚本
服务器·javascript·vscode
CHNMSCS1 小时前
Django项目 - 合并PDF文件
python·django·pdf