自定义数据集实例分割 MaskFormer+SOLOV2

facebookresearch/Mask2Former: Code release for "Masked-attention Mask Transformer for Universal Image Segmentation" (github.com

WXinlong/SOLO：用于实例分割的 SOLO 和 SOLOv2，ECCV 2020 和 NeurIPS 2020。 (github.com)

1.MaskFormer

1.1 环境配置 Ubuntu20.04+cuda11.1

conda create --name mask2former python=3.8 -y

conda activate mask2former

conda install pytorch==1.9.0 torchvision==0.10.0 cudatoolkit=11.1 -c pytorch -c nvidia

pip install -U opencv-python

git clone git@github.com:facebookresearch/detectron2.git

上面这个git不好用的话就手动下载这个代码

cd detectron2

pip install -e .

pip install git+https://github.com/cocodataset/panopticapi.git

cd ..

git clone git@github.com:facebookresearch/Mask2Former.git

同理，可以手动下载这个源码

cd Mask2Former

pip install -r requirements.txt

cd mask2former/modeling/pixel_decoder/ops

sh make.sh

上述内容就是完整的配置环境的过程

1.2 数据集预处理

1.2.1 划分为训练集和验证集

复制代码

import os
import json
import shutil
from sklearn.model_selection import train_test_split

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_dir = '/hy-tmp/output'

# 创建输出文件夹
train_img_folder = os.path.join(output_dir, 'train2017')
train_mask_folder = os.path.join(output_dir, 'train2017_masks')
val_img_folder = os.path.join(output_dir, 'val2017')
val_mask_folder = os.path.join(output_dir, 'val2017_masks')

os.makedirs(train_img_folder, exist_ok=True)
os.makedirs(train_mask_folder, exist_ok=True)
os.makedirs(val_img_folder, exist_ok=True)
os.makedirs(val_mask_folder, exist_ok=True)

# 读取所有图像文件名
img_files = [f for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))]

# 使用 train_test_split 划分数据集
train_files, val_files = train_test_split(img_files, test_size=0.2, random_state=42)

# 复制文件到相应的文件夹
for img_filename in train_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(train_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(train_mask_folder, img_name + '.png'))

for img_filename in val_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(val_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(val_mask_folder, img_name + '.png'))

print("Dataset split completed.")

1.2.2 转换为json格式

复制代码

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_dir = '/hy-tmp/1'
train_img_folder = '/hy-tmp/output/train2017'
train_mask_folder = '/hy-tmp/output/train2017_masks'
val_img_folder = '/hy-tmp/output/val2017'
val_mask_folder = '/hy-tmp/output/val2017_masks'

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}]
    }

def create_annotations(img_folder, mask_folder, output_json_path):
    coco_format = init_coco_format()
    annotation_id = 1
    image_id = 1

    for img_filename in os.listdir(img_folder):
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            annotation = {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "segmentation": rle,
                "area": float(coco_mask.area(encoded_mask)),
                "bbox": bbox,
                "iscrowd": 0
            }

            coco_format["annotations"].append(annotation)
            annotation_id += 1

        image_id += 1

    # 保存到JSON文件
    with open(output_json_path, 'w') as f:
        json.dump(coco_format, f)

    print(f"COCO format annotation file created at {output_json_path}.")

# 创建注释文件
create_annotations(train_img_folder, train_mask_folder, os.path.join(output_dir, 'annotations/instances_train2017.json'))
create_annotations(val_img_folder, val_mask_folder, os.path.join(output_dir, 'annotations/instances_val2017.json'))

1.2.3 实例分割数据集

复制代码

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image
from sklearn.model_selection import train_test_split
import shutil

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_dir = '/hy-tmp/json'

# 创建输出文件夹
train_img_folder = os.path.join(output_dir, 'train2017')
train_mask_folder = os.path.join(output_dir, 'train2017_masks')
val_img_folder = os.path.join(output_dir, 'val2017')
val_mask_folder = os.path.join(output_dir, 'val2017_masks')

os.makedirs(train_img_folder, exist_ok=True)
os.makedirs(train_mask_folder, exist_ok=True)
os.makedirs(val_img_folder, exist_ok=True)
os.makedirs(val_mask_folder, exist_ok=True)

# 读取所有图像文件名
img_files = [f for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))]

# 使用 train_test_split 划分数据集
train_files, val_files = train_test_split(img_files, test_size=0.2, random_state=42)

# 复制文件到相应的文件夹
for img_filename in train_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(train_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(train_mask_folder, img_name + '.png'))

for img_filename in val_files:
    img_name, img_ext = os.path.splitext(img_filename)
    shutil.copy(os.path.join(img_folder, img_filename), os.path.join(val_img_folder, img_filename))
    shutil.copy(os.path.join(mask_folder, img_name + '.png'), os.path.join(val_mask_folder, img_name + '.png'))

print("Dataset split completed.")

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}]
    }

def create_instance_annotations(img_folder, mask_folder, output_json_path):
    coco_format = init_coco_format()
    annotation_id = 1
    image_id = 1

    for img_filename in os.listdir(img_folder):
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            annotation = {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "segmentation": rle,
                "area": float(coco_mask.area(encoded_mask)),
                "bbox": bbox,
                "iscrowd": 0
            }

            coco_format["annotations"].append(annotation)
            annotation_id += 1

        image_id += 1

    # 保存到JSON文件
    with open(output_json_path, 'w') as f:
        json.dump(coco_format, f)

    print(f"COCO format instance annotation file created at {output_json_path}.")

# 创建实例分割注释文件
create_instance_annotations(train_img_folder, train_mask_folder, os.path.join(output_dir, 'annotations/instances_train2017.json'))
create_instance_annotations(val_img_folder, val_mask_folder, os.path.join(output_dir, 'annotations/instances_val2017.json'))

1.2.4 全景分割数据集

复制代码

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}],
        "licenses": [],
        "info": {
            "year": 2023,
            "version": "1.0",
            "description": "COCO Panoptic Dataset",
            "contributor": "",
            "url": "",
            "date_created": "2023-01-01"
        }
    }

def create_panoptic_annotations(img_folder, mask_folder, output_json_path):
    coco_format = init_coco_format()
    panoptic_annotations = []
    annotation_id = 1
    image_id = 1

    for img_filename in os.listdir(img_folder):
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 生成全景注释信息
        panoptic_annotation = {
            "image_id": image_id,
            "file_name": img_filename.replace('.jpg', '.png'),
            "segments_info": []
        }

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            segment_info = {
                "id": annotation_id,
                "category_id": 1,
                "iscrowd": 0,
                "bbox": bbox,
                "area": float(coco_mask.area(encoded_mask)),
                "segmentation": rle
            }

            panoptic_annotation["segments_info"].append(segment_info)
            annotation_id += 1

        panoptic_annotations.append(panoptic_annotation)
        image_id += 1

    # 保存到JSON文件
    os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
    with open(output_json_path, 'w') as f:
        json.dump({
            "images": coco_format["images"],
            "annotations": panoptic_annotations,
            "categories": coco_format["categories"],
            "licenses": coco_format["licenses"],
            "info": coco_format["info"]
        }, f)

    print(f"COCO format panoptic annotation file created at {output_json_path}.")

# 创建全景注释文件
create_panoptic_annotations(train_img_folder, train_mask_folder, os.path.join(output_dir, 'annotations/panoptic_train2017.json'))
create_panoptic_annotations(val_img_folder, val_mask_folder, os.path.join(output_dir, 'annotations/panoptic_val2017.json'))

1.2.5 全景分割数据集找到对应的图片（.png格式）

复制代码

import json
import os
import shutil

# 定义文件路径
json_path = 'path/to/panoptic_train2017.json'
img_dir = 'path/to/img'
output_dir = 'path/to/output_folder'

# 如果输出文件夹不存在，则创建
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 加载 JSON 文件
with open(json_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 假设 JSON 文件中有一个键 "images"，其中包含每个图像的相关信息
image_info_list = data.get('images', [])

# 遍历每个图像信息并复制对应的图片到输出文件夹
for image_info in image_info_list:
    # 假设每个图像信息包含 "file_name" 键
    file_name = image_info.get('file_name')
    
    if file_name:
        # 构建原始图片路径
        img_path = os.path.join(img_dir, file_name)
        
        # 构建目标图片路径
        output_path = os.path.join(output_dir, file_name)
        
        # 检查文件是否存在并复制
        if os.path.isfile(img_path):
            shutil.copy(img_path, output_path)
            print(f"图片 {file_name} 已复制到 {output_path}")
        else:
            print(f"图片 {file_name} 不存在于路径 {img_path}")
    else:
        print("图像信息中缺少 'file_name' 键")

1.3训练过程

单卡训练太慢了，需要好几天，选择多卡训练

单卡训练的代码

复制代码

python train_net.py \
  --config-file configs/coco/panoptic-segmentation/maskformer2_R50_bs16_50ep.yaml \
  --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.00025

下图为正式运行的过程

多卡训练的代码

遇到的小插曲

报错

TypeError: init() got an unexpected keyword argument 'dtype'

修改events.py文件以及

复制代码

from packaging.version import parse as LooseVersion

1.4测试过程

2. SOLOV2

2.1 环境配置

2.1.1选择pytorch版本

torch 1.5.0+cu101 pypi_0 pypi

2.1.2 编译

git clone https://github.com/WXinlong/SOLO.git

cd SOLO

pip install -r requirements/build.txt

pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"

pip install -v -e . # or "python setup.py develop"

2.2 数据集预处理

将数据集转换为coco数据集格式

2.2.1 把img以及对应的mask转换为train_2017.json以及val_2017.json格式

代码如下

复制代码

import os
import json
import cv2
import numpy as np
from pycocotools import mask as coco_mask
from PIL import Image
from sklearn.model_selection import train_test_split

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'
output_train_json_path = '/hy-tmp/coco/annotations/instances_train2017.json'
output_val_json_path = '/hy-tmp/coco/annotations/instances_val2017.json'

# 初始化 COCO 格式字典
def init_coco_format():
    return {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "object", "supercategory": "object"}]
    }

# 读取文件名
img_files = [f for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))]
train_files, val_files = train_test_split(img_files, test_size=0.2, random_state=42)

def create_annotations(files, coco_format, start_image_id=1, start_annotation_id=1):
    image_id = start_image_id
    annotation_id = start_annotation_id

    for img_filename in files:
        img_name, img_ext = os.path.splitext(img_filename)
        img_path = os.path.join(img_folder, img_filename)
        mask_path = os.path.join(mask_folder, img_name + '.png')  # 假设掩码是 .png 格式

        if not os.path.exists(mask_path):
            continue

        # 读取图像
        img = Image.open(img_path)
        width, height = img.size

        # 生成图像信息
        image_info = {
            "id": image_id,
            "file_name": img_filename,
            "width": width,
            "height": height
        }
        coco_format["images"].append(image_info)

        # 读取掩码
        mask = Image.open(mask_path)
        mask = np.array(mask)

        # 寻找掩码的边界框
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            bbox = [x, y, w, h]

            # 生成掩码的二进制格式
            binary_mask = np.zeros((height, width), dtype=np.uint8)
            cv2.drawContours(binary_mask, [contour], -1, 1, -1)
            encoded_mask = coco_mask.encode(np.asfortranarray(binary_mask))

            # 将 RLE 编码转换为可序列化的格式
            rle = {
                'counts': encoded_mask['counts'].decode('utf-8'),  # 转换为字符串
                'size': encoded_mask['size']
            }

            annotation = {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": 1,
                "segmentation": rle,
                "area": float(coco_mask.area(encoded_mask)),
                "bbox": bbox,
                "iscrowd": 0
            }

            coco_format["annotations"].append(annotation)
            annotation_id += 1

        image_id += 1

    return coco_format, image_id, annotation_id

# 初始化 COCO 格式字典
coco_train_format = init_coco_format()
coco_val_format = init_coco_format()

# 创建训练集注释
coco_train_format, train_image_id, train_annotation_id = create_annotations(train_files, coco_train_format)

# 创建验证集注释
coco_val_format, _, _ = create_annotations(val_files, coco_val_format, start_image_id=train_image_id, start_annotation_id=train_annotation_id)

# 保存训练集注释到JSON文件
with open(output_train_json_path, 'w') as f:
    json.dump(coco_train_format, f)

# 保存验证集注释到JSON文件
with open(output_val_json_path, 'w') as f:
    json.dump(coco_val_format, f)

print("COCO format annotation files created.")

2.2.2 把数据集划分为train和val

复制代码

import os
import json
import shutil

# 文件夹路径
img_folder = '/hy-tmp/pic'
val_img_folder = '/hy-tmp/coco/train2017'
val_json_path = '/hy-tmp/coco/annotations/instances_train2017.json'

# 创建 val2017 文件夹，如果不存在
os.makedirs(val_img_folder, exist_ok=True)

# 读取 val2017.json 文件
with open(val_json_path, 'r') as f:
    val_data = json.load(f)

# 获取 val2017.json 中所有图片文件名
val_img_files = [img_info['file_name'] for img_info in val_data['images']]

# 复制图片到 val2017 文件夹
for img_filename in val_img_files:
    src_path = os.path.join(img_folder, img_filename)
    dst_path = os.path.join(val_img_folder, img_filename)
    if os.path.exists(src_path):
        shutil.copy2(src_path, dst_path)
        print(f"Copied: {src_path} to {dst_path}")
    else:
        print(f"File not found: {src_path}")

print("All val2017 images have been copied.")

2.2.3 如果img和mask不是成对数据

首先运行下面的代码，然后在运行2.2.1以及2.2.2

复制代码

import os

# 文件夹路径
img_folder = '/hy-tmp/pic'
mask_folder = '/hy-tmp/cv2_mask'

# 获取文件夹中所有文件名（不包括扩展名）
img_files = {os.path.splitext(f)[0] for f in os.listdir(img_folder) if os.path.isfile(os.path.join(img_folder, f))}
mask_files = {os.path.splitext(f)[0] for f in os.listdir(mask_folder) if os.path.isfile(os.path.join(mask_folder, f))}

# 找出在img文件夹中但不在mask文件夹中的文件
img_files_to_delete = img_files - mask_files
# 找出在mask文件夹中但不在img文件夹中的文件
mask_files_to_delete = mask_files - img_files

# 删除img文件夹中的不对应文件
for file in img_files_to_delete:
    file_path = os.path.join(img_folder, file + '.jpg')  # 假设文件扩展名是.jpg，根据实际情况修改
    if os.path.exists(file_path):
        os.remove(file_path)
        print(f"Deleted from img: {file_path}")

# 删除mask文件夹中的不对应文件
for file in mask_files_to_delete:
    file_path = os.path.join(mask_folder, file + '.png')  # 假设文件扩展名是.png，根据实际情况修改
    if os.path.exists(file_path):
        os.remove(file_path)
        print(f"Deleted from mask: {file_path}")

print("Deletion completed.")

2.3 使用单卡进行训练

复制代码

python tools/train.py configs/solov2/solov2_r50_fpn_8gpu_1x.py

成功运行界面如下图所示,目前官方给的epoch是12个，未修改

2.4 使用单卡进行测试

复制代码

python tools/test_ins.py configs/solov2/solov2_r50_fpn_8gpu_1x.py /hy-tmp/SOLO/work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show --out results_solo.pkl --eval segm

结果如下图

2.5 使用单卡进行可视化

复制代码

python tools/test_ins_vis.py configs/solov2/solov2_r50_fpn_8gpu_1x.py  /hy-tmp/SOLO/work_dirs/solov2_release_r50_fpn_8gpu_1x/latest.pth --show --save_dir  work_dirs/vis_solo

结果在work_dirs/vis_solo文件夹下面

自定义数据集 实例分割 MaskFormer+SOLOV2

1.MaskFormer

1.1 环境配置 Ubuntu20.04+cuda11.1

1.2 数据集预处理

1.2.1 划分为训练集和验证集

1.2.2 转换为json格式

1.2.3 实例分割数据集

1.2.4 全景分割数据集

1.2.5 全景分割数据集找到对应的图片（.png格式）

1.3训练过程

遇到的小插曲

1.4测试过程

2. SOLOV2

2.1 环境配置

2.1.1选择pytorch版本

2.1.2 编译

2.2 数据集预处理

2.2.1 把img以及对应的mask转换为train_2017.json以及val_2017.json格式

2.2.2 把数据集划分为train和val

2.2.3 如果img和mask不是成对数据

2.3 使用单卡进行训练

2.4 使用单卡进行测试

2.5 使用单卡进行可视化

自定义数据集实例分割 MaskFormer+SOLOV2