伪装目标检测中数据集的标注格式:COCO和VOC

1. OSFormer 中提供的 COD10K 标注文件是 COCO 格式的 JSON。由于伪装目标检测需要实例级的分割标注,每条 annotation 除了 bbox 之外,还带有一个 segmentation 字段(按 x、y 交替排列的多边形轮廓点坐标):

python 复制代码
 "images": [
        {
            "id": 3039,
            "file_name": "COD10K-CAM-1-Aquatic-1-BatFish-3.jpg",
            "width": 800,
            "height": 533,
            "date_captured": "2020-08-21 01:23:18.643991",
            "license": 1,
            "url": ""
        }
    ],
    "categories": [
        {
            "id": 1,
            "name": "foreground",
            "supercategory": "saliency"
        }
    ],
    "annotations": [
        {
            "id": 3533,
            "image_id": 3039,
            "category_id": 1,
            "iscrowd": 0,
            "area": 104946,
            "bbox": [
                96.0,
                60.0,
                544.0,
                431.0
            ],
            "segmentation": [
                [
                    513.0,
                    490.9980392156863,
                    505.0,
                    490.9980392156863,
                    469.0,
                    476.9980392156863,
                    459.0,
                    479.9980392156863,
                    450.0,
                    471.9980392156863,
                    442.0,
                    472.9980392156863,
                    439.0,
                    467.9980392156863,
                    434.0,
                    477.9980392156863,
                    428.0,
                    467.9980392156863,
                    427.9980392156863,
                    473.0,
                    424.0,
                    475.9980392156863,

首先将整个 JSON 文件按图片拆分,每张图片单独保存为一个 JSON 文件:

python 复制代码
from __future__ import print_function
import json
import os

# COCO-style "Object Instance" annotation file that will be split up.
json_file = 'D:/projects/SINet-V2-main/json/train_instance.json'
# Output directory: one "<file_name>.json" is written here per image.
single_dir = 'D:/projects/SINet-V2-main/json/single/'

with open(json_file, 'r') as f:
    data = json.load(f)

# The original script assumed this directory already existed.
if not os.path.exists(single_dir):
    os.makedirs(single_dir)

# Group the annotations by image id in a single pass so the full annotation
# list is not rescanned for every image. Order within a group follows the
# order in the source file.
anns_by_image = {}
for ann in data['annotations']:
    anns_by_image.setdefault(ann['image_id'], []).append(ann)

# Iterate over the images actually present instead of a hard-coded count.
for image in data['images']:
    # Each output file is a minimal COCO document for one image: the image
    # record, the shared category list and that image's annotations.
    # 'info' and 'licenses' are deliberately not copied.
    data_2 = {
        'images': [image],
        'categories': data['categories'],
        'annotations': anns_by_image.get(image['id'], []),
    }
    savepath = single_dir + image['file_name'] + '.json'
    with open(savepath, 'w') as f:
        json.dump(data_2, f, indent=4)  # indent=4 keeps the file readable

然后将每张图片对应的 JSON 转换为 VOC 格式的 XML 标注:

python 复制代码
import os
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split

# 1. Input: directory holding the per-image COCO-style JSON files.
labelme_path = "D:/projects/SINet-V2-main/json/single/"

# Output: root directory of the generated VOC-style dataset.
saved_path = "D:/projects/SINet-V2-main/json/COD10K-voc/"
isUseTest = None  # whether a test split should be created as well

# Directory holding the source images; each JSON file is named
# "<image file name>.json", so the stem of a JSON file is the image file name.
image_dir = 'D:/projects/SINet-V2-main/json/dataset/image/'

# 2. Create the VOC folder layout.
for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
    if not os.path.exists(saved_path + sub_dir):
        os.makedirs(saved_path + sub_dir)

# 3. Collect the JSON files to convert (names without the ".json" suffix).
files = glob(labelme_path + "*.json")
files = [os.path.splitext(os.path.basename(i))[0] for i in files]
print(files)

# 4. Read each annotation file and write the matching VOC XML.
for json_file_ in files:
    json_filename = labelme_path + json_file_ + ".json"
    with open(json_filename, "r", encoding="utf-8") as f:
        json_file = json.load(f)

    # cv2.imread returns None instead of raising when the image cannot be
    # read; skip that entry with a message rather than crashing on `.shape`.
    image = cv2.imread(image_dir + json_file_)
    if image is None:
        print("skip " + json_filename + ": cannot read image " + image_dir + json_file_)
        continue
    height, width, channels = image.shape

    # VOC identifies a sample by its bare id ("<id>.xml" / "<id>.jpg"), so
    # the image extension is dropped from the XML file name.
    xml_name = os.path.splitext(json_file_)[0] + ".xml"

    # NOTE(review): the folder/source/owner values below look like leftovers
    # from another dataset; they are kept unchanged - confirm before editing.
    header = [
        '<annotation>\n',
        '\t<folder>' + 'CELL_data' + '</folder>\n',
        '\t<filename>' + json_file_ + '</filename>\n',
        '\t<source>\n',
        '\t\t<database>CELL Data</database>\n',
        '\t\t<annotation>CELL</annotation>\n',
        '\t\t<image>bloodcell</image>\n',
        '\t\t<flickrid>NULL</flickrid>\n',
        '\t</source>\n',
        '\t<owner>\n',
        '\t\t<flickrid>NULL</flickrid>\n',
        '\t\t<name>CELL</name>\n',
        '\t</owner>\n',
        '\t<size>\n',
        '\t\t<width>' + str(width) + '</width>\n',
        '\t\t<height>' + str(height) + '</height>\n',
        '\t\t<depth>' + str(channels) + '</depth>\n',
        '\t</size>\n',
        # segmented flag (irrelevant for plain object detection)
        '\t\t<segmented>0</segmented>\n',
    ]

    with codecs.open(saved_path + "Annotations/" + xml_name, "w", "utf-8") as xml:
        xml.write("".join(header))

        # Every object gets the name of the first category in the file.
        Name = json_file["categories"][0]["name"]
        print(Name)

        for multi in json_file["annotations"]:
            # COCO bbox is [x, y, width, height]; VOC wants corner coordinates.
            x, y, w, h = multi["bbox"][:4]
            xmin, ymin = x, y
            xmax, ymax = x + w, y + h

            # Degenerate boxes (zero or negative extent) are not written.
            if xmax <= xmin or ymax <= ymin:
                continue

            xml.write("".join([
                '\t<object>\n',
                '\t\t<name>' + Name + '</name>\n',  # object class
                '\t\t<pose>Unspecified</pose>\n',  # viewing angle
                '\t\t<truncated>0</truncated>\n',  # 0 = not truncated
                '\t\t<difficult>0</difficult>\n',  # 0 = easy to recognise
                '\t\t<bndbox>\n',
                '\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n',
                '\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n',
                '\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n',
                '\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n',
                '\t\t</bndbox>\n',
                '\t</object>\n',
            ]))
            print(json_filename, xmin, ymin, xmax, ymax, Name)
        xml.write('</annotation>')

# 5. Copy the source images into <saved_path>/JPEGImages/.
# The images are taken from the directory the conversion step reads them
# from; the previous relative pattern did not match any configured path.
image_dir = "D:/projects/SINet-V2-main/json/dataset/image/"
image_files = glob(image_dir + "*.jpg")
print("copy image files to VOC007/JPEGImages/")
for image in image_files:
    shutil.copy(image, saved_path + "JPEGImages/")

# 6. Split the samples into train / val (and optionally test) lists.
txtsavepath = saved_path + "ImageSets/Main/"

# Sample ids are the names of the XML files this script wrote under
# saved_path, without the ".xml" suffix. Sorting makes the split reproducible,
# because glob order is not guaranteed and random_state is fixed.
total_files = sorted(
    os.path.splitext(os.path.basename(p))[0]
    for p in glob(saved_path + "Annotations/*.xml")
)

# Compute every split before touching the list files, so a failing split
# (e.g. no annotations found) does not leave truncated, empty .txt files.
if isUseTest:
    trainval_files, test_files = train_test_split(total_files, test_size=0.15, random_state=55)
else:
    trainval_files, test_files = total_files, []
train_files, val_files = train_test_split(trainval_files, test_size=0.15, random_state=55)

for name in val_files:
    print(name)
for name in test_files:
    print("test:" + name)

# One id per line; context managers close each file even on error.
for list_name, ids in (
    ("trainval.txt", trainval_files),
    ("train.txt", train_files),
    ("val.txt", val_files),
    ("test.txt", test_files),
):
    with open(txtsavepath + list_name, "w") as f:
        f.writelines(name + "\n" for name in ids)

这样生成的 XML 文件只包含边界框(bbox),丢失了 COD10K 原始标注中的 segmentation(分割轮廓)信息。后续还需要在转换脚本中加入对 segmentation 字段的读取与写入。

参考博客:https://blog.csdn.net/ytusdc/article/details/1319729224

https://blog.csdn.net/xjx19991226/article/details/123386207

相关推荐
陈广亮1 天前
构建具有长期记忆的 AI Agent:从设计模式到生产实践
人工智能
会写代码的柯基犬1 天前
DeepSeek vs Kimi vs Qwen —— AI 生成俄罗斯方块代码效果横评
人工智能·llm
Mintopia1 天前
OpenClaw 是什么?为什么节后热度如此之高?
人工智能
爱可生开源社区1 天前
DBA 的未来?八位行业先锋的年度圆桌讨论
人工智能·dba
叁两1 天前
用opencode打造全自动公众号写作流水线,AI 代笔太香了!
前端·人工智能·agent
敏编程1 天前
一天一个Python库:jsonschema - JSON 数据验证利器
python
前端付豪1 天前
LangChain记忆:通过Memory记住上次的对话细节
人工智能·python·langchain
strayCat232551 天前
Clawdbot 源码解读 7: 扩展机制
人工智能·开源
王鑫星1 天前
SWE-bench 首次突破 80%:Claude Opus 4.5 发布,Anthropic 的野心不止于写代码
人工智能