一、复现
网上有很多教程,复现yolov8的目标检测。在复现的过程中,会用到模型yolov8n.pt,可以选择命令下载和网站下载。复现后,runs文件包下会生成最优的权重文件best.py,在ultralytics/assets中放一张图片,按照remead指示,输入命令,用权重文件best.py做预测。运行之后,runs/detect/predict中的图片就是预测识别之后的。
二、数据集
1、准备
搜集一些你需要进行目标检测的图片,比如网上下载、自行拍摄等,可以使用旋转个、放大、缩小、调亮度的方式,对数据进行扩增。代码如下:需要修改第83行的文件夹路径,改为自己存放图片的路径即可。
python
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import os.path
import copy
# 椒盐噪声
def SaltAndPepper(src, percetage):
SP_NoiseImg = src.copy()
SP_NoiseNum = int(percetage * src.shape[0] * src.shape[1])
for i in range(SP_NoiseNum):
randR = np.random.randint(0, src.shape[0] - 1)
randG = np.random.randint(0, src.shape[1] - 1)
randB = np.random.randint(0, 3)
if np.random.randint(0, 1) == 0:
SP_NoiseImg[randR, randG, randB] = 0
else:
SP_NoiseImg[randR, randG, randB] = 255
return SP_NoiseImg
# 高斯噪声
def addGaussianNoise(image, percetage):
G_Noiseimg = image.copy()
w = image.shape[1]
h = image.shape[0]
G_NoiseNum = int(percetage * image.shape[0] * image.shape[1])
for i in range(G_NoiseNum):
temp_x = np.random.randint(0, h)
temp_y = np.random.randint(0, w)
G_Noiseimg[temp_x][temp_y][np.random.randint(3)] = np.random.randn(1)[0]
return G_Noiseimg
# 昏暗
def darker(image, percetage=0.9):
image_copy = image.copy()
w = image.shape[1]
h = image.shape[0]
# get darker
for xi in range(0, w):
for xj in range(0, h):
image_copy[xj, xi, 0] = int(image[xj, xi, 0] * percetage)
image_copy[xj, xi, 1] = int(image[xj, xi, 1] * percetage)
image_copy[xj, xi, 2] = int(image[xj, xi, 2] * percetage)
return image_copy
# 亮度
def brighter(image, percetage=1.5):
image_copy = image.copy()
w = image.shape[1]
h = image.shape[0]
# get brighter
for xi in range(0, w):
for xj in range(0, h):
image_copy[xj, xi, 0] = np.clip(int(image[xj, xi, 0] * percetage), a_max=255, a_min=0)
image_copy[xj, xi, 1] = np.clip(int(image[xj, xi, 1] * percetage), a_max=255, a_min=0)
image_copy[xj, xi, 2] = np.clip(int(image[xj, xi, 2] * percetage), a_max=255, a_min=0)
return image_copy
# 旋转
def rotate(image, angle, center=None, scale=1.0):
(h, w) = image.shape[:2]
# If no rotation center is specified, the center of the image is set as the rotation center
if center is None:
center = (w / 2, h / 2)
m = cv2.getRotationMatrix2D(center, angle, scale)
rotated = cv2.warpAffine(image, m, (w, h))
return rotated
# 翻转
def flip(image):
flipped_image = np.fliplr(image)
return flipped_image
# 图片文件夹路径
file_dir = r'E:\yolo_data\mx/'
for img_name in os.listdir(file_dir):
img_path = file_dir + img_name
img = cv2.imread(img_path)
# cv2.imshow("1",img)
# cv2.waitKey(5000)
# 旋转
rotated_90 = rotate(img, 90)
cv2.imwrite(file_dir + img_name[0:-4] + '_r90.jpg', rotated_90)
rotated_180 = rotate(img, 180)
cv2.imwrite(file_dir + img_name[0:-4] + '_r180.jpg', rotated_180)
for img_name in os.listdir(file_dir):
img_path = file_dir + img_name
img = cv2.imread(img_path)
# 镜像
flipped_img = flip(img)
cv2.imwrite(file_dir + img_name[0:-4] + '_fli.jpg', flipped_img)
# 增加噪声
# img_salt = SaltAndPepper(img, 0.3)
# cv2.imwrite(file_dir + img_name[0:7] + '_salt.jpg', img_salt)
img_gauss = addGaussianNoise(img, 0.3)
cv2.imwrite(file_dir + img_name[0:-4] + '_noise.jpg', img_gauss)
# 变亮、变暗
img_darker = darker(img)
cv2.imwrite(file_dir + img_name[0:-4] + '_darker.jpg', img_darker)
img_brighter = brighter(img)
cv2.imwrite(file_dir + img_name[0:-4] + '_brighter.jpg', img_brighter)
blur = cv2.GaussianBlur(img, (7, 7), 1.5)
# cv2.GaussianBlur(图像,卷积核,标准差)
cv2.imwrite(file_dir + img_name[0:-4] + '_blur.jpg', blur)
2、标注
将jpg图片使用labelme标注或者make sence(Make Sense),我使用的后者,后者是网页版的,可以批量导入,进行标注。标注完后,导出为yolov8需要的数据格式,即txt的标注文件。
3、划分
将数据集按照训练集、测试集、验证集=8:1:1的比例进行划分,代码如下:第84、85、86行分别需要改成自己的需要划分的图片的地址、txt文件的地址,以及划分后存放的位置。运行后,我的split文件结构是split下有images和labels,这两个下级文件都含有test、train、val。
python
# 将图片和标注数据hatDataXml文件,按比例切分为 训练集和测试集 分割后的在split文件中
import shutil
import random
import os
import argparse
# 检查文件夹是否存在
def mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
def main(image_dir, txt_dir, save_dir):
# 创建文件夹
mkdir(save_dir)
images_dir = os.path.join(save_dir, 'images')
labels_dir = os.path.join(save_dir, 'labels')
img_train_path = os.path.join(images_dir, 'train')
img_test_path = os.path.join(images_dir, 'test')
img_val_path = os.path.join(images_dir, 'val')
label_train_path = os.path.join(labels_dir, 'train')
label_test_path = os.path.join(labels_dir, 'test')
label_val_path = os.path.join(labels_dir, 'val')
mkdir(images_dir)
mkdir(labels_dir)
mkdir(img_train_path)
mkdir(img_test_path)
mkdir(img_val_path)
mkdir(label_train_path)
mkdir(label_test_path)
mkdir(label_val_path)
# 数据集划分比例,训练集75%,验证集15%,测试集15%,按需修改
train_percent = 0.8
val_percent = 0.1
test_percent = 0.1
total_txt = os.listdir(txt_dir)
num_txt = len(total_txt)
list_all_txt = range(num_txt) # 范围 range(0, num)
num_train = int(num_txt * train_percent)
num_val = int(num_txt * val_percent)
num_test = num_txt - num_train - num_val
train = random.sample(list_all_txt, num_train)
# 在全部数据集中取出train
val_test = [i for i in list_all_txt if not i in train]
# 再从val_test取出num_val个元素,val_test剩下的元素就是test
val = random.sample(val_test, num_val)
print("训练集数目:{}, 验证集数目:{},测试集数目:{}".format(len(train), len(val), len(val_test) - len(val)))
for i in list_all_txt:
name = total_txt[i][:-4]
srcImage = os.path.join(image_dir, name + '.jpg')
srcLabel = os.path.join(txt_dir, name + '.txt')
if i in train:
dst_train_Image = os.path.join(img_train_path, name + '.jpg')
dst_train_Label = os.path.join(label_train_path, name + '.txt')
shutil.copyfile(srcImage, dst_train_Image)
shutil.copyfile(srcLabel, dst_train_Label)
elif i in val:
dst_val_Image = os.path.join(img_val_path, name + '.jpg')
dst_val_Label = os.path.join(label_val_path, name + '.txt')
shutil.copyfile(srcImage, dst_val_Image)
shutil.copyfile(srcLabel, dst_val_Label)
else:
dst_test_Image = os.path.join(img_test_path, name + '.jpg')
dst_test_Label = os.path.join(label_test_path, name + '.txt')
shutil.copyfile(srcImage, dst_test_Image)
shutil.copyfile(srcLabel, dst_test_Label)
if __name__ == '__main__':
"""
python split_datasets.py --image-dir my_datasets/color_rings/imgs --txt-dir my_datasets/color_rings/txts --save-dir my_datasets/color_rings/train_data
"""
parser = argparse.ArgumentParser(description='split datasets to train,val,test params')
parser.add_argument('--image-dir', type=str, default='/Users/jing/Documents/code/ultralytics-8.1.0/hatDataXml/JPEGImages', help='image path dir')
parser.add_argument('--txt-dir', type=str, default='/Users/jing/Documents/code/ultralytics-8.1.0/hatDataXml/Annotations', help='txt path dir')
parser.add_argument('--save-dir', default='/Users/jing/Documents/code/ultralytics-8.1.0//split', type=str, help='save dir')
args = parser.parse_args()
image_dir = args.image_dir
txt_dir = args.txt_dir
save_dir = args.save_dir
main(image_dir, txt_dir, save_dir)
划分后,在datasets下新建文件DataSet1Parts,下级目录新建test、train、val,再下级分别新建images和labels,将划分好的,按照文件名复制进去。
三、改yaml
打开ultralytics/cfg/datasets/,复制文件coco128.yaml,命名为mydata.yaml,修改路径如下,其中nc代表类别数,names代表类别名称,按照自己的数据集改即可。
python
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco128.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco128 ← downloads here (7 MB)
path: ../datasets/DataSet1Parts
test: test/images
train: train/images
val: val/images
nc: 3
#names: ['cnn','mxn','xcn']
names:
0: cnn
1: mxn
2: xcn
四、训练及验证
1、训练
在Terminal控制台输入命令:
#终端复制以下代码训练
#yolo train data=/Users/jing/Documents/code/ultralytics-8.1.0/ultralytics/cfg/datasets/mydata.yaml model=yolov8n.pt epochs=100 lr0=0.01 batch=4
2、验证
#验证测试结果,找图片放在ultralytics/assets文件夹下
#输入以下代码测试,使用效果最好的权重文件 best.pt