yolov10 学习笔记

推理代码：source 可以是文件名、目录路径或图片 URL

预测可视化：

预测可视化加nms

训练自己的数据集，

训练一段时间报错：dill库

解决方法：

推理代码：source 可以是文件名、目录路径或图片 URL

保存结果：

python 复制代码

from ultralytics import YOLOv10

# Pretrained weights can be obtained in either of two ways:
#   model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
#   wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
model = YOLOv10('yolov10s.pt')

# Optional: run validation on COCO before predicting.
# model.val(data='coco.yaml', batch=256)

# `source` is whatever predict() should read; pick exactly one.
# Remote image alternative:
#   source = 'http://images.cocodataset.org/val2017/000000039769.jpg'
# Local path: a raw string keeps the Windows backslashes literal, so sequences
# such as "\d" or "\p" are not parsed as (invalid) escape sequences.
source = r'F:\data\qijun\dao\pics_re_1'

# save=True asks predict() to save its results
# (see the Ultralytics predict documentation for the output location).
model.predict(source=source, save=True)

预测可视化：

python 复制代码

import cv2
import time
# import torch
from ultralytics import YOLOv10

# Live webcam demo: run YOLOv10 on every captured frame, draw the detections
# and the measured inference rate, and show the result until 'q' is pressed.
cv2.namedWindow('window', cv2.WINDOW_NORMAL)
cv2.resizeWindow('window', 640, 480)

model = YOLOv10('yolov10s.pt')

# Open the camera at index 0.
cap = cv2.VideoCapture(0)

# Stop with a non-zero exit status when the camera cannot be opened.
if not cap.isOpened():
    print("无法打开摄像头")
    raise SystemExit(1)

try:
    # Report the capture resolution.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(width, height)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("无法读取帧")
            break

        # Optional: shrink the input frame to speed up inference.
        # frame = cv2.resize(frame, (width // 4, height // 4))

        # Time only the model call. device='0' selects the first CUDA device;
        # pass device='cpu' to run on the CPU instead.
        start_time = time.perf_counter()
        results = model(frame, device='0')
        elapsed = time.perf_counter() - start_time

        # Draw every detection (box plus "label confidence" caption) on the frame.
        for result in results:
            boxes = result.boxes.xyxy.cpu().numpy()
            confidences = result.boxes.conf.cpu().numpy()
            class_ids = result.boxes.cls.cpu().numpy().astype(int)

            for box, confidence, class_id in zip(boxes, confidences, class_ids):
                x1, y1, x2, y2 = map(int, box[:4])
                label = result.names[int(class_id)]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'{label} {confidence:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)

        # Frames per second is the reciprocal of the per-frame inference time;
        # guard against a zero interval from a coarse clock.
        fps = 1.0 / elapsed if elapsed > 0 else 0.0
        cv2.putText(frame, f'FPS: {fps:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        cv2.imshow('window', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

预测可视化加nms

python 复制代码

import cv2
import time

import numpy as np
import torch

from img_reader import ImgReader
# import torch
from ultralytics import YOLOv10

def _draw_boxes(canvas, names, boxes, confidences, class_ids, color, thickness, with_label):
    """Draw one rectangle per detection on *canvas* (in place).

    When *with_label* is true, a "label confidence" caption is also written
    just above each box, using *names* to map a class id to its label.
    """
    for box, confidence, class_id in zip(boxes, confidences, class_ids):
        x1, y1, x2, y2 = map(int, box[:4])
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, thickness)
        if with_label:
            label = names[int(class_id)]
            cv2.putText(canvas, f'{label} {confidence:.2f}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)


def _nms_per_class(boxes, confidences, class_ids, iou_threshold=0.5):
    """Run NMS separately for every class id and merge the survivors.

    Returns a ``(boxes, confidences, class_ids)`` tuple of NumPy arrays.
    All three are empty when there are no detections, so callers never hit
    ``np.concatenate`` on an empty list.
    """
    kept_boxes, kept_confidences, kept_class_ids = [], [], []

    for cls in np.unique(class_ids):
        cls_mask = class_ids == cls
        boxes_cls = torch.tensor(boxes[cls_mask])
        confidences_cls = torch.tensor(confidences[cls_mask])

        keep_indices = torch.ops.torchvision.nms(boxes_cls, confidences_cls, iou_threshold=iou_threshold)

        num_filtered = len(boxes_cls) - len(keep_indices)
        if num_filtered > 0:
            print(f"Class {cls}: {num_filtered} boxes filtered out by NMS")

        kept_boxes.append(boxes_cls[keep_indices].numpy())
        kept_confidences.append(confidences_cls[keep_indices].numpy())
        kept_class_ids.append(np.full(len(keep_indices), cls, dtype=int))

    if not kept_boxes:
        # No detections at all: return correctly shaped empty arrays.
        return (np.empty((0, 4), dtype=np.float32),
                np.empty(0, dtype=np.float32),
                np.empty(0, dtype=int))

    return (np.concatenate(kept_boxes, axis=0),
            np.concatenate(kept_confidences, axis=0),
            np.concatenate(kept_class_ids, axis=0))


# Custom-trained weights; swap in 'yolov10s.pt' to use the stock model.
model = YOLOv10('runs/train/exp2/weights/best.pt')

# Input selection for ImgReader (a source plus a type tag). Other combinations
# used with this script:
#   f_type = 'img'; source = r'B:\project\qijun\data\dataSet-coins\images\train'
#   f_type = 'cam'; source = 0
f_type = 'mp4'
source = r"B:\project\qijun\data\test\shuiguo1.mp4"
file_reader = ImgReader(source, f_type=f_type)

try:
    for img_i in range(file_reader.total_frames):
        img_o, img_index, img_file = file_reader.get_img()
        # NOTE(review): ImgReader is not visible here; assuming it may hand
        # back None when a frame cannot be read - confirm against its code.
        if img_o is None:
            break

        # Downscale so the longest side is at most 1500 px.
        longest_side = max(img_o.shape[:2])
        if longest_side > 1500:
            scale = 1500 / longest_side
            img_o = cv2.resize(img_o, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        img = img_o             # canvas that receives the drawings
        frame = img_o.copy()    # clean copy passed to the model
        if img_file is not None:
            print(img_file)

        # Time only the model call. device='0' selects the first CUDA device;
        # pass device='cpu' to run on the CPU instead.
        start_time = time.perf_counter()
        results = model(frame, device='0')
        elapsed = time.perf_counter() - start_time

        for result in results:
            boxes = result.boxes.xyxy.cpu().numpy()
            confidences = result.boxes.conf.cpu().numpy()
            class_ids = result.boxes.cls.cpu().numpy().astype(int)

            # Raw model output in thick red, without captions ...
            _draw_boxes(img, result.names, boxes, confidences, class_ids,
                        (0, 0, 255), 3, False)

            # ... then the boxes that survive per-class NMS in green with
            # captions, so any suppressed box stays visible in red.
            final_boxes, final_confidences, final_class_ids = _nms_per_class(
                boxes, confidences, class_ids, iou_threshold=0.5)
            _draw_boxes(img, result.names, final_boxes, final_confidences, final_class_ids,
                        (0, 255, 0), 2, True)

        # Frames per second is the reciprocal of the per-frame inference time;
        # guard against a zero interval from a coarse clock.
        fps = 1.0 / elapsed if elapsed > 0 else 0.0
        cv2.putText(img, f'{img_i} FPS: {fps:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        cv2.imshow('window', img)

        # waitKey(0) blocks until a key is pressed, stepping through files and
        # videos one frame at a time; the camera uses a 2 ms wait instead.
        waitkey = 2 if f_type == 'cam' else 0
        if cv2.waitKey(waitkey) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window even if the loop raised.
    cv2.destroyAllWindows()

训练自己的数据集，

原版标签是txt格式

我下载了完整代码，自己修改数据集

https://download.csdn.net/download/qq_38408785/89356134

python 复制代码

from ultralytics import YOLOv10

if __name__ == '__main__':
    # Training options, gathered in one place and passed to model.train().
    train_options = {
        'data': 'data/NEU-DET.yaml',
        'cache': False,
        'imgsz': 640,
        'epochs': 200,
        'batch': 16,
        'close_mosaic': 10,
        'device': '0',
        'optimizer': 'SGD',
        'project': 'runs/train',
        'name': 'exp',
    }

    # Build the network from its YAML definition, then load pretrained weights.
    model = YOLOv10('ultralytics/cfg/models/v10/yolov10n.yaml')
    model.load('yolov10n.pt')

    model.train(**train_options)

训练一段时间后报错（与 dill 库有关）：

bash 复制代码

  File "D:\ProgramData\miniconda3\envs\py310\lib\pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "D:\ProgramData\miniconda3\envs\py310\lib\pickle.py", line 717, in save_reduce
    save(state)
  File "D:\ProgramData\miniconda3\envs\py310\lib\site-packages\dill\_dill.py", line 388, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "D:\ProgramData\miniconda3\envs\py310\lib\pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "D:\ProgramData\miniconda3\envs\py310\lib\site-packages\dill\_dill.py", line 1186, in save_module_dict
    StockPickler.save_dict(pickler, obj)
  File "D:\ProgramData\miniconda3\envs\py310\lib\pickle.py", line 972, in save_dict
    self._batch_setitems(obj.items())
  File "D:\ProgramData\miniconda3\envs\py310\lib\pickle.py", line 997, in _batch_setitems
    save(k)
  File "D:\ProgramData\miniconda3\envs\py310\lib\site-packages\dill\_dill.py", line 388, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "D:\ProgramData\miniconda3\envs\py310\lib\pickle.py", line 539, in save
    pid = self.persistent_id(obj)
  File "D:\ProgramData\miniconda3\envs\py310\lib\site-packages\torch\serialization.py", line 622, in persistent_id
    storage_type = normalize_storage_type(type(obj))
  File "D:\ProgramData\miniconda3\envs\py310\lib\site-packages\torch\serialization.py", line 226, in normalize_storage_type
    return getattr(torch, storage_type.__name__)
AttributeError: module 'torch' has no attribute 'str'

解决方法：

pip install dill -U

升级到 dill 0.3.8 后，该报错不再出现。