YOLOv8检测图片和视频

一、检测图片

Python

python 复制代码
import cv2
from ultralytics import YOLO
import torch

model_path = 'object_detection/best.pt'  # Change this to your YOLOv8 model's path
image_path = 'object_detection/32.jpg'  # Change this to your image's path

# Load the trained YOLOv8 model and run it on the GPU when one is available.
model = YOLO(model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device: %s" % device)
model.to(device)

# cv2.imread reports a missing/unreadable file by returning None rather than
# raising, so check explicitly to fail with a clear message.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("Could not read image: %s" % image_path)

# NumPy image shape is (rows, cols, ...) i.e. (height, width, ...), while
# cv2.resize takes its target size as (width, height).
height, width = image.shape[:2]
# Shrink to about a quarter of the original size, rounded down to a multiple
# of 32. Clamp to 32 so images smaller than 128 px never yield a zero-sized
# target, which cv2.resize rejects.
new_size = (max(32, 32 * (width // 128)), max(32, 32 * (height // 128)))
image = cv2.resize(image, new_size)

# The OpenCV BGR array is passed to the model as-is.
with torch.no_grad():
    results = model.predict(image)

for result in results:
    for box in result.boxes:
        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
        # Draw the bounding box on the BGR image
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Put the class name just above the box; clamp the baseline so the
        # text stays visible for boxes that touch the top edge.
        cv2.putText(image, result.names[int(box.cls)], (x1, max(y1 - 10, 20)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

cv2.imshow('Video', image)
# Block until any key is pressed, then close the window.
cv2.waitKey(0)
cv2.destroyAllWindows()

二、检测视频

Python

python 复制代码
import cv2
from ultralytics import YOLO
import torch

model_path = 'object_detection/best.pt'  # Change this to your YOLOv8 model's path
video_path = 'object_detection/物块.mp4'  # Change this to your video's path
batch_size = 8  # Number of frames handed to the model per predict() call
conf_threshold = 0.9  # Only detections with confidence above this are drawn

# Load the trained YOLOv8 model and run it on the GPU when one is available.
model = YOLO(model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device: %s" % device)
model.to(device)


def _detect_and_show(batch):
    """Run detection on a list of BGR frames, annotate and display each one.

    Returns False as soon as the user presses "q" in the preview window,
    True once every frame in the batch has been shown.
    """
    # OpenCV frames are BGR; they are passed to the model unchanged.
    with torch.no_grad():
        results = model.predict(batch)

    for frame, result in zip(batch, results):
        for box in result.boxes:
            print(box.conf)
            if float(box.conf) > conf_threshold:
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                # Draw the bounding box on the BGR frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Add a label above the box
                cv2.putText(frame, result.names[int(box.cls)], (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            return False
    return True


cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open video.")
    raise SystemExit(1)

frames = []
try:
    # Read the video frame by frame and process it in batches.
    while True:
        ret, frame = cap.read()
        if not ret:
            # Flush the trailing frames that did not fill a whole batch;
            # otherwise up to batch_size - 1 frames would never be shown.
            if frames:
                _detect_and_show(frames)
            print("Finished processing video.")
            break

        # NumPy shape is (height, width, ...); cv2.resize wants (width, height).
        height, width = frame.shape[:2]
        # Roughly halve the frame, rounded down to a multiple of 32, and clamp
        # so tiny frames never produce a zero-sized target.
        new_size = (max(32, 32 * (width // 64)), max(32, 32 * (height // 64)))
        frames.append(cv2.resize(frame, new_size))

        if len(frames) == batch_size:
            if not _detect_and_show(frames):
                break  # user pressed "q"
            frames.clear()
finally:
    # Single cleanup path for normal end, early quit and errors.
    cap.release()
    cv2.destroyAllWindows()

三、使用 SAHI 的视频检测

python 复制代码
import argparse
import sys
import cv2
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
import imageio
import numpy as np


def run(weights="yolov8n.pt", source="test.mp4", view_img=False):
    """
    Run object detection on a video using YOLOv8 and SAHI.

    Each frame is resized, run through SAHI sliced prediction, annotated with
    the detections scoring above 0.85, and appended to
    ``object_detection/object_detection.mp4``.

    Args:
        weights (str): Model weights path.
        source (str): Video file path. A purely numeric value such as ``"0"``
            is treated as a camera index.
        view_img (bool): Show results in a preview window; press "q" there to
            stop early.
    """
    # Local import: this script's import block does not bring torch into scope.
    import torch

    # Fall back to the CPU instead of failing on machines without CUDA.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    detection_model = AutoDetectionModel.from_pretrained(
        model_type="yolov8", model_path=weights, confidence_threshold=0.3, device=device
    )

    # Honour the ``source`` argument (previously the webcam was always opened).
    capture_source = int(source) if str(source).isdigit() else source
    videocapture = cv2.VideoCapture(capture_source)
    if not videocapture.isOpened():
        print("Error: Could not open video.")
        videocapture.release()
        return

    # Roughly halve the frame size, rounded down to a multiple of 32; clamp so
    # a tiny or unreported size never yields a zero-sized resize target.
    new_shape = (
        max(32, 32 * int(videocapture.get(cv2.CAP_PROP_FRAME_WIDTH) / 64)),
        max(32, 32 * int(videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT) / 64)),
    )

    # Keep the source's playback speed when it is reported; some sources
    # (e.g. cameras) report 0, in which case use the previous fixed 40 fps.
    reported_fps = videocapture.get(cv2.CAP_PROP_FPS)
    fps = reported_fps if reported_fps > 0 else 1 / 0.025
    writer = imageio.get_writer("object_detection/object_detection.mp4", fps=fps)

    try:
        while videocapture.isOpened():
            success, frame = videocapture.read()
            if not success:
                break

            image = cv2.resize(frame, new_shape)  # BGR copy used for drawing
            # The prediction input is converted from OpenCV's BGR to RGB.
            rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = get_sliced_prediction(
                rgb_frame,
                detection_model,
                slice_height=512,
                slice_width=512,
                overlap_height_ratio=0.2,
                overlap_width_ratio=0.2,
            )

            for prediction in results.object_prediction_list:
                score = prediction.score.value
                print(score)
                if float(score) <= 0.85:
                    continue

                bbox = prediction.bbox
                x1, y1 = int(bbox.minx), int(bbox.miny)
                x2, y2 = int(bbox.maxx), int(bbox.maxy)
                label = str(prediction.category.name)

                cv2.rectangle(image, (x1, y1), (x2, y2), (56, 56, 255), 2)
                # Filled background sized to the text so the label stays readable.
                t_size = cv2.getTextSize(label, 0, fontScale=0.6, thickness=1)[0]
                cv2.rectangle(image, (x1, y1 - t_size[1] - 3), (x1 + t_size[0], y1 + 3), (56, 56, 255), -1)
                cv2.putText(image, label, (x1, y1 - 2), 0, 0.6, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)

            # Always record the annotated frame, converted back to RGB for the
            # writer; previously this only happened while previewing.
            writer.append_data(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            if view_img:
                cv2.imshow("result", image)
                # Only poll the keyboard when a window exists.
                if cv2.waitKey(1) == ord("q"):
                    break
    finally:
        # Closing the writer finalizes the output file, so do it on every
        # exit path (normal end, "q", or an exception).
        writer.close()
        videocapture.release()
        if view_img:
            cv2.destroyAllWindows()


def _str2bool(value):
    """Convert a command-line token such as "true" or "False" into a bool."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string is kept as False to match the old ``type=bool`` result.
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)


def parse_opt():
    """Parse command line arguments.

    Returns:
        argparse.Namespace: with ``weights``, ``source`` and ``view_img``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights", type=str, default="object_detection/best.pt", help="initial weights path")
    parser.add_argument("--source", type=str, default="object_detection/物块.mp4", help="video file path")
    # ``type=bool`` would turn any non-empty string, including "False", into
    # True. Parse the text explicitly; a bare ``--view-img`` means True.
    parser.add_argument(
        "--view-img", type=_str2bool, nargs="?", const=True, default=True, help="show results"
    )
    return parser.parse_args()


def main(options):
    """Run detection with the parsed options expanded into keyword arguments."""
    run_kwargs = vars(options)
    run(**run_kwargs)


if __name__ == "__main__":
    # Parse the CLI flags and hand them straight to main().
    main(parse_opt())
相关推荐
向哆哆18 分钟前
粉尘环境分类检测千张图数据集(适用YOLO系列)(已标注+划分/可直接训练)
yolo·分类·数据挖掘
浩瀚之水_csdn23 分钟前
avformat_new_stream 详细解析
计算机视觉
fie88892 小时前
基于Matlab实现的指纹识别系统流程
opencv·计算机视觉·matlab
琅琊榜首202015 小时前
移动端AI挂机新范式:YOLOv8+NCNN实现无Root视觉自动化
人工智能·yolo·自动化
sali-tec16 小时前
C# 基于OpenCv的视觉工作流-章26-图像拼接
图像处理·人工智能·opencv·算法·计算机视觉
xinxiangwangzhi_17 小时前
立体匹配--Cross-Scale Cost Aggregation for Stereo Matching
图像处理·计算机视觉
沃达德软件18 小时前
模糊图像复原技术解析
图像处理·人工智能·深度学习·目标检测·机器学习·计算机视觉·目标跟踪
CoovallyAIHub18 小时前
模糊、噪声、压缩……让检测器学会主动评估画质
深度学习·算法·计算机视觉
智驱力人工智能19 小时前
地铁隧道轨道障碍物实时检测方案 守护城市地下动脉的工程实践 轨道障碍物检测 高铁站区轨道障碍物AI预警 铁路轨道异物识别系统价格
人工智能·算法·yolo·目标检测·计算机视觉·边缘计算
智驱力人工智能20 小时前
机场鸟类活动智能监测 守护航空安全的精准工程实践 飞鸟检测 机场鸟击预防AI预警系统方案 机场停机坪鸟类干扰实时监测机场航站楼鸟击预警
人工智能·opencv·算法·安全·yolo·目标检测·边缘计算