基于Label Studio的视频标注与YOLO模型训练全流程指南

下面是从视频标注到YOLO模型训练的完整工作流程，我将用详细的步骤和代码示例为您展示整个过程。

📋 全流程概览

原始视频 → 视频预处理 → Label Studio标注 → 标注数据导出 → 格式转换 → 数据集划分 → YOLO模型训练 → 模型评估 → 模型部署

第一阶段：环境准备与数据预处理

1.1 安装必要的软件包

bash 复制代码

# 安装Label Studio
pip install label-studio

# 安装视频处理相关库
pip install opencv-python moviepy scikit-learn

# 安装YOLO相关库（以YOLOv8为例）
pip install ultralytics

1.2 视频预处理脚本

python 复制代码

# video_preprocessing.py
import cv2
import os
from pathlib import Path

def extract_frames_from_video(video_path, output_dir, frame_interval=10):
    """Save every ``frame_interval``-th frame of a video as a JPEG file.

    Files are written to ``output_dir`` as ``frame_000000.jpg``,
    ``frame_000001.jpg``, ... The number counts the frames that were saved,
    not their position in the source video.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG files; created if missing.
        frame_interval: Keep one frame out of this many; must be >= 1.

    Returns:
        None. Progress and failures are reported on stdout.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    # Reject bad intervals up front: 0 would otherwise only surface as a
    # ZeroDivisionError from the modulo inside the read loop.
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"无法打开视频文件: {video_path}")
            return

        os.makedirs(output_dir, exist_ok=True)

        frame_count = 0
        saved_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                frame_path = os.path.join(output_dir, f"frame_{saved_count:06d}.jpg")
                # imwrite signals failure through its return value; only
                # count frames that really reached the disk.
                if cv2.imwrite(frame_path, frame):
                    saved_count += 1
                else:
                    print(f"无法写入帧: {frame_path}")

            frame_count += 1
    finally:
        # Release the capture even when reading or writing raised.
        cap.release()

    print(f"从 {video_path} 提取了 {saved_count} 帧到 {output_dir}")

def prepare_video_dataset(video_directory, output_base_dir):
    """Extract frames from every video located directly in a directory.

    Entries whose suffix (case-insensitive) is .mp4, .avi, .mov or .mkv are
    passed to ``extract_frames_from_video``; the frames of ``<name>.<ext>``
    go to ``<output_base_dir>/<name>/frames``. Other entries are ignored and
    sub-directories are not descended into.

    Args:
        video_directory: Directory to scan for video files.
        output_base_dir: Root directory for the per-video frame folders.
    """
    supported_suffixes = ('.mp4', '.avi', '.mov', '.mkv')

    for entry in Path(video_directory).iterdir():
        # Guard clause: skip anything that is not a recognised video file.
        if entry.suffix.lower() not in supported_suffixes:
            continue

        frames_dir = Path(output_base_dir) / entry.stem / "frames"
        print(f"处理视频: {entry}")
        extract_frames_from_video(str(entry), str(frames_dir))

# Usage example: extract frames from every video under raw_videos/ into processed_data/.
if __name__ == "__main__":
    prepare_video_dataset("raw_videos", "processed_data")

第二阶段：Label Studio标注

2.1 启动Label Studio

bash 复制代码

# 启动Label Studio
label-studio start

# 或者使用特定端口
label-studio start --port 8080

2.2 配置标注模板

创建视频目标检测的标注模板：

xml 复制代码

<!-- video_annotation_template.xml -->
<View>
  <Header value="视频目标检测标注"/>
  <!-- framerate must be a number; set it to the real FPS of the videos so frame indices line up -->
  <Video name="video" value="$video" framerate="25.0"/>
  <VideoRectangle name="object" toName="video"/>
  <!-- Labels attaches a class to each box; Choices would classify the whole video instead -->
  <Labels name="class" toName="video">
    <Label value="person" background="red"/>
    <Label value="car" background="blue"/>
    <Label value="bicycle" background="green"/>
    <Label value="motorcycle" background="orange"/>
    <Label value="truck" background="purple"/>
  </Labels>
</View>

2.3 创建Label Studio项目

python 复制代码

# create_labelstudio_project.py
import requests
import json

def create_video_annotation_project(api_url, api_token, project_name):
    """Create a Label Studio project through its REST API.

    The labeling configuration is read from ``video_annotation_template.xml``
    in the current working directory and posted, together with fixed project
    settings, to ``{api_url}/api/projects``.

    Args:
        api_url: Base URL of the Label Studio server, without a trailing slash.
        api_token: Token sent in the ``Authorization: Token ...`` header.
        project_name: Title of the new project.

    Returns:
        The ``id`` field of the response JSON when the server answers with
        HTTP 201, otherwise None (the response text is printed).
    """
    headers = {
        'Authorization': f'Token {api_token}',
        'Content-Type': 'application/json'
    }

    # The template contains non-ASCII text, so pin the encoding instead of
    # relying on the platform default.
    with open('video_annotation_template.xml', 'r', encoding='utf-8') as f:
        labeling_config = f.read()

    project_data = {
        "title": project_name,
        "description": "视频目标检测标注项目",
        "labeling_config": labeling_config,
        "expert_instruction": "请仔细标注视频中的目标物体",
        "show_instruction": True,
        "show_skip_button": True,
        "enable_empty_annotation": False,
        "show_annotation_history": True
    }

    response = requests.post(
        f"{api_url}/api/projects",
        headers=headers,
        json=project_data
    )

    if response.status_code == 201:
        project_id = response.json()['id']
        print(f"项目创建成功，ID: {project_id}")
        return project_id

    print(f"项目创建失败: {response.text}")
    return None

def import_tasks_to_project(api_url, api_token, project_id, tasks_data):
    """Send annotation tasks to a Label Studio project.

    ``tasks_data`` is posted as the JSON body to
    ``{api_url}/api/projects/{project_id}/tasks`` with token authentication.

    Returns:
        True when the server answers with HTTP 201, False otherwise.
    """
    # NOTE(review): Label Studio documents bulk import under
    # /api/projects/{id}/import - confirm this /tasks route accepts the
    # payload that callers send.
    endpoint = f"{api_url}/api/projects/{project_id}/tasks"

    reply = requests.post(
        endpoint,
        headers={
            'Authorization': f'Token {api_token}',
            'Content-Type': 'application/json',
        },
        json=tasks_data,
    )

    created = reply.status_code == 201
    return created

第三阶段：标注数据导出与格式转换

3.1 从Label Studio导出数据

python 复制代码

# export_annotations.py
import requests
import json
import os
from pathlib import Path

def export_labelstudio_annotations(api_url, api_token, project_id, export_format='JSON'):
    """Download a project's annotations and store them as a JSON file.

    Issues a GET to ``{api_url}/api/projects/{project_id}/export`` with
    ``export_type=export_format``. On HTTP 200 the parsed response is written
    to ``labelstudio_export_{project_id}.json`` in the working directory.

    Returns:
        The name of the written file, or None when the status is not 200
        (the response text is printed in that case).
    """
    reply = requests.get(
        f"{api_url}/api/projects/{project_id}/export",
        headers={
            'Authorization': f'Token {api_token}',
            'Content-Type': 'application/json',
        },
        params={'export_type': export_format},
    )

    # Guard clause for the failure path.
    if reply.status_code != 200:
        print(f"导出失败: {reply.text}")
        return None

    target_name = f"labelstudio_export_{project_id}.json"
    with open(target_name, 'w', encoding='utf-8') as out_file:
        # NOTE(review): the body is always parsed as JSON, whatever
        # export_format was requested - presumably only JSON-like formats
        # work here; confirm before passing other formats.
        json.dump(reply.json(), out_file, ensure_ascii=False, indent=2)

    print(f"标注数据已导出到: {target_name}")
    return target_name

def _resolve_class_name(item, class_mapping):
    """Return the first class of a result item that is a key of ``class_mapping``, else None."""
    value = item.get('value', {})
    # Video rectangles carry their classes in value['labels']; the older
    # 'choices' locations are still consulted for backward compatibility.
    candidates = [
        *value.get('labels', []),
        *value.get('choices', []),
        *item.get('choices', []),
    ]
    for name in candidates:
        if name in class_mapping:
            return name
    return None


def _iter_frame_boxes(item):
    """Yield ``(frame, x, y, width, height)`` for every box stored in a result item."""
    value = item.get('value', {})
    sequence = value.get('sequence')
    if sequence is not None:
        # Keyframe form: one entry per keyframe. Frames between keyframes are
        # NOT interpolated here.
        for keyframe in sequence:
            yield (
                keyframe.get('frame', 0),
                keyframe['x'],
                keyframe['y'],
                keyframe['width'],
                keyframe['height'],
            )
    else:
        # Flat form: a single box with the frame number stored on the item.
        yield (
            item.get('frame', 0),
            value['x'],
            value['y'],
            value['width'],
            value['height'],
        )


def convert_to_yolo_format(export_file, output_dir, class_mapping):
    """Convert a Label Studio JSON export of video rectangles to YOLO label files.

    For each task with a ``data.video`` entry, every ``videorectangle`` result
    is expanded into per-frame boxes and written to
    ``<output_dir>/labels/<video stem>_frame_<frame:06d>.txt``, one
    ``class_id x_center y_center width height`` line per box. Coordinates are
    divided by 100, i.e. they are assumed to be percentages of the frame size.

    Two result layouts are accepted: boxes stored as keyframes in
    ``value['sequence']`` with classes in ``value['labels']``, and a flat
    layout with ``value['x'|'y'|'width'|'height']``, ``item['frame']`` and
    ``item['choices']``.

    ``<output_dir>/images`` is created but not populated by this function.
    A frame whose boxes all have classes missing from ``class_mapping`` still
    gets an (empty) label file. When one task has several annotations that
    touch the same frame, the later annotation overwrites the file.

    Args:
        export_file: Path of the JSON export (a list of tasks).
        output_dir: Directory that receives the ``images`` and ``labels`` folders.
        class_mapping: Dict mapping class names to integer YOLO class ids.

    Raises:
        KeyError: If a box lacks one of ``x``, ``y``, ``width``, ``height``.
    """
    with open(export_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    images_dir = Path(output_dir) / "images"
    labels_dir = Path(output_dir) / "labels"
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    for task in data:
        video_path = task['data'].get('video', '')
        if not video_path:
            continue
        video_name = Path(video_path).stem

        for annotation in task.get('annotations', []):
            # frame number -> list of box dicts for that frame
            frame_annotations = {}
            for item in annotation.get('result', []):
                if item.get('type') != 'videorectangle':
                    continue

                class_name = _resolve_class_name(item, class_mapping)
                for frame, x, y, width, height in _iter_frame_boxes(item):
                    boxes = frame_annotations.setdefault(frame, [])
                    if class_name is None:
                        # Unmapped class: keep the frame registered but
                        # write no box for it.
                        continue
                    boxes.append({
                        'class_id': class_mapping[class_name],
                        'x_center': x + width / 2,
                        'y_center': y + height / 2,
                        'width': width,
                        'height': height
                    })

            for frame, annotations in frame_annotations.items():
                label_path = labels_dir / f"{video_name}_frame_{frame:06d}.txt"
                # Percent coordinates -> normalised [0, 1] YOLO coordinates.
                lines = [
                    f"{ann['class_id']} {ann['x_center']/100:.6f} {ann['y_center']/100:.6f} "
                    f"{ann['width']/100:.6f} {ann['height']/100:.6f}\n"
                    for ann in annotations
                ]
                with open(label_path, 'w', encoding='utf-8') as f:
                    f.writelines(lines)

    print(f"YOLO格式数据已保存到: {output_dir}")

# Maps each annotation class name to its YOLO class index (position in the tuple).
CLASS_MAPPING = {
    name: index
    for index, name in enumerate(
        ("person", "car", "bicycle", "motorcycle", "truck")
    )
}

第四阶段：数据集准备与划分

4.1 数据集划分脚本

python 复制代码

# dataset_preparation.py
import os
import random
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

def prepare_yolo_dataset(raw_data_dir, output_dir, test_size=0.2, val_size=0.1):
    """Split a flat images/labels folder into YOLO train/val/test folders.

    Reads ``<raw_data_dir>/images/*.jpg``, splits the files with a fixed seed
    and copies each image to ``<output_dir>/images/<split>``. A label file
    ``<raw_data_dir>/labels/<image stem>.txt`` is copied to
    ``<output_dir>/labels/<split>`` when it exists; images without one are
    copied without a label.

    Args:
        raw_data_dir: Directory containing the ``images`` and ``labels`` folders.
        output_dir: Root of the dataset to create.
        test_size: Fraction of all images assigned to the test split.
        val_size: Fraction of all images assigned to the validation split.

    Returns:
        ``Path(output_dir)``.

    Raises:
        ValueError: If no ``.jpg`` image is found. ``train_test_split`` may
            presumably also raise it when there are too few images to fill
            every split.
    """
    dataset_dir = Path(output_dir)
    for kind in ("images", "labels"):
        for split in ("train", "val", "test"):
            (dataset_dir / kind / split).mkdir(parents=True, exist_ok=True)

    raw_images_dir = Path(raw_data_dir) / "images"
    # Sort first: directory listing order is not guaranteed, and the seeded
    # split is only reproducible for a stable input order.
    image_files = sorted(f for f in raw_images_dir.glob("*.jpg") if f.is_file())
    if not image_files:
        raise ValueError(f"在 {raw_images_dir} 中未找到 .jpg 图像")

    # Step 1: carve off the combined val+test share.
    holdout_size = test_size + val_size
    train_files, holdout_files = train_test_split(
        image_files, test_size=holdout_size, random_state=42
    )
    # Step 2: the second list returned is the test split, so its share of
    # the holdout must be test_size (not val_size).
    val_files, test_files = train_test_split(
        holdout_files, test_size=test_size / holdout_size, random_state=42
    )

    def copy_files(files, image_dest, label_dest):
        """Copy the images and, where present, their label files."""
        for img_file in files:
            shutil.copy2(img_file, image_dest / img_file.name)

            label_file = Path(raw_data_dir) / "labels" / f"{img_file.stem}.txt"
            if label_file.exists():
                shutil.copy2(label_file, label_dest / label_file.name)

    copy_files(train_files, dataset_dir / "images" / "train", dataset_dir / "labels" / "train")
    copy_files(val_files, dataset_dir / "images" / "val", dataset_dir / "labels" / "val")
    copy_files(test_files, dataset_dir / "images" / "test", dataset_dir / "labels" / "test")

    print("数据集划分完成:")
    print(f"训练集: {len(train_files)} 张图像")
    print(f"验证集: {len(val_files)} 张图像")
    print(f"测试集: {len(test_files)} 张图像")

    return dataset_dir

def create_yaml_config(dataset_dir, class_names, config_name="dataset.yaml"):
    """Write the YOLO dataset YAML file into the dataset directory.

    The file records the absolute dataset path, the relative
    ``images/train``, ``images/val`` and ``images/test`` folders, the class
    count and the class names.

    Args:
        dataset_dir: Dataset root, as a ``Path`` or a string.
        class_names: Class names; rendered with their Python repr.
        config_name: File name of the config inside ``dataset_dir``.

    Returns:
        ``Path`` of the written config file.
    """
    # Accept plain strings as well as Path objects.
    dataset_dir = Path(dataset_dir)

    config_content = f"""# YOLO数据集配置文件
path: {dataset_dir.absolute()}  # 数据集根目录
train: images/train  # 训练集图像目录
val: images/val      # 验证集图像目录
test: images/test    # 测试集图像目录

# 类别数量
nc: {len(class_names)}

# 类别名称
names: {class_names}
"""

    config_path = dataset_dir / config_name
    # The template contains non-ASCII comments, so pin the encoding instead
    # of relying on the platform default.
    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(config_content)

    print(f"配置文件已创建: {config_path}")
    return config_path

# Usage example
if __name__ == "__main__":
    # Build the train/val/test folder layout from the converted data
    dataset_path = prepare_yolo_dataset("converted_data", "yolo_dataset")
    
    # Write the dataset YAML config next to the data
    classes = ["person", "car", "bicycle", "motorcycle", "truck"]
    yaml_path = create_yaml_config(dataset_path, classes)

第五阶段：YOLO模型训练

5.1 训练脚本

python 复制代码

# train_yolo.py
from ultralytics import YOLO
import yaml
import torch
from pathlib import Path

def train_yolo_model(config_path, model_size='yolov8n', epochs=50, imgsz=640):
    """Train a YOLO model starting from the ``<model_size>.pt`` weights.

    Args:
        config_path: Path of the dataset YAML config.
        model_size: Model name; ``.pt`` is appended to form the weights file.
        epochs: Number of training epochs.
        imgsz: Input image size.

    Returns:
        Whatever ``model.train`` returns.
    """
    weights_file = f'{model_size}.pt'
    detector = YOLO(weights_file)

    train_options = {
        'data': config_path,   # dataset config file
        'epochs': epochs,
        'imgsz': imgsz,
        'batch': 16,           # batch size
        'patience': 10,        # early-stopping patience
        'save': True,          # keep checkpoints
        'exist_ok': True,      # overwrite an existing run
        'verbose': True,       # print training details
    }
    return detector.train(**train_options)

def evaluate_model(model_path, config_path):
    """Validate a trained model and print its box metrics.

    Args:
        model_path: Path of the trained weights file.
        config_path: Path of the dataset YAML config passed to ``model.val``.

    Returns:
        The metrics object returned by ``model.val``.
    """
    model = YOLO(model_path)

    metrics = model.val(data=config_path)

    print("模型评估结果:")
    print(f"mAP50: {metrics.box.map50:.3f}")
    print(f"mAP50-95: {metrics.box.map:.3f}")
    # The box metrics expose mean precision/recall as ``mp``/``mr``; there
    # is no ``precision``/``recall`` attribute to format.
    print(f"精确率: {metrics.box.mp:.3f}")
    print(f"召回率: {metrics.box.mr:.3f}")

    return metrics

# Usage example
if __name__ == "__main__":
    # Path of the dataset config file
    dataset_config = "yolo_dataset/dataset.yaml"
    
    # Train the model
    print("开始训练YOLO模型...")
    training_results = train_yolo_model(
        config_path=dataset_config,
        model_size='yolov8n',
        epochs=100,
        imgsz=640
    )
    
    # Evaluate the model
    # NOTE(review): assumes the training run wrote to runs/detect/train - confirm against the trainer's output directory.
    best_model_path = "runs/detect/train/weights/best.pt"
    evaluation_results = evaluate_model(best_model_path, dataset_config)

5.2 高级训练配置

python 复制代码

# advanced_training.py
from ultralytics import YOLO

def advanced_training_with_augmentation(config_path):
    """Train ``yolov8n.pt`` with explicit optimiser, loss and augmentation settings.

    Args:
        config_path: Path of the dataset YAML config.

    Returns:
        Whatever ``model.train`` returns.
    """
    schedule = {
        'epochs': 100,
        'imgsz': 640,
        'batch': 16,
    }
    optimiser = {
        'lr0': 0.01,              # initial learning rate
        # NOTE(review): Ultralytics documents lrf as a fraction of lr0, not an absolute rate - confirm.
        'lrf': 0.01,              # final learning rate setting
        'momentum': 0.937,
        'weight_decay': 0.0005,
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
    }
    loss_weights = {
        'box': 7.5,               # box loss weight
        'cls': 0.5,               # classification loss weight
        'dfl': 1.5,               # DFL loss weight
    }
    augmentation = {
        'hsv_h': 0.015,           # hue
        'hsv_s': 0.7,             # saturation
        'hsv_v': 0.4,             # value / brightness
        'degrees': 0.0,           # rotation
        'translate': 0.1,
        'scale': 0.5,
        'shear': 0.0,
        'perspective': 0.0,
        'flipud': 0.0,            # vertical flip
        'fliplr': 0.5,            # horizontal flip
        'mosaic': 1.0,
        'mixup': 0.0,
        'copy_paste': 0.0,
    }

    detector = YOLO('yolov8n.pt')
    return detector.train(
        data=config_path,
        **schedule,
        **optimiser,
        **loss_weights,
        **augmentation,
    )

第六阶段：模型部署与推理

6.1 模型推理脚本

python 复制代码

# inference.py
from ultralytics import YOLO
import cv2
import numpy as np

class VideoObjectDetector:
    """Run a YOLO model over a video and write or display the annotated frames."""

    def __init__(self, model_path, conf_threshold=0.5, iou_threshold=0.5):
        """Load the model and remember the inference thresholds.

        Args:
            model_path: Path of the weights file passed to ``YOLO``.
            conf_threshold: Value passed as ``conf`` on every inference call.
            iou_threshold: Value passed as ``iou`` on every inference call.
        """
        self.model = YOLO(model_path)
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def detect_video(self, video_path, output_path=None):
        """Detect objects frame by frame in a video.

        With ``output_path`` the annotated frames are written to that file
        using the ``mp4v`` codec; without it they are shown in a window until
        the video ends or ``q`` is pressed.

        Args:
            video_path: Path of the video to process.
            output_path: Optional path of the annotated output video.
        """
        cap = cv2.VideoCapture(video_path)
        out = None
        window_used = False
        try:
            if not cap.isOpened():
                print(f"无法打开视频文件: {video_path}")
                return

            # Keep the frame rate as a float (29.97 must not become 29) and
            # fall back to an arbitrary 30 when the source reports none.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = 30.0
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            if output_path:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = self.model(
                    frame,
                    conf=self.conf_threshold,
                    iou=self.iou_threshold,
                    verbose=False
                )

                # Draw the detections of this frame.
                annotated_frame = results[0].plot()

                if out is not None:
                    out.write(annotated_frame)
                else:
                    window_used = True
                    cv2.imshow('Detection Result', annotated_frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                frame_count += 1
                print(f"处理帧: {frame_count}")
        finally:
            # Release resources even when inference or I/O raised.
            cap.release()
            if out is not None:
                out.release()
            if window_used:
                # Only touch the GUI when a window was actually opened, so
                # file-only runs do not depend on GUI support.
                cv2.destroyAllWindows()

# Usage example
if __name__ == "__main__":
    # Create the detector from the trained weights
    detector = VideoObjectDetector("runs/detect/train/weights/best.pt")
    
    # Run detection on a video and write the annotated copy
    detector.detect_video("test_video.mp4", "output_video.mp4")

全流程自动化脚本

python 复制代码

# full_pipeline.py
import os
import sys
from pathlib import Path

def run_full_pipeline():
    """Run the whole workflow: preprocess, annotate, export, convert, split, train, evaluate.

    The annotation step is manual: the function waits for Enter on stdin
    before it exports. If the export fails, the pipeline stops there.
    The server URL, API token and project id are placeholders that must be
    edited before use.
    """
    print("=== Label Studio视频标注到YOLO训练全流程 ===")

    # 1. Video preprocessing
    print("步骤1: 视频预处理...")
    from video_preprocessing import prepare_video_dataset
    prepare_video_dataset("raw_videos", "processed_data")

    # 2. Annotation in Label Studio (done by hand)
    print("步骤2: 请在Label Studio中完成标注...")
    input("标注完成后按Enter键继续...")

    # 3. Export and conversion
    print("步骤3: 导出并转换标注数据...")
    from export_annotations import export_labelstudio_annotations, convert_to_yolo_format
    export_file = export_labelstudio_annotations(
        "http://localhost:8080",
        "your_api_token",
        "your_project_id"
    )
    # The exporter signals failure by returning None; passing that on would
    # crash when the converter tries to open the file.
    if export_file is None:
        print("导出标注数据失败，流程终止。")
        return

    CLASS_MAPPING = {"person": 0, "car": 1, "bicycle": 2, "motorcycle": 3, "truck": 4}
    convert_to_yolo_format(export_file, "converted_data", CLASS_MAPPING)

    # 4. Dataset preparation
    # NOTE(review): the converter writes label files only, while the dataset
    # step reads images from converted_data/images - confirm where those
    # frame images are supposed to come from.
    print("步骤4: 准备YOLO数据集...")
    from dataset_preparation import prepare_yolo_dataset, create_yaml_config
    dataset_path = prepare_yolo_dataset("converted_data", "yolo_dataset")
    yaml_path = create_yaml_config(dataset_path, list(CLASS_MAPPING.keys()))

    # 5. Training
    print("步骤5: 训练YOLO模型...")
    from train_yolo import train_yolo_model, evaluate_model
    train_yolo_model(str(yaml_path))

    # 6. Evaluation
    print("步骤6: 评估训练好的模型...")
    best_model = "runs/detect/train/weights/best.pt"
    evaluate_model(best_model, str(yaml_path))

    print("全流程完成！")

# Script entry point: run the complete pipeline when executed directly.
if __name__ == "__main__":
    run_full_pipeline()

关键注意事项

标注质量：确保标注的准确性和一致性，这是模型性能的基础
数据平衡：尽量保持各个类别的样本数量均衡
硬件要求：YOLO训练建议使用GPU并确保有足够的显存；仅用CPU也可以训练，但速度会明显变慢
版本兼容性：注意Label Studio和YOLO版本的兼容性
备份数据：定期备份标注数据和训练结果