基于Label Studio的视频标注与YOLO模型训练全流程指南
下面是从视频标注到YOLO模型训练的完整工作流程,我将用详细的步骤和代码示例为您展示整个过程。
📋 全流程概览
原始视频
视频预处理
Label Studio标注
标注数据导出
格式转换
数据集划分
YOLO模型训练
模型评估
模型部署
第一阶段:环境准备与数据预处理
1.1 安装必要的软件包
bash
# Install Label Studio
pip install label-studio
# Install the video-processing libraries
pip install opencv-python moviepy scikit-learn
# Install the YOLO library (YOLOv8 is used as the example)
pip install ultralytics
1.2 视频预处理脚本
python
# video_preprocessing.py
import cv2
import os
from pathlib import Path
def extract_frames_from_video(video_path, output_dir, frame_interval=10):
    """
    Save every ``frame_interval``-th frame of a video as a JPEG file.

    Frames are written to ``output_dir`` as ``frame_000000.jpg``,
    ``frame_000001.jpg``, ...  The number in the file name is the running
    count of saved frames, not the frame's index inside the video.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.
        output_dir: Directory that receives the JPEG files; created if missing.
        frame_interval: Keep one frame out of this many; must be >= 1.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        # A zero interval would otherwise surface as a ZeroDivisionError in
        # the modulo test below.
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"无法打开视频文件: {video_path}")
        return

    os.makedirs(output_dir, exist_ok=True)

    frame_count = 0
    saved_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                frame_filename = f"frame_{saved_count:06d}.jpg"
                frame_path = os.path.join(output_dir, frame_filename)
                # imwrite reports a failed write through its return value,
                # so only count frames that actually reached the disk.
                if cv2.imwrite(frame_path, frame):
                    saved_count += 1
                else:
                    print(f"保存帧失败: {frame_path}")
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    print(f"从 {video_path} 提取了 {saved_count} 帧到 {output_dir}")
def prepare_video_dataset(video_directory, output_base_dir):
    """
    Extract frames from every video file located directly in a directory.

    Entries whose suffix (case-insensitive) is .mp4, .avi, .mov or .mkv are
    processed; the frames of ``<name>.<ext>`` are written to
    ``<output_base_dir>/<name>/frames``.
    """
    supported_suffixes = ['.mp4', '.avi', '.mov', '.mkv']
    destination_root = Path(output_base_dir)

    for entry in Path(video_directory).iterdir():
        if entry.suffix.lower() not in supported_suffixes:
            continue
        frames_dir = destination_root / entry.stem / "frames"
        print(f"处理视频: {entry}")
        extract_frames_from_video(str(entry), str(frames_dir))
# Usage example: extract frames for every video found under raw_videos/.
if __name__ == "__main__":
    prepare_video_dataset(
        video_directory="raw_videos",
        output_base_dir="processed_data",
    )
第二阶段:Label Studio标注
2.1 启动Label Studio
bash
# Start Label Studio
label-studio start
# Or start it on a specific port
label-studio start --port 8080
2.2 配置标注模板
创建视频目标检测的标注模板:
xml
<!-- video_annotation_template.xml -->
<View>
  <Header value="视频目标检测标注"/>
  <!-- NOTE(review): Label Studio documents framerate as a number (or a
       task-data reference such as $fps); confirm that "original" is accepted
       and that it matches the real frame rate of the videos. -->
  <Video name="video" value="$video" framerate="original"/>
  <VideoRectangle
    name="object"
    toName="video"
    maxUsages="20"
    fillOpacity="0.5"
    strokeColor="#ff0000"
  />
  <!-- Labels (rather than Choices) attaches a class to each drawn box;
       Choices would classify the video as a whole. -->
  <Labels name="class" toName="video">
    <Label value="person" background="red"/>
    <Label value="car" background="blue"/>
    <Label value="bicycle" background="green"/>
    <Label value="motorcycle" background="orange"/>
    <Label value="truck" background="purple"/>
  </Labels>
</View>
2.3 创建Label Studio项目
python
# create_labelstudio_project.py
import requests
import json
def create_video_annotation_project(api_url, api_token, project_name):
    """
    Create a Label Studio project through the REST API.

    The labeling configuration is read from ``video_annotation_template.xml``
    in the current working directory.

    Args:
        api_url: Base URL of the Label Studio server; a trailing slash is
            tolerated.
        api_token: API token sent in the ``Authorization: Token ...`` header.
        project_name: Title of the new project.

    Returns:
        The ``id`` field of the JSON response when the server answers 201,
        otherwise ``None`` (the response text is printed).
    """
    headers = {
        'Authorization': f'Token {api_token}',
        'Content-Type': 'application/json'
    }

    # Read the template as UTF-8 explicitly: it may contain non-ASCII text,
    # and the platform default encoding is not UTF-8 everywhere.
    with open('video_annotation_template.xml', 'r', encoding='utf-8') as f:
        labeling_config = f.read()

    project_data = {
        "title": project_name,
        "description": "视频目标检测标注项目",
        "labeling_config": labeling_config,
        "expert_instruction": "请仔细标注视频中的目标物体",
        "show_instruction": True,
        "show_skip_button": True,
        "enable_empty_annotation": False,
        "show_annotation_history": True
    }

    # The project collection endpoint is /api/projects/ (with the trailing
    # slash); posting without it risks a redirect that drops the POST body.
    response = requests.post(
        f"{api_url.rstrip('/')}/api/projects/",
        headers=headers,
        json=project_data
    )

    if response.status_code == 201:
        project_id = response.json()['id']
        print(f"项目创建成功,ID: {project_id}")
        return project_id

    print(f"项目创建失败: {response.text}")
    return None
def import_tasks_to_project(api_url, api_token, project_id, tasks_data):
    """
    Import labeling tasks into an existing Label Studio project.

    Args:
        api_url: Base URL of the Label Studio server; a trailing slash is
            tolerated.
        api_token: API token sent in the ``Authorization: Token ...`` header.
        project_id: Id of the target project.
        tasks_data: JSON-serialisable task payload, sent as the request body.

    Returns:
        ``True`` when the server answers 201, otherwise ``False``.
    """
    headers = {
        'Authorization': f'Token {api_token}',
        'Content-Type': 'application/json'
    }

    # Bulk task import goes through the project's /import endpoint.
    response = requests.post(
        f"{api_url.rstrip('/')}/api/projects/{project_id}/import",
        headers=headers,
        json=tasks_data
    )
    return response.status_code == 201
第三阶段:标注数据导出与格式转换
3.1 从Label Studio导出数据
python
# export_annotations.py
import requests
import json
import os
from pathlib import Path
def export_labelstudio_annotations(api_url, api_token, project_id, export_format='JSON'):
    """
    Download a project's annotations and store them in a local JSON file.

    The export is requested from ``<api_url>/api/projects/<project_id>/export``
    and saved as ``labelstudio_export_<project_id>.json`` in the current
    working directory.

    Returns:
        The name of the written file, or ``None`` when the server did not
        answer with status 200.
    """
    auth_headers = {
        'Authorization': f'Token {api_token}',
        'Content-Type': 'application/json'
    }

    reply = requests.get(
        f"{api_url}/api/projects/{project_id}/export",
        headers=auth_headers,
        params={'export_type': export_format},
    )

    # Guard clause: report the failure and stop before touching the disk.
    if reply.status_code != 200:
        print(f"导出失败: {reply.text}")
        return None

    export_filename = f"labelstudio_export_{project_id}.json"
    with open(export_filename, 'w', encoding='utf-8') as out_file:
        json.dump(reply.json(), out_file, ensure_ascii=False, indent=2)

    print(f"标注数据已导出到: {export_filename}")
    return export_filename
def _video_rectangle_class(item, class_mapping):
    """Return the first class name of a result item found in class_mapping, else None."""
    value = item.get('value', {})
    # Class names are looked up in value['labels'] first and, as a fallback,
    # in the item-level 'choices' list.
    candidates = list(value.get('labels') or []) + list(item.get('choices') or [])
    for candidate in candidates:
        if candidate in class_mapping:
            return candidate
    return None


def _video_rectangle_boxes(item):
    """Yield (frame, x, y, width, height) for every box stored in a result item."""
    value = item.get('value', {})
    sequence = value.get('sequence')
    if sequence is None:
        # Flat layout: box fields directly in 'value', frame number on the item.
        sequence = [dict(value, frame=item.get('frame', 0))]
    for keyframe in sequence:
        yield (
            int(keyframe.get('frame', 0)),
            keyframe['x'],
            keyframe['y'],
            keyframe['width'],
            keyframe['height'],
        )


def convert_to_yolo_format(export_file, output_dir, class_mapping):
    """
    Convert a Label Studio JSON export of video boxes into YOLO label files.

    For every frame that carries at least one ``videorectangle`` box, a file
    ``<video stem>_frame_<frame:06d>.txt`` is written to ``<output_dir>/labels``
    with one ``class_id x_center y_center width height`` line per box.

    Boxes are read from ``value['sequence']`` (one entry per keyframe) with the
    class names in ``value['labels']``; items that instead store a single box
    directly in ``value`` with ``frame``/``choices`` on the item are accepted
    too.  Only frames present in the export are written; no boxes are
    interpolated between keyframes.

    Coordinates are divided by 100, i.e. they are assumed to be percentages of
    the frame size.

    NOTE: ``<output_dir>/images`` is created but not populated here; the frame
    images matching the label files have to be placed there separately.

    Args:
        export_file: Path of the JSON export file.
        output_dir: Directory that receives the ``images`` and ``labels``
            sub-directories.
        class_mapping: Dict mapping class name to YOLO class id; boxes whose
            class is not in the mapping are dropped.
    """
    with open(export_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    images_dir = Path(output_dir) / "images"
    labels_dir = Path(output_dir) / "labels"
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    for task in data:
        video_path = task['data'].get('video', '')
        if not video_path:
            continue
        video_name = Path(video_path).stem

        # Each annotation is written on its own, so when a task has several
        # annotations the last one wins for any frame they share.
        for annotation in task.get('annotations', []):
            frame_annotations = {}
            for item in annotation.get('result', []):
                if item.get('type') != 'videorectangle':
                    continue
                class_name = _video_rectangle_class(item, class_mapping)
                for frame, x, y, width, height in _video_rectangle_boxes(item):
                    # The frame entry is created even when the class is
                    # unknown, which yields an empty label file for it.
                    boxes = frame_annotations.setdefault(frame, [])
                    if class_name is None:
                        continue
                    boxes.append({
                        'class_id': class_mapping[class_name],
                        'x_center': x + width / 2,
                        'y_center': y + height / 2,
                        'width': width,
                        'height': height
                    })

            for frame, annotations in frame_annotations.items():
                label_path = labels_dir / f"{video_name}_frame_{frame:06d}.txt"
                with open(label_path, 'w') as f:
                    for ann in annotations:
                        # Percent coordinates -> 0..1 range expected by YOLO.
                        line = (
                            f"{ann['class_id']} {ann['x_center']/100:.6f} "
                            f"{ann['y_center']/100:.6f} {ann['width']/100:.6f} "
                            f"{ann['height']/100:.6f}\n"
                        )
                        f.write(line)

    print(f"YOLO格式数据已保存到: {output_dir}")
# Class name -> YOLO class id; ids follow the order of the names below.
CLASS_MAPPING = {
    name: class_id
    for class_id, name in enumerate(
        ("person", "car", "bicycle", "motorcycle", "truck")
    )
}
第四阶段:数据集准备与划分
4.1 数据集划分脚本
python
# dataset_preparation.py
import os
import random
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split
def prepare_yolo_dataset(raw_data_dir, output_dir, test_size=0.2, val_size=0.1):
    """
    Split a flat images/labels folder into train/val/test subsets.

    ``*.jpg`` files are read from ``<raw_data_dir>/images``; for each image the
    label file ``<raw_data_dir>/labels/<stem>.txt`` is copied along when it
    exists.  The result is laid out as ``<output_dir>/{images,labels}/
    {train,val,test}``.

    Args:
        raw_data_dir: Directory containing the ``images`` and ``labels`` folders.
        output_dir: Root directory of the dataset to create.
        test_size: Fraction of all images assigned to the test subset.
        val_size: Fraction of all images assigned to the validation subset.

    Returns:
        ``Path`` of the dataset root (``output_dir``).

    Raises:
        ValueError: If no ``.jpg`` image is found in ``<raw_data_dir>/images``.
    """
    dataset_dir = Path(output_dir)
    for kind in ("images", "labels"):
        for subset in ("train", "val", "test"):
            (dataset_dir / kind / subset).mkdir(parents=True, exist_ok=True)

    raw_images_dir = Path(raw_data_dir) / "images"
    raw_labels_dir = Path(raw_data_dir) / "labels"

    # glob order depends on the filesystem; sorting makes the split below
    # reproducible for the fixed random_state.
    image_files = sorted(f for f in raw_images_dir.glob("*.jpg") if f.is_file())
    if not image_files:
        raise ValueError(f"no .jpg images found in {raw_images_dir}")

    # First carve off the combined val+test share, then divide it.
    holdout_fraction = test_size + val_size
    train_files, holdout_files = train_test_split(
        image_files, test_size=holdout_fraction, random_state=42
    )
    # train_test_split returns (remainder, test part): the second element must
    # be the test subset, so the fraction passed is test_size's share of the
    # holdout.
    val_files, test_files = train_test_split(
        holdout_files, test_size=test_size / holdout_fraction, random_state=42
    )

    def copy_files(files, image_dest, label_dest):
        for img_file in files:
            shutil.copy2(img_file, image_dest / img_file.name)
            # Images without a label file are copied without one.
            label_file = raw_labels_dir / f"{img_file.stem}.txt"
            if label_file.exists():
                shutil.copy2(label_file, label_dest / label_file.name)

    copy_files(train_files, dataset_dir / "images" / "train", dataset_dir / "labels" / "train")
    copy_files(val_files, dataset_dir / "images" / "val", dataset_dir / "labels" / "val")
    copy_files(test_files, dataset_dir / "images" / "test", dataset_dir / "labels" / "test")

    print(f"数据集划分完成:")
    print(f"训练集: {len(train_files)} 张图像")
    print(f"验证集: {len(val_files)} 张图像")
    print(f"测试集: {len(test_files)} 张图像")
    return dataset_dir
def create_yaml_config(dataset_dir, class_names, config_name="dataset.yaml"):
    """
    Write the YOLO dataset configuration file into the dataset directory.

    Args:
        dataset_dir: Dataset root, as a ``Path`` or a string.
        class_names: List of class names; its length is written as ``nc`` and
            the list itself as ``names``.
        config_name: File name of the configuration inside ``dataset_dir``.

    Returns:
        ``Path`` of the written configuration file.
    """
    # Accept plain strings as well as Path objects.
    dataset_dir = Path(dataset_dir)

    config_content = f"""# YOLO数据集配置文件
path: {dataset_dir.absolute()} # 数据集根目录
train: images/train # 训练集图像目录
val: images/val # 验证集图像目录
test: images/test # 测试集图像目录
# 类别数量
nc: {len(class_names)}
# 类别名称
names: {class_names}
"""
    config_path = dataset_dir / config_name
    # The content contains non-ASCII comments, so the encoding is fixed to
    # UTF-8 instead of the platform default.
    with open(config_path, 'w', encoding='utf-8') as f:
        f.write(config_content)

    print(f"配置文件已创建: {config_path}")
    return config_path
# Usage example: split the converted data, then write the dataset YAML.
if __name__ == "__main__":
    classes = ["person", "car", "bicycle", "motorcycle", "truck"]
    dataset_path = prepare_yolo_dataset(
        raw_data_dir="converted_data",
        output_dir="yolo_dataset",
    )
    yaml_path = create_yaml_config(dataset_path, classes)
第五阶段:YOLO模型训练
5.1 训练脚本
python
# train_yolo.py
from ultralytics import YOLO
import yaml
import torch
from pathlib import Path
def train_yolo_model(config_path, model_size='yolov8n', epochs=50, imgsz=640):
    """
    Train a YOLO model starting from the ``<model_size>.pt`` weights.

    Args:
        config_path: Path of the dataset configuration file.
        model_size: Model name; ``.pt`` is appended to form the weights file.
        epochs: Number of training epochs.
        imgsz: Input image size.

    Returns:
        Whatever ``model.train`` returns.
    """
    weights_file = f'{model_size}.pt'
    detector = YOLO(weights_file)

    train_options = {
        'data': config_path,
        'epochs': epochs,
        'imgsz': imgsz,
        'batch': 16,       # batch size
        'patience': 10,    # early-stopping patience
        'save': True,      # keep checkpoints
        'exist_ok': True,  # reuse/overwrite an existing run directory
        'verbose': True,   # detailed training output
    }
    return detector.train(**train_options)
def evaluate_model(model_path, config_path):
    """
    Validate a trained model and print its main detection metrics.

    Args:
        model_path: Path of the trained weights file.
        config_path: Path of the dataset configuration file.

    Returns:
        The metrics object returned by ``model.val``.
    """
    model = YOLO(model_path)
    metrics = model.val(data=config_path)

    print("模型评估结果:")
    print(f"mAP50: {metrics.box.map50:.3f}")
    print(f"mAP50-95: {metrics.box.map:.3f}")
    # The box metrics expose mean precision/recall as ``mp``/``mr``; there are
    # no ``precision``/``recall`` attributes.
    print(f"精确率: {metrics.box.mp:.3f}")
    print(f"召回率: {metrics.box.mr:.3f}")
    return metrics
# Usage example: train on the prepared dataset, then evaluate the best weights.
if __name__ == "__main__":
    dataset_config = "yolo_dataset/dataset.yaml"

    print("开始训练YOLO模型...")
    training_results = train_yolo_model(
        dataset_config, model_size='yolov8n', epochs=100, imgsz=640
    )

    # Location of the best checkpoint written by the training run above.
    best_model_path = "runs/detect/train/weights/best.pt"
    evaluation_results = evaluate_model(best_model_path, dataset_config)
5.2 高级训练配置
python
# advanced_training.py
from ultralytics import YOLO
def advanced_training_with_augmentation(config_path):
    """
    Train ``yolov8n.pt`` with explicit optimizer, loss and augmentation settings.

    Args:
        config_path: Path of the dataset configuration file.

    Returns:
        Whatever ``model.train`` returns.
    """
    optimizer_settings = {
        'lr0': 0.01,             # initial learning rate
        'lrf': 0.01,             # final learning rate setting
        'momentum': 0.937,       # momentum
        'weight_decay': 0.0005,  # weight decay
        'warmup_epochs': 3.0,    # warm-up epochs
        'warmup_momentum': 0.8,  # warm-up momentum
    }
    loss_weights = {
        'box': 7.5,  # box loss weight
        'cls': 0.5,  # classification loss weight
        'dfl': 1.5,  # DFL loss weight
    }
    augmentation = {
        'hsv_h': 0.015,      # hue augmentation
        'hsv_s': 0.7,        # saturation augmentation
        'hsv_v': 0.4,        # value (brightness) augmentation
        'degrees': 0.0,      # rotation
        'translate': 0.1,    # translation
        'scale': 0.5,        # scaling
        'shear': 0.0,        # shear
        'perspective': 0.0,  # perspective transform
        'flipud': 0.0,       # vertical flip
        'fliplr': 0.5,       # horizontal flip
        'mosaic': 1.0,       # mosaic augmentation
        'mixup': 0.0,        # MixUp augmentation
        'copy_paste': 0.0,   # copy-paste augmentation
    }

    model = YOLO('yolov8n.pt')
    return model.train(
        data=config_path,
        epochs=100,
        imgsz=640,
        batch=16,
        **optimizer_settings,
        **loss_weights,
        **augmentation,
    )
第六阶段:模型部署与推理
6.1 模型推理脚本
python
# inference.py
from ultralytics import YOLO
import cv2
import numpy as np
class VideoObjectDetector:
    """Runs a YOLO model over a video frame by frame."""

    def __init__(self, model_path, conf_threshold=0.5, iou_threshold=0.5):
        """
        Load the model and remember the inference thresholds.

        Args:
            model_path: Path of the weights file handed to ``YOLO``.
            conf_threshold: Value passed as ``conf`` on every inference call.
            iou_threshold: Value passed as ``iou`` on every inference call.
        """
        self.model = YOLO(model_path)
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def detect_video(self, video_path, output_path=None):
        """
        Run detection on every frame of a video.

        With ``output_path`` the annotated frames are written to that file
        (``mp4v`` codec); without it they are shown in a window until the video
        ends or ``q`` is pressed.

        Args:
            video_path: Path of the input video.
            output_path: Optional path of the annotated output video.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"无法打开视频文件: {video_path}")
            return

        out = None
        try:
            # Keep the frame rate as a float (e.g. 29.97) so the output keeps
            # the source duration; fall back to 25 when none is reported,
            # since a writer cannot work with a rate of 0.
            fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            if output_path:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = self.model(
                    frame,
                    conf=self.conf_threshold,
                    iou=self.iou_threshold,
                    verbose=False
                )
                annotated_frame = results[0].plot()

                if out is not None:
                    out.write(annotated_frame)
                else:
                    cv2.imshow('Detection Result', annotated_frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                frame_count += 1
                print(f"处理帧: {frame_count}")
        finally:
            # Release capture, writer and windows even if inference raised.
            cap.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()
# Usage example: annotate test_video.mp4 with the trained weights.
if __name__ == "__main__":
    detector = VideoObjectDetector(model_path="runs/detect/train/weights/best.pt")
    detector.detect_video(
        video_path="test_video.mp4",
        output_path="output_video.mp4",
    )
全流程自动化脚本
python
# full_pipeline.py
import os
import sys
from pathlib import Path
def run_full_pipeline():
    """
    Run the whole workflow: frame extraction, manual labeling pause, export and
    conversion, dataset split, training and evaluation.

    The server URL, API token and project id below are placeholders that must
    be replaced before running.  The function stops early when the annotation
    export fails.
    """
    print("=== Label Studio视频标注到YOLO训练全流程 ===")

    # 1. Video preprocessing
    print("步骤1: 视频预处理...")
    from video_preprocessing import prepare_video_dataset
    prepare_video_dataset("raw_videos", "processed_data")

    # 2. Labeling in Label Studio (manual step)
    print("步骤2: 请在Label Studio中完成标注...")
    input("标注完成后按Enter键继续...")

    # 3. Export and conversion
    print("步骤3: 导出并转换标注数据...")
    from export_annotations import export_labelstudio_annotations, convert_to_yolo_format
    export_file = export_labelstudio_annotations(
        "http://localhost:8080",
        "your_api_token",
        "your_project_id"
    )
    if export_file is None:
        # A failed export yields None; passing that on would only fail later
        # with an unrelated error when the file is opened.
        print("标注数据导出失败,流程终止")
        return

    CLASS_MAPPING = {"person": 0, "car": 1, "bicycle": 2, "motorcycle": 3, "truck": 4}
    convert_to_yolo_format(export_file, "converted_data", CLASS_MAPPING)

    # 4. Dataset preparation
    # NOTE(review): confirm that converted_data/images holds the frame images
    # matching the generated label files before this step runs.
    print("步骤4: 准备YOLO数据集...")
    from dataset_preparation import prepare_yolo_dataset, create_yaml_config
    dataset_path = prepare_yolo_dataset("converted_data", "yolo_dataset")
    yaml_path = create_yaml_config(dataset_path, list(CLASS_MAPPING.keys()))

    # 5. Training
    print("步骤5: 训练YOLO模型...")
    from train_yolo import train_yolo_model, evaluate_model
    train_yolo_model(str(yaml_path))

    # 6. Evaluation
    print("步骤6: 评估训练好的模型...")
    best_model = "runs/detect/train/weights/best.pt"
    evaluate_model(best_model, str(yaml_path))

    print("全流程完成!")
# Script entry point: run the whole pipeline when executed directly.
if __name__ == "__main__":
    run_full_pipeline()
关键注意事项
- 标注质量:确保标注的准确性和一致性,这是模型性能的基础
- 数据平衡:尽量保持各个类别的样本数量均衡
- 硬件要求:YOLO训练强烈建议使用GPU,并确保显存充足;没有GPU时也可以用CPU训练,但速度会明显变慢
- 版本兼容性:注意Label Studio和YOLO版本的兼容性
- 备份数据:定期备份标注数据和训练结果
这个全流程指南涵盖了从视频数据准备到YOLO模型训练部署的完整过程。您可以根据具体项目需求调整各个步骤的参数和配置。