若该文为原创文章,转载请注明原文出处。
一、功能介绍
主要是想实现跟踪视频中的一个特定目标。
本程序使用 OpenCV 库实现视频中特定目标的跟踪:需要提供视频文件路径以及目标在第一帧中的位置坐标(x, y, width, height,单位为像素),程序会自动跟踪该目标在整个视频中的移动。
二、环境搭建
pip install opencv-contrib-python==3.4.13.47 -i https://pypi.tuna.tsinghua.edu.cn/simple
其他版本自行测试。
三、工作原理
1. 初始化阶段
- 程序首先读取视频的第一帧,获取图像尺寸
- 根据用户提供的坐标初始化目标区域(ROI - Region of Interest)
- 保存初始目标区域作为模板用于后续重新检测
- 尝试创建并初始化跟踪器(优先级顺序:CSRT、KCF、MOSSE)
2. 主跟踪循环
- 逐帧读取视频
- 使用跟踪器更新目标位置
- 如果跟踪成功:
  - 在当前帧上绘制目标边界框
  - 更新位置历史信息
  - 定期更新模板库(用于重新检测)
  - 重置失败计数器
- 如果跟踪失败:
  - 启动重新检测机制
3. 重新检测机制
当跟踪失败时,程序采用多种策略来重新找到目标:
多层次搜索策略:
- 局部搜索:在预测位置附近区域搜索
- 扩展搜索:逐渐扩大搜索区域
- 全图搜索:在整个图像中搜索
多种检测方法:
- 模板匹配:使用保存的模板与当前图像进行匹配
- ORB特征匹配:使用ORB特征检测和匹配算法
- 全图特征匹配:在整个图像上进行特征匹配
动态调整机制:
- 根据失败次数动态调整搜索区域大小
- 使用历史位置预测目标可能位置
- 循环使用不同检测方法
4. 模板管理
程序维护一个模板库:
- 保存多个不同时刻的目标图像作为模板
- 定期更新模板以适应目标外观变化
- 限制模板数量防止内存过度使用
5. 运动预测
- 记录目标的历史位置
- 计算平均运动向量预测下一位置
- 根据预测位置调整搜索区域中心
6. 失败处理
- 设置最大失败次数限制
- 多种重新检测策略轮换使用
- 如果重新检测成功则重新初始化跟踪器
四、源码
import cv2
import sys
import os
import numpy as np
import time
import math
def safe_roi(roi, img_width, img_height):
    """Clip a rectangle to the image and return the part that is visible.

    Args:
        roi: ``(x, y, w, h)`` - top-left corner and size of the rectangle.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        ``(x, y, w, h)`` of the intersection between ``roi`` and the image.
        Width and/or height are 0 when the rectangle does not overlap the
        image (or has a non-positive size), so callers can reject it with a
        simple ``w > 0 and h > 0`` check.
    """
    x, y, w, h = roi
    # Clamp both edges rather than only the origin: when x or y is negative
    # the part that falls outside the image must be removed from the size,
    # otherwise the box is silently shifted and enlarged.
    left = min(max(0, x), img_width)
    top = min(max(0, y), img_height)
    right = min(max(left, x + w), img_width)
    bottom = min(max(top, y + h), img_height)
    return (left, top, right - left, bottom - top)
def adaptive_template_match(search_area, templates,
                            scales=(0.4, 0.6, 0.8, 1.0, 1.2, 1.5, 1.8, 2.0)):
    """Match several templates against a search area at several scales.

    The search area (not the template) is resized by each scale factor, so a
    hit at ``scale`` corresponds to an object ``1 / scale`` times the template
    size in the original image.

    Args:
        search_area: Image region to search.
        templates: Sequence of template images.
        scales: Scale factors applied to ``search_area``.

    Returns:
        ``(best_val, best_loc, best_scale, best_template_idx)``.
        ``best_val`` is the highest ``TM_CCOEFF_NORMED`` score and
        ``best_loc`` its ``(x, y)`` location in the *scaled* search area.
        When nothing could be matched the result is ``(-1, None, 1.0, 0)``.
    """
    best_val = -1
    # Initialised up front: previously this name was only bound on a
    # successful match, so the return statement raised UnboundLocalError
    # whenever no template fitted the search area.
    best_loc = None
    best_scale = 1.0
    best_template_idx = 0

    for scale in scales:
        # A scaled side shorter than one pixel cannot be resized or matched.
        if min(search_area.shape[:2]) * scale < 1:
            continue
        if scale != 1.0:
            scaled_search = cv2.resize(search_area, None, fx=scale, fy=scale)
        else:
            scaled_search = search_area
        search_h, search_w = scaled_search.shape[:2]

        for idx, template in enumerate(templates):
            # matchTemplate requires the template to fit inside the image.
            if template.shape[0] > search_h or template.shape[1] > search_w:
                continue
            try:
                res = cv2.matchTemplate(scaled_search, template, cv2.TM_CCOEFF_NORMED)
                _, max_val, _, max_loc = cv2.minMaxLoc(res)
            except Exception:
                # Best effort: skip templates OpenCV rejects (e.g. channel
                # mismatch) without swallowing KeyboardInterrupt/SystemExit.
                continue
            if max_val > best_val:
                best_val = max_val
                best_loc = max_loc
                best_scale = scale
                best_template_idx = idx

    return best_val, best_loc, best_scale, best_template_idx
def validate_detection(frame, candidate_roi, templates, min_similarity=0.3):
    """Check a candidate region against the templates by colour histogram.

    Args:
        frame: Image the candidate was found in (3-channel, as the histogram
            is computed over channels 0, 1 and 2).
        candidate_roi: ``(x, y, w, h)`` of the candidate region.
        templates: Sequence of template images.
        min_similarity: Correlation above which the candidate is accepted.

    Returns:
        True as soon as one template's histogram correlation with the
        candidate exceeds ``min_similarity``; False otherwise (including for
        regions that are 5 px or smaller in either dimension, or empty).
    """
    x, y, w, h = candidate_roi
    if w <= 5 or h <= 5:
        return False

    candidate_img = frame[y:y + h, x:x + w]
    if candidate_img.size == 0:
        return False

    channels = [0, 1, 2]
    bins = [8, 8, 8]
    ranges = [0, 256, 0, 256, 0, 256]

    max_similarity = 0
    # The candidate histogram does not depend on the template, so it is
    # computed once instead of once per template.
    try:
        candidate_hist = cv2.calcHist([candidate_img], channels, None, bins, ranges)
        cv2.normalize(candidate_hist, candidate_hist)
    except Exception:
        candidate_hist = None

    if candidate_hist is not None:
        for template in templates:
            try:
                resized_template = cv2.resize(template, (w, h))
                template_hist = cv2.calcHist([resized_template], channels, None, bins, ranges)
                cv2.normalize(template_hist, template_hist)
                similarity = cv2.compareHist(candidate_hist, template_hist, cv2.HISTCMP_CORREL)
            except Exception:
                # Best effort: a template OpenCV cannot process is skipped,
                # but KeyboardInterrupt/SystemExit still propagate.
                continue
            if similarity > max_similarity:
                max_similarity = similarity
            if similarity > min_similarity:
                return True

    print(f"直方图验证失败: 最大相似度={max_similarity:.2f}")
    return False
def contour_similarity(img1, template):
    """Compare two images by the shape of their largest external contour.

    Both images are converted to grayscale if they have 3 dimensions,
    binarised with a fixed threshold of 127, and the largest external
    contour of each is compared with ``cv2.matchShapes``.

    Args:
        img1: Candidate image (grayscale or BGR).
        template: Reference image (grayscale or BGR).

    Returns:
        A score in ``[0.0, 1.0]`` where larger means more similar. 0.0 is
        returned when either image has no contour or when OpenCV raises.
    """
    def _largest_contour(image):
        """Return the largest external contour of the binarised image, or None."""
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()
        _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        # findContours returns (image, contours, hierarchy) in OpenCV 3.x and
        # (contours, hierarchy) in 4.x; the contour list is second-to-last in
        # both. Two-value unpacking raised ValueError on 3.x, which the
        # handler below turned into a constant 0.0 score.
        contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) == 0:
            return None
        return max(contours, key=cv2.contourArea)

    try:
        cnt1 = _largest_contour(img1)
        cnt2 = _largest_contour(template)
        if cnt1 is None or cnt2 is None:
            return 0.0
        # matchShapes returns a distance: smaller means more alike.
        distance = cv2.matchShapes(cnt1, cnt2, cv2.CONTOURS_MATCH_I2, 0.0)
        # Map the distance to a similarity score, capping it so the result
        # never drops below 0.
        return 1.0 - min(distance, 1.0)
    except Exception as e:
        print(f"轮廓相似度计算错误: {e}")
        return 0.0
def template_match_score(frame, roi, templates):
    """Score how well a candidate region matches any of the templates.

    Args:
        frame: Image containing the candidate.
        roi: ``(x, y, w, h)`` of the candidate; may extend beyond the frame.
        templates: Sequence of template images.

    Returns:
        The best ``TM_CCOEFF_NORMED`` score over all templates, each resized
        to the visible part of the candidate. 0.0 when the region is 5 px or
        smaller in either dimension, lies outside the frame, or no template
        can be matched.
    """
    x, y, w, h = roi
    if w <= 5 or h <= 5:
        return 0.0

    # Clip to the frame first. With a negative x or y, NumPy slicing would
    # wrap around to the far side of the image, and a region cut off at the
    # border would be smaller than the (w, h) template, making matchTemplate
    # fail for every template.
    frame_h, frame_w = frame.shape[:2]
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(frame_w, x + w), min(frame_h, y + h)
    if x2 <= x1 or y2 <= y1:
        return 0.0

    patch = frame[y1:y2, x1:x2]
    patch_h, patch_w = patch.shape[:2]

    best_score = 0.0
    for template in templates:
        try:
            resized_tpl = cv2.resize(template, (patch_w, patch_h))
            result = cv2.matchTemplate(patch, resized_tpl, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, _ = cv2.minMaxLoc(result)
        except Exception:
            # Best effort: skip templates OpenCV rejects, but let
            # KeyboardInterrupt/SystemExit through.
            continue
        if max_val > best_score:
            best_score = max_val
    return best_score
def detect_with_orb(frame, templates, search_area_roi=None):
    """Locate the target with ORB feature matching and a RANSAC homography.

    Args:
        frame: Image to search.
        templates: Sequence of template images.
        search_area_roi: Optional ``(x1, y1, x2, y2)`` corner coordinates
            restricting the search; the whole frame is used when omitted.

    Returns:
        ``(x, y, w, h)`` of the best candidate in ``frame`` coordinates, or
        None when nothing plausible was found. The box is the axis-aligned
        bounding box of the projected template corners and is not clipped to
        the frame.
    """
    if search_area_roi:
        x1, y1, x2, y2 = search_area_roi
        search_area = frame[y1:y2, x1:x2]
    else:
        search_area = frame
    if search_area.size == 0:
        return None

    orb = cv2.ORB_create(nfeatures=2000)
    kp_search, des_search = orb.detectAndCompute(search_area, None)
    if des_search is None or len(kp_search) < 10:
        return None

    # The matcher holds no per-template state, so one instance is shared by
    # all templates instead of being rebuilt on every iteration.
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    best_match = None
    best_quality = 0
    for template in templates:
        kp_template, des_template = orb.detectAndCompute(template, None)
        if des_template is None or len(kp_template) < 5:
            continue

        try:
            matches = sorted(matcher.match(des_template, des_search),
                             key=lambda m: m.distance)
            if len(matches) <= 10:
                continue

            src_pts = np.float32([kp_template[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp_search[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
            homography, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
            if homography is None:
                continue

            # Project the template corners into the search area and take
            # their axis-aligned bounding box.
            tpl_h, tpl_w = template.shape[:2]
            corners = np.float32([[0, 0], [0, tpl_h - 1],
                                  [tpl_w - 1, tpl_h - 1], [tpl_w - 1, 0]]).reshape(-1, 1, 2)
            projected = cv2.perspectiveTransform(corners, homography)
            xs = [p[0][0] for p in projected]
            ys = [p[0][1] for p in projected]
            box_x, box_y = int(min(xs)), int(min(ys))
            box_w, box_h = int(max(xs) - min(xs)), int(max(ys) - min(ys))

            if box_w > 5 and box_h > 5 and box_w < frame.shape[1] and box_h < frame.shape[0]:
                # Quality grows with the number of matches and shrinks with
                # the mean distance of the ten closest ones.
                match_quality = len(matches) * (
                    1.0 - np.mean([m.distance for m in matches[:10]]) / 100.0)
                if match_quality > best_quality:
                    best_quality = match_quality
                    best_match = (box_x, box_y, box_w, box_h)
        except Exception as e:
            print(f"ORB匹配错误: {e}")
            continue

    if best_match and search_area_roi:
        # Shift from search-area coordinates back to frame coordinates.
        box_x, box_y, box_w, box_h = best_match
        best_match = (box_x + search_area_roi[0], box_y + search_area_roi[1], box_w, box_h)
    return best_match
def _available_tracker_factories():
    """Return ``(name, factory)`` pairs for the trackers this OpenCV build offers.

    Priority order is CSRT, KCF, MOSSE. Each factory is looked up on ``cv2``
    and then on ``cv2.legacy`` (when that submodule exists), so a build that
    lacks one of them no longer aborts with AttributeError.
    """
    legacy = getattr(cv2, "legacy", None)
    factories = []
    for name in ("CSRT", "KCF", "MOSSE"):
        attr = f"Tracker{name}_create"
        factory = getattr(cv2, attr, None)
        if factory is None and legacy is not None:
            factory = getattr(legacy, attr, None)
        if factory is not None:
            factories.append((name, factory))
    return factories


def _start_tracker(frame, roi, factories, verbose=True):
    """Initialise the first tracker that accepts ``roi``; return ``(tracker, name)`` or ``(None, None)``."""
    for name, factory in factories:
        if verbose:
            print(f"尝试使用 {name} 跟踪器...")
        try:
            tracker = factory()
            init_result = tracker.init(frame, roi)
        except Exception:
            if verbose:
                print(f"{name} 跟踪器创建失败")
            continue
        # Older bindings return a bool from init(); newer ones return None
        # and signal failure by raising, so None is treated as success.
        if init_result is None or init_result:
            if verbose:
                print(f"{name} 跟踪器初始化成功")
            return tracker, name
        if verbose:
            print(f"{name} 跟踪器初始化失败")
    return None, None


def _parse_roi(roi_coords, width, height):
    """Convert the four ROI strings to a box clipped to the image, or None if invalid."""
    try:
        if len(roi_coords) != 4:
            raise ValueError("需要4个坐标值: x, y, width, height")
        x, y, w, h = map(int, roi_coords)
        print(f"初始ROI: x={x}, y={y}, w={w}, h={h}")
        if w <= 0 or h <= 0:
            raise ValueError("ROI宽度和高度必须为正数")
        roi_box = safe_roi((x, y, w, h), width, height)
        if roi_box[2] <= 0 or roi_box[3] <= 0:
            raise ValueError(f"调整后ROI无效: {roi_box}")
    except (TypeError, ValueError) as e:
        print(f"ROI坐标错误: {e}")
        print(f"请确保ROI在图像范围内 (0-{width}, 0-{height})")
        return None
    print(f"有效ROI: x={roi_box[0]}, y={roi_box[1]}, w={roi_box[2]}, h={roi_box[3]}")
    return roi_box


def _draw_box(image, box, color, thickness):
    """Draw an ``(x, y, w, h)`` rectangle on ``image``."""
    x, y, w, h = box
    cv2.rectangle(image, (x, y), (x + w, y + h), color, thickness)


def _put_label(image, text, origin, color, scale=0.7, thickness=2):
    """Draw ASCII ``text`` on ``image`` (the Hershey fonts have no CJK glyphs)."""
    cv2.putText(image, text, origin, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thickness)


def _maybe_add_template(templates, patch, max_templates, similarity_limit=0.7):
    """Append ``patch`` to ``templates`` if it differs enough from every stored one.

    Returns True when the patch was added. The oldest template is evicted
    only when a new one is actually appended.
    """
    if patch.size == 0 or patch.shape[0] <= 5 or patch.shape[1] <= 5:
        return False
    patch_hist = cv2.calcHist([patch], [0], None, [256], [0, 256])
    cv2.normalize(patch_hist, patch_hist)
    for tpl in templates:
        tpl_hist = cv2.calcHist([tpl], [0], None, [256], [0, 256])
        cv2.normalize(tpl_hist, tpl_hist)
        if cv2.compareHist(patch_hist, tpl_hist, cv2.HISTCMP_CORREL) > similarity_limit:
            return False
    if len(templates) >= max_templates:
        templates.pop(0)
    templates.append(patch.copy())
    return True


def _predict_search_region(prev_positions, last_known_position, reinit_attempts, width, height):
    """Return the ``(x1, y1, x2, y2)`` region in which to look for the lost target.

    The region is centred on the last known box centre, moved along the
    average recent motion, and grows with the attempt count and the speed.
    """
    margin = min(1000, 300 + 50 * reinit_attempts)
    # Always start from the box centre; starting from the top-left corner
    # offsets the whole search region by half a box.
    center_x = last_known_position[0] + last_known_position[2] // 2
    center_y = last_known_position[1] + last_known_position[3] // 2

    if len(prev_positions) >= 2:
        # N positions contain N - 1 displacements, so that is the divisor
        # for the average step; the summed displacement telescopes to
        # last - first.
        steps = len(prev_positions) - 1
        total_dx = prev_positions[-1][0] - prev_positions[0][0]
        total_dy = prev_positions[-1][1] - prev_positions[0][1]
        speeds = [np.hypot(cur[0] - prev[0], cur[1] - prev[1])
                  for prev, cur in zip(prev_positions, prev_positions[1:])]
        avg_speed = float(np.mean(speeds))

        predict_frames = min(20, 5 + reinit_attempts)
        center_x += int(total_dx * predict_frames / steps)
        center_y += int(total_dy * predict_frames / steps)
        center_x = max(0, min(width - 1, center_x))
        center_y = max(0, min(height - 1, center_y))

        speed_factor = min(3.0, 1.0 + avg_speed / 50.0)
        margin = int(min(1000, 300 * speed_factor + 50 * reinit_attempts))

    return (max(0, center_x - margin), max(0, center_y - margin),
            min(width, center_x + margin), min(height, center_y + margin))


def _match_templates_in_region(frame, templates, region, reinit_attempts):
    """Run multi-scale template matching inside ``region``; return a candidate box or None."""
    x1, y1, x2, y2 = region
    if x2 <= x1 or y2 <= y1:
        return None
    search_area = frame[y1:y2, x1:x2]
    best_val, best_loc, best_scale, best_template_idx = adaptive_template_match(search_area, templates)
    print(f"模板匹配结果: 置信度={best_val:.2f}, 尺度={best_scale}, 模板={best_template_idx}")

    # The acceptance threshold relaxes as attempts accumulate.
    match_threshold = max(0.35, 0.7 - 0.02 * reinit_attempts)
    if best_val <= match_threshold:
        return None

    # Size the candidate from the template that actually matched; stored
    # templates are not guaranteed to have the initial ROI's size.
    tpl_h, tpl_w = templates[best_template_idx].shape[:2]
    candidate = (x1 + int(best_loc[0] / best_scale),
                 y1 + int(best_loc[1] / best_scale),
                 int(tpl_w / best_scale),
                 int(tpl_h / best_scale))
    print(f"模板匹配候选: {candidate}")
    return candidate


def _pyramid_orb_search(frame, templates, levels=3):
    """Run ORB detection on successively halved frames; return ``(candidate, score)``."""
    best_candidate = None
    best_score = -1
    for level in range(levels):
        scale = 1.0 / (2 ** level)
        resized = frame if level == 0 else cv2.resize(frame, None, fx=scale, fy=scale)
        found = detect_with_orb(resized, templates)
        if not found:
            continue
        # Map the box back to full-resolution coordinates before scoring.
        candidate = tuple(int(v / scale) for v in found)
        score = template_match_score(frame, candidate, templates)
        if score > best_score:
            best_score = score
            best_candidate = candidate
    return best_candidate, best_score


def _find_candidate(frame, templates, region, reinit_mode, reinit_attempts):
    """Apply the detector(s) selected by ``reinit_mode``; return a candidate box or None.

    Mode 0 tries template matching and falls back to regional ORB, mode 1
    tries regional ORB and falls back to the whole-frame pyramid search,
    mode 2 goes straight to the pyramid search.
    """
    candidate = None
    if reinit_mode == 0:
        candidate = _match_templates_in_region(frame, templates, region, reinit_attempts)

    if reinit_mode == 1 or (reinit_mode == 0 and candidate is None):
        print("尝试ORB特征匹配...")
        candidate = detect_with_orb(frame, templates, region)
        if candidate:
            print(f"ORB检测到候选目标: {candidate}")

    if reinit_mode == 2 or (reinit_mode == 1 and candidate is None):
        print("尝试金字塔全图搜索...")
        candidate, score = _pyramid_orb_search(frame, templates)
        if candidate:
            print(f"金字塔搜索检测到候选目标: {candidate}, 分数={score:.2f}")
    return candidate


def _verify_candidate(frame, roi, templates, reinit_attempts):
    """Accept ``roi`` if it passes the histogram check or, failing that, the contour check."""
    # The histogram threshold relaxes as attempts accumulate.
    min_sim = max(0.25, 0.4 - 0.01 * reinit_attempts)
    if validate_detection(frame, roi, templates, min_sim):
        return True
    if not templates:
        return False
    x, y, w, h = roi
    patch = frame[y:y + h, x:x + w]
    if patch.size == 0:
        return False
    contour_sim = contour_similarity(patch, templates[-1])  # newest template
    print(f"轮廓相似度: {contour_sim:.2f}")
    if contour_sim > 0.6:
        print(f"轮廓验证通过: {contour_sim:.2f}")
        return True
    return False


def _tracking_loop(cap, first_frame, roi_box, tracker, factories):
    """Track the target through the rest of the video; return the number of frames processed."""
    window_name = "目标跟踪"
    green, red, blue, yellow = (0, 255, 0), (0, 0, 255), (255, 0, 0), (0, 255, 255)

    max_failures = 200            # consecutive failures before giving up
    reinit_threshold = 3          # failures before re-detection starts
    max_reinit_attempts = 30
    max_templates = 5
    template_update_interval = 5  # frames between template refresh attempts
    max_history = 10              # positions kept for motion prediction

    height, width = first_frame.shape[:2]
    x0, y0, w0, h0 = roi_box
    templates = [first_frame[y0:y0 + h0, x0:x0 + w0].copy()]
    prev_positions = []
    last_known_position = roi_box
    tracking_failures = 0
    reinit_attempts = 0
    reinit_mode = 0               # 0: template match, 1: ORB, 2: whole-frame search
    frame_count = 0

    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    try:
        preview = first_frame.copy()
        _draw_box(preview, roi_box, green, 2)
        _put_label(preview, "Press ESC to quit", (10, 30), green)
        cv2.imshow(window_name, preview)
        cv2.waitKey(1000)

        while True:
            ret, frame = cap.read()
            if not ret:
                print("视频结束")
                break
            frame_count += 1

            # All overlays go on a copy: drawing on `frame` itself would bake
            # the box and status text into the templates cropped from it and
            # into the image the re-detection step searches.
            display = frame.copy()

            success, bbox = tracker.update(frame)
            safe_bbox = None
            if success:
                x, y, w, h = [int(v) for v in bbox]
                safe_bbox = safe_roi((x, y, w, h), width, height)
                if safe_bbox[2] <= 0 or safe_bbox[3] <= 0:
                    # A box entirely outside the image is a lost target; it
                    # used to be counted as neither success nor failure.
                    success = False

            if success:
                prev_positions.append((x, y, w, h))
                if len(prev_positions) > max_history:
                    prev_positions.pop(0)

                _draw_box(display, safe_bbox, green, 2)
                _put_label(display, f"Tracking OK (frame {frame_count})", (10, 30), green)

                tracking_failures = 0
                reinit_attempts = 0
                reinit_mode = 0
                last_known_position = safe_bbox

                if frame_count % template_update_interval == 0:
                    sx, sy, sw, sh = safe_bbox
                    if _maybe_add_template(templates, frame[sy:sy + sh, sx:sx + sw], max_templates):
                        print(f"更新模板,当前模板数: {len(templates)}")
            else:
                tracking_failures += 1
                _put_label(display, f"Tracking lost (frame {frame_count})", (10, 30), red)

                if tracking_failures >= reinit_threshold and reinit_attempts < max_reinit_attempts:
                    reinit_attempts += 1
                    print(f"尝试重新检测目标 (尝试 {reinit_attempts}, 模式 {reinit_mode})")

                    region = _predict_search_region(
                        prev_positions, last_known_position, reinit_attempts, width, height)
                    candidate = _find_candidate(
                        frame, templates, region, reinit_mode, reinit_attempts)

                    redetected = False
                    if candidate:
                        new_roi = safe_roi(candidate, width, height)
                        if new_roi[2] > 5 and new_roi[3] > 5:
                            if _verify_candidate(frame, new_roi, templates, reinit_attempts):
                                new_tracker, name = _start_tracker(
                                    frame, new_roi, factories, verbose=False)
                                if new_tracker is not None:
                                    tracker = new_tracker
                                    tracking_failures = 0
                                    reinit_attempts = 0
                                    last_known_position = new_roi
                                    redetected = True
                                    print(f"重新检测到目标!使用 {name} 跟踪器")
                                    _draw_box(display, new_roi, blue, 2)
                                    _put_label(display, "Target re-detected!", (10, 60), blue)
                            else:
                                print("检测结果验证失败")

                    cv2.rectangle(display, (region[0], region[1]), (region[2], region[3]), yellow, 1)
                    # Keep the label on screen when the region touches the top edge.
                    _put_label(display, f"Search area (mode: {reinit_mode})",
                               (region[0], max(15, region[1] - 10)), yellow, 0.5, 1)

                    # Escalate only after a failed attempt; resetting the
                    # counter on success used to trigger this as well.
                    if not redetected and reinit_attempts % 5 == 0:
                        reinit_mode = (reinit_mode + 1) % 3
                        print(f"升级重新检测模式到 {reinit_mode}")

                if tracking_failures > max_failures:
                    print(f"连续跟踪失败超过 {max_failures} 次,停止跟踪")
                    break

            cv2.imshow(window_name, display)
            if cv2.waitKey(30) & 0xFF == 27:  # ESC
                break
    finally:
        cv2.destroyAllWindows()
    return frame_count


def main(video_path, roi_coords):
    """Track one target through a video and show the result in a window.

    Args:
        video_path: Path of the video file to open.
        roi_coords: Four values ``x, y, width, height`` (strings or ints)
            locating the target in the first frame.

    Returns:
        None. Progress and errors are printed. The function returns early
        when the video cannot be opened or read, the ROI is invalid, or no
        tracker can be initialised. The capture is released on every path
        after it has been opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("无法打开视频文件")
            return

        ret, frame = cap.read()
        if not ret:
            print("无法读取第一帧")
            return

        height, width = frame.shape[:2]
        print(f"视频尺寸: {width}x{height}")

        roi_box = _parse_roi(roi_coords, width, height)
        if roi_box is None:
            return

        # The first frame is still unmodified here, so it is used directly
        # instead of seeking back to frame 0 and decoding it again.
        factories = _available_tracker_factories()
        tracker, _ = _start_tracker(frame, roi_box, factories)
        if tracker is None:
            print("无法初始化任何跟踪器")
            return

        print("开始跟踪目标...")
        frame_count = _tracking_loop(cap, frame, roi_box, tracker, factories)
    finally:
        cap.release()
    print(f"跟踪完成,处理了 {frame_count} 帧")
def get_first_frame(video_path):
    """Save the first frame of a video as ``first_frame.jpg``.

    Args:
        video_path: Path of the video file to open.

    Returns:
        The output file name (relative to the current working directory) on
        success, or None when the video cannot be opened, its first frame
        cannot be read, or the image cannot be written.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("无法打开视频文件")
            return None
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret:
        print("无法读取视频第一帧")
        return None

    output_path = "first_frame.jpg"
    # imwrite reports failure through its return value rather than raising,
    # so check it instead of announcing success unconditionally.
    if not cv2.imwrite(output_path, frame):
        print(f"无法保存第一帧: {output_path}")
        return None
    print(f"已保存第一帧为: {output_path}")
    return output_path
if __name__ == "__main__":
    # Command-line entry point. With a video path and four ROI values the
    # tracker runs; with only a video path the first frame is saved so the
    # ROI coordinates can be read off it in an image viewer.
    if len(sys.argv) < 6:
        print("请提供视频文件路径和初始ROI坐标")
        print("用法: python tracker.py <video_path> <x> <y> <width> <height>")
        print("示例: python tracker.py video.mp4 100 50 200 150")

        if len(sys.argv) == 2:
            video_path = sys.argv[1]
            if os.path.exists(video_path):
                first_frame = get_first_frame(video_path)
                if first_frame:
                    print(f"请使用图像查看软件打开 '{first_frame}' 获取ROI坐标")
            else:
                # Previously a mistyped path was ignored without any message.
                print(f"错误: 视频文件 '{video_path}' 不存在")
    else:
        video_path = sys.argv[1]
        roi_coords = sys.argv[2:6]  # x, y, width, height

        if not os.path.exists(video_path):
            print(f"错误: 视频文件 '{video_path}' 不存在")
        else:
            main(video_path, roi_coords)
五、测试
python .\02_tracker.py .\normal_video.mp4 185 375 70 70
测试结果

测试过程中发现:如果目标中途消失或目标过小,就会检测不到,算法还有待优化。
如有侵权,或需要完整代码,请及时联系博主。