在计算机视觉的实际应用中,背景建模、文档透视矫正、光流估计、目标跟踪是最基础、最常用的四大核心技术。本文将用四段可直接运行的 Python+OpenCV 代码,带你从零掌握这四大知识点,覆盖视频分析、文档扫描、运动跟踪等真实场景,新手也能快速上手。
一、背景建模:运动目标自动检测
原理介绍
背景建模的核心是区分静态背景和动态前景,通过 MOG2 算法构建背景模型,自动提取视频中的运动物体,常用于监控、人流统计、运动分析等场景。
python
import cv2
import numpy as np
# Contours whose perimeter (in pixels) does not exceed this are treated as noise.
MIN_CONTOUR_PERIMETER = 188

# Open the video file; pass 0 instead of the path to read from the default camera.
cap = cv2.VideoCapture('test.avi')
if not cap.isOpened():
    raise SystemExit("Cannot open video source 'test.avi'")

# 3x3 cross-shaped structuring element for the opening step, and the MOG2
# background subtractor that maintains the background model.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
fgbg = cv2.createBackgroundSubtractorMOG2()

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Update the background model and get the foreground mask.
        fgmask = fgbg.apply(frame)
        # MOG2 detects shadows by default and marks them with a gray value
        # (127) in the mask. Keep only definite foreground (255) so shadows
        # are not boxed as moving objects.
        _, fgmask = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)
        # Morphological opening removes isolated noise pixels.
        fgmask_clean = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)

        # Outer contours of the remaining foreground blobs.
        contours, _ = cv2.findContours(
            fgmask_clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        for c in contours:
            # Skip small blobs, judged by closed-contour perimeter.
            if cv2.arcLength(c, True) > MIN_CONTOUR_PERIMETER:
                x, y, w, h = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow("Motion Detection", frame)
        cv2.imshow("Foreground Mask", fgmask_clean)
        # ESC quits.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release the capture and close the windows even if a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()
- createBackgroundSubtractorMOG2():自适应背景建模,自动区分运动物体
- 形态学开运算:去除噪点,让检测结果更干净
- 轮廓过滤:按周长筛选,避免小干扰物误检
二、文档透视矫正:OCR 必备预处理
原理介绍
通过边缘检测 + 轮廓拟合 + 透视变换,将倾斜拍摄的文档自动矫正为俯视视角,是摄像头 OCR、扫描全能类 APP 的核心技术。
python
import numpy as np
import cv2
def cv_show(name, img):
    """Show ``img`` in the window titled ``name`` and let the GUI refresh.

    Calls ``cv2.waitKey(1)`` after ``cv2.imshow``; the key code returned by
    ``waitKey`` is discarded, so a key pressed during that call is not
    reported to the caller.
    """
    cv2.imshow(name, img)
    cv2.waitKey(1)
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The points are sorted by angle around their centroid, which yields a
    clockwise walk on screen (image y axis points down), and the walk is then
    rotated to start at the point with the smallest ``x + y``. Unlike picking
    the extremes of ``x + y`` and ``y - x`` independently, this always returns
    each input point exactly once, even when those sums or differences tie
    (for example a diamond-shaped quadrilateral).

    Args:
        pts: Four 2-D points, array-like of shape (4, 2) or (4, 1, 2).

    Returns:
        A float32 array of shape (4, 2) holding the ordered points.

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts, dtype="float32").reshape(-1, 2)
    if pts.shape[0] != 4:
        raise ValueError("order_points expects exactly four 2-D points")

    # Angle of every point as seen from the centroid; ascending angle runs
    # top-left -> top-right -> bottom-right -> bottom-left in image coordinates.
    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    ring = pts[np.argsort(angles, kind="stable")]

    # Rotate the cyclic order so the top-left corner (smallest x + y) comes first.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0)
def four_point_transform(image, pts):
    """Warp the quadrilateral given by ``pts`` onto an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four corner points, put in order by ``order_points``.

    Returns:
        The warped image of size ``(maxWidth, maxHeight)``, where each
        dimension is the longer of the two opposite edges of the
        quadrilateral, truncated to whole pixels.
    """
    rect = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = rect

    # Output width: the longer of the bottom and top edges.
    bottom_edge = np.linalg.norm(bottom_right - bottom_left)
    top_edge = np.linalg.norm(top_right - top_left)
    maxWidth = max(int(bottom_edge), int(top_edge))

    # Output height: the longer of the right and left edges.
    right_edge = np.linalg.norm(top_right - bottom_right)
    left_edge = np.linalg.norm(top_left - bottom_left)
    maxHeight = max(int(right_edge), int(left_edge))

    # Destination corners, in the same order as ``rect``.
    last_col = maxWidth - 1
    last_row = maxHeight - 1
    dst = np.array(
        [[0, 0], [last_col, 0], [last_col, last_row], [0, last_row]],
        dtype="float32",
    )

    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# Capture from the default camera.
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, image = cap.read()
        if not ret:
            break
        # Untouched copy for warping; ``image`` gets the contour drawn on it.
        orig = image.copy()

        # Edge map: grayscale -> 5x5 Gaussian blur -> Canny.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(gray, 75, 200)

        # Keep the three largest outer contours as document candidates.
        cnts = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:3]

        # The first candidate that simplifies to a large enough quadrilateral wins.
        screenCnt = None
        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.05 * peri, True)
            if len(approx) == 4 and cv2.contourArea(approx) > 20000:
                screenCnt = approx
                break

        if screenCnt is not None:
            cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
            warped = four_point_transform(orig, screenCnt.reshape(4, 2))
            # Otsu binarization of the rectified document.
            warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
            ref = cv2.threshold(
                warped_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )[1]
            cv2.imshow("Scan Result", ref)
        cv2.imshow("Camera", image)

        # Poll the keyboard exactly once per frame: an extra waitKey call
        # elsewhere in the loop would consume the ESC press before this check.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Free the camera and close the windows even if processing raised.
    cap.release()
    cv2.destroyAllWindows()
- Canny 边缘检测:精准提取文档边界
- 轮廓近似:自动拟合四边形,定位文档
- 透视变换:将倾斜图像矫正为标准俯视视角
三、光流估计:Lucas-Kanade 特征点跟踪
原理介绍
光流法通过跟踪连续帧之间的特征点运动,计算物体移动轨迹,适用于动作捕捉、视频稳像、运动分析等场景。
python
import numpy as np
import cv2
cap = cv2.VideoCapture('test.avi')
# One random color per track; 100 rows matches maxCorners below.
color = np.random.randint(0, 255, (100, 3))

# Read the first frame; without it there is nothing to track.
ret, old_frame = cap.read()
if not ret:
    cap.release()
    raise SystemExit("Cannot read the first frame from 'test.avi'")
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# Corner detection (Shi-Tomasi) on the first frame.
feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7)
p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)
# Canvas that accumulates the track lines across frames.
mask = np.zeros_like(old_frame)
lk_params = dict(winSize=(15, 15), maxLevel=2)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # goodFeaturesToTrack yields None when it finds no corner, and every
        # track can be lost over time; calcOpticalFlowPyrLK fails on an empty
        # point set, so detect again on the previous frame first.
        if p0 is None or len(p0) == 0:
            p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)

        if p0 is not None and len(p0) > 0:
            # Pyramidal Lucas-Kanade flow from the previous frame to this one.
            p1, st, err = cv2.calcOpticalFlowPyrLK(
                old_gray, frame_gray, p0, None, **lk_params
            )
            # Keep only the points whose flow was found (status 1).
            good_new = p1[st == 1]
            good_old = p0[st == 1]

            # Draw one segment per surviving point onto the canvas.
            for i, (new, old) in enumerate(zip(good_new, good_old)):
                a, b = map(int, new.ravel())
                c, d = map(int, old.ravel())
                mask = cv2.line(
                    mask, (a, b), (c, d), color[i % len(color)].tolist(), 2
                )
            p0 = good_new.reshape(-1, 1, 2)

        img = cv2.add(frame, mask)
        cv2.imshow("Optical Flow", img)
        # ESC quits; the 150 ms wait also paces playback.
        if cv2.waitKey(150) & 0xFF == 27:
            break
        old_gray = frame_gray.copy()
finally:
    # Always close the windows and release the video, even after an error.
    cv2.destroyAllWindows()
    cap.release()
- Shi-Tomasi 角点检测:提取稳定特征点
- Lucas-Kanade 稀疏光流:高效计算特征点运动
- 轨迹绘制:可视化物体运动路径
四、CSRT 目标跟踪:实时锁定任意物体
原理介绍
CSRT 是高精度单目标跟踪算法,手动框选目标后自动持续跟踪,抗遮挡、抗形变,适合摄像头实时追踪。
python
import cv2
# Create the tracker; it is replaced by a fresh one on every new selection.
tracker = cv2.TrackerCSRT_create()
tracking = False
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Poll the keyboard once per frame. Two separate waitKey calls would
        # compete for the same key press, so 's' or ESC could be swallowed by
        # the call that is not checking for it.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC quits
            break
        if key in (ord('s'), ord('S')):
            # Let the user drag a box around the target on the current frame.
            roi = cv2.selectROI("Tracking", frame, showCrosshair=False)
            # A cancelled selection comes back with zero width/height; keep
            # the previous state instead of initialising on an empty box.
            if roi[2] > 0 and roi[3] > 0:
                tracker = cv2.TrackerCSRT_create()
                tracker.init(frame, roi)
                tracking = True

        # Update the tracker and draw the box when the target is found.
        if tracking:
            success, box = tracker.update(frame)
            if success:
                x, y, w, h = map(int, box)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imshow("Tracking", frame)
finally:
    # Free the camera and close the window even if tracking raised.
    cap.release()
    cv2.destroyAllWindows()
- TrackerCSRT_create():高精度、高稳定性跟踪器
- selectROI():手动框选目标
- 实时跟踪更新:适合人脸、车辆、物体等任意目标跟踪
| 技术 | 核心用途 | 输入 | 输出 |
|---|---|---|---|
| 背景建模 | 运动物体检测 | 视频 / 摄像头 | 运动目标框 |
| 文档透视矫正 | 文档扫描、OCR 预处理 | 摄像头 | 矫正后俯视文档 |
| 光流估计 | 特征点运动跟踪 | 视频 | 运动轨迹 |
| CSRT 目标跟踪 | 实时单目标跟踪 | 摄像头 | 目标实时框选 |