在视频分析系统中,数据库模块负责:
- 获取待处理的视频列表;
- 存储视频分析结果;
- 记录已处理的视频,防止重复分析;
- 支持从多张表获取视频路径,并将其转换为 Linux 可访问的路径。

下面我们以 `VideoDB` 类为例,详细讲解。
3.1 数据库连接与初始化
python
import pymysql
from typing import List, Tuple
import json
class VideoDB:
    """Small MySQL helper for the video-analysis pipeline.

    The instance only remembers the connection settings; connections are
    created on demand by ``_get_connection``.
    """

    def __init__(self, host="your_db_host", port=3306, user="your_user", password="your_password", database="your_database"):
        """Store the connection settings; no connection is opened here.

        The default values are placeholders and must be replaced with real
        settings before use.
        """
        # Keyword arguments handed verbatim to ``pymysql.connect``.
        self.db_config = dict(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database,
            charset="utf8mb4",
        )

    def _get_connection(self):
        """Open and return a new pymysql connection built from ``db_config``."""
        connection = pymysql.connect(**self.db_config)
        return connection
说明:
- 所有敏感信息(IP、账号、密码)已替换为占位符,实际使用时请替换成自己的配置;
- `_get_connection` 每次调用都会新建并返回一个连接,连接不在线程或 Worker 之间共享,从而避免多个线程同时操作同一个连接。
3.2 获取待处理视频
python
def get_pending_videos(self, limit: int = 10) -> List[Tuple[int, str]]:
    """Return up to ``limit`` rows of ``video_tasks`` whose status is 'pending'.

    Args:
        limit: Maximum number of rows to fetch.

    Returns:
        A list of ``(id, video_path)`` tuples; empty when nothing is pending.
    """
    conn = self._get_connection()
    try:
        # The cursor context manager closes the cursor even if the query fails.
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT id, video_path FROM video_tasks WHERE status='pending' LIMIT %s",
                (limit,)
            )
            # Normalise whatever row container the driver returns to the
            # annotated list type.
            return list(cursor.fetchall())
    finally:
        # Release the connection on success and on error alike.
        conn.close()
说明:
- `status='pending'` 表示视频尚未分析;
- 返回列表,供分析程序批量分发到 Ray Worker。
3.3 更新视频状态
python
def update_video_status(self, video_id: int, status: str):
    """Set the ``status`` column of one ``video_tasks`` row and commit.

    Args:
        video_id: Value matched against ``video_tasks.id``.
        status: New status value; the documented values are
            pending / processing / done / failed.
    """
    conn = self._get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "UPDATE video_tasks SET status=%s WHERE id=%s",
                (status, video_id)
            )
        conn.commit()
    finally:
        # Close even when execute/commit raises, so a failed update does not
        # leave the connection open.
        conn.close()
说明:
- 每处理完一个视频,及时更新状态,防止重复分析或遗漏;
- 支持多种状态(pending / processing / done / failed),方便监控系统进度。
3.4 保存分析结果
python
def save_video_result(self, result: dict):
    """Insert or update one analysis result row in ``video_sft_results``.

    Args:
        result: Mapping with the following keys::

            {
                "video_id": "xxx",                        # required
                "video_path": "/full/path/to/video.mp4",  # required
                "duration": 123.45,                       # optional, stored as NULL when absent
                "info": {                                 # optional, serialised with json.dumps
                    "0": {"age": 25, "emotion": "happy", "emotion_richness": 0.67, "hand_richness": 0.32},
                    "1": {"age": 30, "emotion": "sad", "emotion_richness": 0.45, "hand_richness": 0.21}
                }
            }

    Raises:
        KeyError: If ``video_id`` or ``video_path`` is missing from ``result``.
    """
    sql = """
        INSERT INTO video_sft_results (video_id, video_path, duration, info)
        VALUES (%s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            video_path=VALUES(video_path),
            duration=VALUES(duration),
            info=VALUES(info)
    """
    # Build the parameters first so a malformed ``result`` fails before a
    # connection is opened.
    params = (
        result["video_id"],
        result["video_path"],
        result.get("duration", None),
        json.dumps(result.get("info")),  # stored as a JSON string
    )
    conn = self._get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, params)
        conn.commit()
    finally:
        # Close even when execute/commit raises.
        conn.close()
说明:
- 分析结果中的 `info` 字段以 JSON 格式存储,便于后续统计或可视化;
- 使用 `ON DUPLICATE KEY UPDATE` 避免重复写入:当记录已存在时更新原记录,而不是再插入一行(前提是表上建有主键或唯一索引,例如 `video_id`)。
3.5 获取服务器视频路径并转换 Linux 路径
python
def fetch_vcfile_server_path(self):
    """Return Linux paths of ``vc_file`` videos that still need processing.

    Reads ``vc_file.new_path`` for rows with ``type = 1``, ``width > 1080``,
    a non-NULL ``audio_bitrate`` and no matching
    ``al_video_segment_info.original_video_id``. Each path is stripped,
    backslashes become forward slashes, and a leading
    ``//172.16.177.70/tzvchain/`` is rewritten to ``/mnt/nas/``. Paths
    already listed in ``processed_SFT_video.video_path`` are then dropped.

    Returns:
        A list of path strings, in query order.
    """
    unc_prefix = '//172.16.177.70/tzvchain/'
    linux_prefix = '/mnt/nas/'
    # NOTE(review): NOT IN matches nothing if the subquery yields a NULL
    # original_video_id -- confirm that column cannot be NULL.
    sql = """
        SELECT vf.new_path
        FROM vc_file vf
        WHERE vf.id NOT IN (
            SELECT s.original_video_id
            FROM al_video_segment_info s
        )
        AND vf.type = 1
        AND vf.width > 1080
        AND vf.audio_bitrate IS NOT NULL
    """
    conn = self._get_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            raw_paths = [row[0] for row in cursor.fetchall() if row[0]]
        with conn.cursor() as cursor2:
            cursor2.execute("SELECT video_path FROM processed_SFT_video")
            existing = {row[0] for row in cursor2.fetchall()}
    finally:
        conn.close()

    final_paths = []
    for raw in raw_paths:
        path = raw.strip().replace('\\', '/')
        if not path:
            continue
        # Rewrite only a leading share prefix, never a match in the middle.
        if path.startswith(unc_prefix):
            path = linux_prefix + path[len(unc_prefix):]
        # Compare the converted Linux path; filtering on the UNC form would
        # never match entries stored as Linux paths.
        if path not in existing:
            final_paths.append(path)
    return final_paths
说明:
- 自动将 Windows/UNC 网络路径替换为 Linux 路径 `/mnt/nas/`;
- 排除已经处理的视频,避免重复分析;
- 提供给 Ray Worker 批量分析使用。
3.6 插入已处理视频
python
def insert_processed_video(self, video_path: str):
    """Record ``video_path`` in ``processed_SFT_video`` and commit.

    The statement is ``INSERT IGNORE``; the connection is closed whether or
    not the statement succeeds.
    """
    statement = "INSERT IGNORE INTO processed_SFT_video (video_path) VALUES (%s)"
    params = (video_path,)
    connection = self._get_connection()
    try:
        with connection.cursor() as cur:
            cur.execute(statement, params)
        connection.commit()
    finally:
        connection.close()
说明:
- 每处理完一个视频,就将其路径记录到 `processed_SFT_video` 表,保证幂等性;
- 使用 `INSERT IGNORE` 防止重复插入报错(需要 `video_path` 列上有唯一索引,重复记录才会被忽略)。
✅ 小结
通过 `VideoDB` 模块,我们实现了:
- 视频任务管理(待处理、处理中、已完成、失败);
- 视频分析结果存储(JSON 格式的 info,支持多指标);
- 路径获取与转换,保证分布式 Worker 能访问视频文件;
- 防重复机制,保证分析任务幂等。

这为 Ray 分布式视频分析提供了坚实的数据支持和管理能力。
完整代码如下:
python
import pymysql
from typing import List, Tuple
import statistics
from collections import Counter
import json
class VideoDB:
    """MySQL access layer for the video-analysis pipeline.

    The instance only stores connection settings. Every public method opens
    its own connection through ``_get_connection`` and closes it before
    returning, including when the statement raises.
    """

    # Share prefix as it looks once backslashes have been turned into
    # forward slashes, and the Linux path prefix it is rewritten to.
    _UNC_PREFIX = '//172.16.177.70/tzvchain/'
    _LINUX_PREFIX = '/mnt/nas/'

    # Upsert shared by both result tables; ``{table}`` is filled only with
    # table names hard-coded in this class, never with caller input.
    _UPSERT_RESULT_SQL = """
        INSERT INTO {table} (video_id, video_path, duration, info)
        VALUES (%s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            video_path=VALUES(video_path),
            duration=VALUES(duration),
            info=VALUES(info)
    """

    def __init__(self, host="127.0.0.1", port=3306, user="db", password="123456", database="db"):
        """Store the connection settings; no connection is opened here."""
        # Keyword arguments handed verbatim to ``pymysql.connect``.
        self.db_config = {
            "host": host,
            "port": port,
            "user": user,
            "password": password,
            "database": database,
            "charset": "utf8mb4"
        }

    def _get_connection(self):
        """Open and return a new pymysql connection built from ``db_config``."""
        return pymysql.connect(**self.db_config)

    def _fetch_all(self, sql, params=None):
        """Run a query on a fresh connection and return all rows as a list."""
        conn = self._get_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, params)
                return list(cursor.fetchall())
        finally:
            conn.close()

    def _execute_commit(self, sql, params=None):
        """Run one write statement on a fresh connection and commit it."""
        conn = self._get_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, params)
            conn.commit()
        finally:
            # Close even when execute/commit raises.
            conn.close()

    @classmethod
    def _to_linux_path(cls, raw_path):
        """Strip ``raw_path``, use forward slashes, and rewrite a leading share prefix."""
        path = raw_path.strip().replace('\\', '/')
        # Only a prefix at the very start is rewritten, never a match in the middle.
        if path.startswith(cls._UNC_PREFIX):
            path = cls._LINUX_PREFIX + path[len(cls._UNC_PREFIX):]
        return path

    def _save_result(self, table, result):
        """Upsert one result dict into ``table`` (an internal, hard-coded name)."""
        params = (
            result["video_id"],
            result["video_path"],
            result.get("duration", None),
            json.dumps(result.get("info")),  # stored as a JSON string
        )
        self._execute_commit(self._UPSERT_RESULT_SQL.format(table=table), params)

    def get_pending_videos(self, limit: int = 10) -> List[Tuple[int, str]]:
        """Return up to ``limit`` rows of ``video_tasks`` whose status is 'pending'.

        Returns:
            A list of ``(id, video_path)`` tuples; empty when nothing is pending.
        """
        return self._fetch_all(
            "SELECT id, video_path FROM video_tasks WHERE status='pending' LIMIT %s",
            (limit,)
        )

    def update_video_status(self, video_id: int, status: str):
        """Set the ``status`` column of one ``video_tasks`` row and commit.

        Args:
            video_id: Value matched against ``video_tasks.id``.
            status: New status value; the documented values are
                pending / processing / done / failed.
        """
        self._execute_commit(
            "UPDATE video_tasks SET status=%s WHERE id=%s",
            (status, video_id)
        )

    def get_existing_video_path(self):
        """Return every ``video_sft_results.video_path`` as a set for fast duplicate checks."""
        rows = self._fetch_all("SELECT video_path FROM video_sft_results")
        return {row[0] for row in rows}

    def add_video(self, video_path: str):
        """Insert a new ``video_tasks`` row for ``video_path`` with status 'pending'."""
        self._execute_commit(
            "INSERT INTO video_tasks (video_path, status) VALUES (%s, 'pending')",
            (video_path,)
        )

    def save_video_result(self, result: dict):
        """Insert or update one analysis result row in ``video_sft_results``.

        Args:
            result: Mapping with the following keys::

                {
                    "video_id": "xxx",                        # required
                    "video_path": "/full/path/to/video.mp4",  # required
                    "duration": 123.45,                       # seconds; optional
                    "info": {                                 # optional, serialised with json.dumps
                        "0": {"age": 25, "emotion": "happy", "emotion_richness": 0.67, "hand_richness": 0.32},
                        "1": {"age": 30, "emotion": "sad", "emotion_richness": 0.45, "hand_richness": 0.21}
                    }
                }

        Raises:
            KeyError: If ``video_id`` or ``video_path`` is missing.
        """
        self._save_result("video_sft_results", result)

    def fetch_server_paths(self):
        """Return ``server_path`` of segments whose source video has ``volume_level > -91``.

        Joins ``al_video_segment_info`` to ``al_video_info`` on
        ``original_video_id = video_id``; empty or NULL paths are skipped.
        Paths already present in ``video_sft_results`` are not filtered out
        here. To exclude them, extend the WHERE clause with
        ``AND s.server_path NOT IN (SELECT r.video_path FROM video_sft_results r)``.
        """
        sql = """
            SELECT s.server_path
            FROM al_video_segment_info s
            INNER JOIN al_video_info v ON s.original_video_id = v.video_id
            WHERE v.volume_level > -91
        """
        return [row[0] for row in self._fetch_all(sql) if row[0]]

    def fetch_server_path(self):
        """Return every non-empty ``server_path`` from ``al_video_segment_info``."""
        sql = """
            SELECT server_path
            FROM al_video_segment_info
        """
        return [row[0] for row in self._fetch_all(sql) if row[0]]

    def fetch_vcfile_server_path(self):
        """Return Linux paths of ``vc_file`` videos that still need processing.

        Reads ``vc_file.new_path`` for rows with ``type = 1``, ``width > 1080``,
        a non-NULL ``audio_bitrate`` and no matching
        ``al_video_segment_info.original_video_id``. Each path is normalised
        with ``_to_linux_path`` and paths already listed in
        ``processed_SFT_video.video_path`` are dropped. The number of
        candidate and remaining paths is printed to stdout.

        Returns:
            A list of path strings, in query order.
        """
        # NOTE(review): NOT IN matches nothing if the subquery yields a NULL
        # original_video_id -- confirm that column cannot be NULL.
        sql = """
            SELECT vf.new_path
            FROM vc_file vf
            WHERE vf.id NOT IN (
                SELECT s.original_video_id
                FROM al_video_segment_info s
            )
            AND vf.type = 1
            AND vf.width > 1080
            AND vf.audio_bitrate IS NOT NULL
        """
        # Both queries share one connection.
        conn = self._get_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql)
                paths = [row[0] for row in cursor.fetchall() if row[0]]
            print("paths:",len(paths))

            with conn.cursor() as cursor2:
                cursor2.execute("SELECT video_path FROM processed_SFT_video")
                existing = {row[0] for row in cursor2.fetchall()}
        finally:
            conn.close()

        # Convert first, then filter: the exclusion compares converted paths.
        linux_paths = [lp for lp in map(self._to_linux_path, paths) if lp]
        final_paths = [p for p in linux_paths if p not in existing]
        print("final_paths:",len(final_paths))
        return final_paths

    def insert_processed_video(self, video_path: str):
        """Record ``video_path`` in ``processed_SFT_video`` using ``INSERT IGNORE``."""
        sql = """
            INSERT IGNORE INTO processed_SFT_video (video_path)
            VALUES (%s)
        """
        self._execute_commit(sql, (video_path,))

    def save_vcfile_video_result(self, result: dict):
        """Insert or update one analysis result row in ``vc_sft_results``.

        ``result`` has the same shape as for ``save_video_result``:
        ``video_id`` and ``video_path`` are required, ``duration`` (seconds)
        and ``info`` (serialised with ``json.dumps``) are optional.

        Raises:
            KeyError: If ``video_id`` or ``video_path`` is missing.
        """
        self._save_result("vc_sft_results", result)
下一章将介绍 VideoDecoder 解码模块。