Python 多线程将 m3u8 转为 mp4 并过滤广告(结合 ffmpeg 使用)

python代码:

py 复制代码
import csv
import os
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin
import sys
import requests

def resource_path(relative_path):
    """Return the absolute path of a bundled resource file.

    Inside a PyInstaller bundle the base directory is ``sys._MEIPASS``;
    otherwise it is the absolute form of the current working directory.
    """
    # PyInstaller sets sys._MEIPASS only inside a packaged executable.
    root_dir = sys._MEIPASS if hasattr(sys, "_MEIPASS") else os.path.abspath(".")
    return os.path.join(root_dir, relative_path)

def process_csv(csv_file, output_dir, begin_num, max_workers=8):
    """Process every playlist listed in a CSV file using a thread pool.

    The first row of the file is treated as a header and skipped. The
    remaining rows are numbered from 1. Each row must contain exactly three
    columns: the first is used as the video title and the third as the m3u8
    URL (the second is ignored). Rows with a different column count, such as
    blank lines, are reported and skipped instead of aborting the whole run.

    Args:
        csv_file: Path of the CSV file, resolved through ``resource_path``.
        output_dir: Directory passed to each task for its playlist files.
        begin_num: 1-based row number to start from; earlier rows are skipped.
        max_workers: Size of the thread pool.

    On Ctrl+C the submitted futures are cancelled and the process exits with
    status 1.
    """
    csv_file = resource_path(csv_file)
    # newline='' is what the csv module requires for correct handling of
    # quoted fields that contain line breaks.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        # Skip the header row; an empty file has nothing to process.
        if next(reader, None) is None:
            print(f"CSV file {csv_file} is empty, nothing to process.")
            return

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            try:
                for index, row in enumerate(reader, start=1):
                    if index < begin_num:
                        continue

                    # Malformed rows still consume an index so that the
                    # numbering of output files stays tied to the row number.
                    if len(row) != 3:
                        print(f"Skipping malformed row {index}: {row}")
                        continue

                    print(f"Processing row {index}...")
                    title, _, m3u8_url = row
                    base_name = f"{index}.m3u8"
                    cleaned_m3u8_file_name = f"{index}_cleaned.m3u8"

                    # Hand the job to the thread pool.
                    future = executor.submit(download_and_process_m3u8, m3u8_url, output_dir, base_name,
                                             cleaned_m3u8_file_name, title, index)
                    futures.append(future)

                # Wait for every task; one failing task must not stop the rest.
                for future in as_completed(futures):
                    try:
                        future.result()
                    except Exception as e:
                        print(f"Task generated an exception: {e}")
            except KeyboardInterrupt:
                print("用户中断,正在取消所有任务...")
                for future in futures:
                    future.cancel()
                executor.shutdown(wait=False)
                sys.exit(1)

def download_and_process_m3u8(m3u8_url, output_dir, m3u8_file_name, cleaned_m3u8_file_name, title, index):
    """Run the full pipeline for one playlist: download, filter, convert.

    Steps:
      1. Download the playlist at ``m3u8_url`` into ``output_dir``.
      2. Call ``download_ts_files`` to fetch what the playlist references
         into ``<index>_ts.m3u8``.
      3. Call ``filter_advertisement`` on that file to produce the cleaned
         playlist.
      4. Call ``convert_to_mp4`` to write the MP4 into the ``video_output``
         directory (hard-coded here).

    Args:
        m3u8_url: URL of the playlist to download.
        output_dir: Directory for the downloaded and intermediate playlists.
        m3u8_file_name: File name for the raw downloaded playlist.
        cleaned_m3u8_file_name: File name for the ad-filtered playlist.
        title: Video title, used in the MP4 file name.
        index: Row number, used in intermediate and output file names.

    Raises:
        requests.RequestException: If the playlist cannot be downloaded
            (network error, timeout or HTTP error status).
    """
    # The timeout keeps a stalled server from blocking a worker thread forever.
    response = requests.get(m3u8_url, timeout=30)
    # Fail early so an HTTP error page is not saved and parsed as a playlist.
    response.raise_for_status()
    m3u8_content = response.text
    print(f"Downloaded m3u8 file from {m3u8_url}, content: {m3u8_content}")
    m3u8_file_path = os.path.join(output_dir, m3u8_file_name)

    with open(m3u8_file_path, 'w') as m3u8_file:
        m3u8_file.write(m3u8_content)

    print(f"m3u8 file downloaded and saved as {m3u8_file_path}")

    tsM3u8 = os.path.join(output_dir, f"{index}_ts.m3u8")
    download_ts_files(m3u8_file_path, m3u8_url, tsM3u8)

    cleaned_m3u8_file_path = os.path.join(output_dir, cleaned_m3u8_file_name)
    cleaned_m3u8_path = filter_advertisement(m3u8_url, tsM3u8, cleaned_m3u8_file_path)

    convert_to_mp4(cleaned_m3u8_path, "video_output", title, index)

def download_ts_files(m3u8_file_path, m3u8_url, ts_filename):
    """Download every resource referenced by a playlist file.

    Each non-comment, non-blank line of ``m3u8_file_path`` is resolved
    against ``m3u8_url`` and downloaded to ``ts_filename``.

    NOTE: every download is written to the same ``ts_filename``, so a later
    download overwrites an earlier one. Presumably the input is a master
    playlist with a single nested playlist entry - verify against the caller.

    Args:
        m3u8_file_path: Local path of the playlist to read.
        m3u8_url: URL the playlist came from; base for relative entries.
        ts_filename: Local path the downloaded content is written to.
    """
    with open(m3u8_file_path, 'r') as m3u8_file:
        lines = m3u8_file.readlines()

    for raw_line in lines:
        entry = raw_line.strip()
        # A blank entry would resolve to the playlist URL itself and overwrite
        # the target file with the playlist, so it is skipped with the tags.
        if not entry or entry.startswith('#'):
            continue
        ts_url = urljoin(m3u8_url, entry)

        # The timeout keeps a stalled server from blocking the worker forever.
        with requests.get(ts_url, stream=True, timeout=30) as r:
            if r.status_code == 200:
                with open(ts_filename, 'wb') as ts_file:
                    for chunk in r.iter_content(chunk_size=8192):
                        ts_file.write(chunk)
                print(f"Downloaded {ts_filename}")
            else:
                print(f"Failed to download {ts_url}, status code: {r.status_code}")

    print("All .ts files have been downloaded.")

def filter_advertisement(base_url, m3u8_file_path, cleaned_m3u8_file_path):
    """Write a copy of a playlist with the advertisement entries removed.

    Filtering rules, applied line by line:
      * The first ``#EXT-X-KEY`` line starts a skipped section. That line and
        everything after it is dropped until the next
        ``#EXT-X-DISCONTINUITY`` line, which ends the section and is kept.
      * Any line whose stripped text starts with ``https`` is dropped, along
        with the ``#EXTINF`` line directly before it if one was kept.

    After filtering, relative ``URI="..."`` values on remaining
    ``#EXT-X-KEY`` lines and segment lines starting with ``/`` are rewritten
    as absolute URLs based on ``base_url``.

    Args:
        base_url: URL used to resolve relative key and segment paths.
        m3u8_file_path: Path of the playlist to read.
        cleaned_m3u8_file_path: Path the filtered playlist is written to.

    Returns:
        ``cleaned_m3u8_file_path``.
    """
    # Playlists are read and written as UTF-8 so the result does not depend on
    # the platform's locale encoding.
    with open(m3u8_file_path, 'r', encoding='utf-8') as m3u8_file:
        lines = m3u8_file.readlines()

    cleaned_lines = []
    skip = False
    ad_removed = False

    for line in lines:
        if line.startswith("#EXT-X-KEY") and not ad_removed:
            skip = True
            ad_removed = True
            print(f"Removing key and associated segments starting with: {line.strip()}")
            continue

        # Inside the skipped section, drop segment durations and segment URLs.
        if skip and line.startswith(("#EXTINF", "http", "/")):
            continue

        if line.strip().startswith("https"):
            # Drop the duration tag belonging to this segment as well. The
            # emptiness check covers an https line before any kept line.
            if cleaned_lines and cleaned_lines[-1].startswith("#EXTINF"):
                cleaned_lines.pop()
            continue

        if skip and line.startswith("#EXT-X-DISCONTINUITY"):
            skip = False

        if not skip:
            cleaned_lines.append(line)

    new_lines = []
    for line in cleaned_lines:
        if line.startswith('#EXT-X-KEY'):
            # Keys such as METHOD=NONE carry no URI and are kept unchanged.
            if 'URI="' in line:
                uri_part = line.split('URI="')[1].split('"')[0]
                # An empty URI is left alone: replacing '' would insert the
                # base URL between every character of the line.
                if uri_part and not uri_part.startswith('http'):
                    full_uri = urljoin(base_url, uri_part)
                    # Anchor on the attribute so only the URI value changes.
                    line = line.replace('URI="' + uri_part, 'URI="' + full_uri, 1)
            new_lines.append(line)
        elif line.startswith('/'):
            new_lines.append(urljoin(base_url, line.strip()) + "\n")
        else:
            new_lines.append(line)

    with open(cleaned_m3u8_file_path, 'w', encoding='utf-8') as cleaned_m3u8_file:
        cleaned_m3u8_file.writelines(new_lines)

    print(f"Filtered m3u8 file saved as {cleaned_m3u8_file_path}")
    return cleaned_m3u8_file_path

def _safe_filename(name):
    """Replace characters that are invalid in file names with underscores."""
    invalid = dict.fromkeys(map(ord, '\\/:*?"<>|'), "_")
    invalid.update(dict.fromkeys(range(32), "_"))  # control characters
    return str(name).translate(invalid)


def convert_to_mp4(m3u8_file_path, output_dir, title, index):
    """Convert a playlist to ``<output_dir>/<index>_<title>.mp4`` with ffmpeg.

    Streams are copied without re-encoding. Characters in ``title`` that are
    not valid in file names are replaced with underscores. Failures are
    printed rather than raised.

    Args:
        m3u8_file_path: Path of the playlist passed to ffmpeg as input.
        output_dir: Directory in which the MP4 file is created.
        title: Video title, used in the output file name.
        index: Row number, used as the output file name prefix.
    """
    output_mp4 = os.path.join(output_dir, f"{index}_{_safe_filename(title)}.mp4")
    ffmpeg_command = [
        "ffmpeg",
        # Overwrite existing outputs without asking; otherwise ffmpeg stops at
        # an interactive prompt when a video is processed a second time.
        "-y",
        "-protocol_whitelist", "file,http,https,tcp,tls,crypto",
        "-i", m3u8_file_path,
        "-c", "copy",
        "-bsf:a", "aac_adtstoasc",
        f"{output_mp4}"
    ]

    print(f"命令行:{' '.join(ffmpeg_command)}")

    try:
        subprocess.run(ffmpeg_command, check=True)
        print(f"Successfully created {output_mp4}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to create MP4: {e}")
    except OSError as e:
        # Raised when the ffmpeg executable cannot be started at all.
        print(f"Failed to run ffmpeg: {e}")

if __name__ == "__main__":
    # Script entry point: work out the row to resume from by looking at the
    # videos already present, then process the CSV.
    csv_file = "ai_video.csv"
    output_dir = "ts_files"
    video_output_dir = "video_output"
    max_workers = 4

    os.makedirs(video_output_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    # Collect finished videos named "<number>_<title>.mp4". Files without a
    # numeric prefix are ignored instead of crashing the numeric sort.
    numbered_files = []
    for name in os.listdir(video_output_dir):
        if name.startswith('.') or not os.path.isfile(os.path.join(video_output_dir, name)):
            continue
        try:
            number = int(name.split('_')[0])
        except ValueError:
            continue
        numbered_files.append((number, name))

    numbered_files.sort(key=lambda item: item[0])
    files_sorted = [name for _, name in numbered_files]

    print("files:", files_sorted)
    begin_num = 1
    if numbered_files:
        num, last_file = numbered_files[-1]
        # Step back by the pool size: the last max_workers videos may have
        # been interrupted mid-write, so they are processed again.
        begin_num = max(1, num - max_workers + 1)

        print("最后一个文件名是:", last_file, begin_num)
    else:
        print("目录中没有符合条件的文件。")

    try:
        process_csv(csv_file, output_dir, begin_num, max_workers=max_workers)
    except KeyboardInterrupt:
        print("程序被用户中断。")

将项目依赖写入 requirements.txt:pip freeze > requirements.txt

使用 GitHub Actions 打包(在项目根目录创建工作流目录):

mkdir -p .github/workflows

yaml 复制代码
# Builds a single-file Windows executable with PyInstaller on every push and
# publishes it as a workflow artifact.
name: Build Windows Executable

on: [push]

jobs:
  build:
    runs-on: windows-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12.4'  # use the Python version that suits your project

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyinstaller requests  # install PyInstaller and requests
          pip install -r requirements.txt  # if you have a requirements.txt file

      - name: Build executable
        run: pyinstaller --onefile --add-data "ai_video.csv;." test_ts.py

      # upload-artifact v2 is deprecated and no longer accepted by GitHub.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: Windows Executable
          path: dist/test_ts.exe

ffmpeg 常用命令

sh 复制代码
ffmpeg -i demo.mp4 -ss 1 -f image2 -vframes 1 out.jpg  # 原视频截图
ffmpeg -hide_banner -i demo.mp4 -i logo.png -filter_complex "overlay=x=xxx:y=xxx" with_watermark.mp4 -y  # 原视频添加水印
ffmpeg -i with_watermark.mp4 -ss 1 -f image2 -vframes 1 with_watermark.jpg  # 对添加水印视频截图
ffmpeg -i with_watermark.mp4 -vf "delogo=x=xxx:y=xxx:w=xxx:h=xxx:show=0" -c:a copy no_watermark.mp4  # 给添加水印的视频,去除水印
ffmpeg -protocol_whitelist "file,http,https,tcp,tls,crypto" -i ./ts_files/cleaned_index.m3u8 -c copy -bsf:a aac_adtstoasc output.mp4  # 将本地 m3u8 转成 mp4 视频
ffmpeg -i "https://xxx.com/20240814/Zf8gOK3i/index.m3u8" -c copy output.ts  # 将在线 m3u8 转成 ts 视频

PS 网页版:https://www.nuanque.com/ps/

ffmpeg -i demo.mp4 -ss 16 -f image2 -vframes 1 out.jpg  # 原视频截图
ffmpeg -i demo.mp4 -vf "delogo=x=892:y=589:w=385:h=113:show=0" -c:a copy no_watermark.mp4  # 给添加水印的视频,去除水印
ffmpeg -i demo.mp4 -vf "delogo=x=892:y=589:w=385:h=113:show=0, delogo=x=100:y=100:w=150:h=50:show=0" -c:a copy no_watermark.mp4  # 同时去除多处水印
相关推荐
全能全知者34 分钟前
不废话简单易懂的Selenium 页面操作与切换
python·selenium·测试工具·网络爬虫
你可以自己看2 小时前
python的基础语法
开发语言·python
akhfuiigabv4 小时前
使用Neo4j-Cypher-FT实现自然语言查询图数据库
数据库·python·oracle·neo4j
繁依Fanyi4 小时前
828华为云征文|华为Flexus云服务器搭建OnlyOffice私有化在线办公套件
服务器·开发语言·前端·python·算法·华为·华为云
zhangfeng11334 小时前
在 PyTorch 中,除了 pad_sequence 还有哪些其他处理序列数据的函数?时间序列数据 预处理
人工智能·pytorch·python·深度学习
python1564 小时前
Python Numpy布尔数组在数据分析中的应用
python·数据分析·numpy
AIAdvocate5 小时前
力扣-96.不同的二叉搜索树 题目详解
python·算法·动态规划
luthane5 小时前
python 实现entropy熵算法
python·算法·概率论
akhfuiigabv5 小时前
探索Timescale Vector与Postgres数据库的融合:AI应用的新选择
数据库·人工智能·python
hakesashou5 小时前
ruby和python哪个好学
开发语言·python·ruby