环境搭建
pip install selenium
pip install beautifulsoup4
pip install requests
pip install ffmpy3
getMovePage.py
python
from selenium import webdriver
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service
# Absolute path to the msedgedriver executable that is passed to the Edge
# Service as executable_path; machine-specific, adjust to the local install.
edge_path = r"C:\Users\11313\.cache\selenium\msedgedriver\win64\128.0.2739.67\msedgedriver.exe"
class WebBrowser:
    """Headless Microsoft Edge session used to fetch rendered page source.

    Args:
        driver_path: Path to the msedgedriver executable. When omitted, the
            module-level ``edge_path`` is used.
        port: Port the driver service is started on (default 53478).

    The instance can also be used as a context manager, in which case the
    browser is shut down when the ``with`` block exits.
    """

    # Desktop Edge 128 user agent sent instead of the browser's default one.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    )

    def __init__(self, driver_path=None, port=53478):
        edge_options = Options()
        # Run without a visible window.
        edge_options.add_argument('headless')
        # Disable GPU acceleration.
        edge_options.add_argument('disable-gpu')
        # The Blink feature is named "AutomationControlled"; a misspelled
        # feature name is silently ignored by the browser.
        edge_options.add_argument('disable-blink-features=AutomationControlled')
        # The Chromium switch is spelled "user-agent" (hyphen, not underscore).
        edge_options.add_argument(f'user-agent={self.USER_AGENT}')
        # Chromium expects the UI language as language[-country].
        edge_options.add_argument('lang=zh-CN')
        # NOTE(review): "UseWebView2" and "enable-automation" are kept exactly
        # as the original passed them; confirm they are still wanted.
        edge_options.add_argument('UseWebView2=True')
        edge_options.add_argument('enable-automation')

        service = Service(
            executable_path=edge_path if driver_path is None else driver_path,
            port=port,
            verbose=True,
        )
        self.browser = webdriver.Edge(service=service, options=edge_options)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        self.exit()

    def exit(self):
        """Quit the browser and release the driver session."""
        self.browser.quit()

    def getContent(self, url):
        """Navigate to ``url`` and return the page source as a string.

        No explicit wait is performed beyond what ``browser.get`` itself does.
        """
        self.browser.get(url)
        return self.browser.page_source
# url="https://www.kankanwu.com/Domestic/sifangguan/player-0-0.html"
# browser = WebBrowser()
# # 处理页面内容
# print(browser.getContent(url))
# browser.exit()
注: 如果出现 "Can not connect to the Service msedgedriver" 错误,请先手动运行 edge_path 指向的 msedgedriver.exe,将代码中的 port=53478 替换为它启动后显示的端口号,然后重新运行程序
getM3u8.py
python
import requests
from bs4 import BeautifulSoup
from bs4 import Comment
import getMovePage
def parselKanKanWuUrl(searchText, prefix="https://v8.fentvoss.com", suffix="index.m3u8"):
    """Extract the first m3u8 playlist URL embedded in a page's source text.

    The URL is taken to start at the first occurrence of ``prefix`` and to end
    with the first occurrence of ``suffix`` that follows it.

    Args:
        searchText: Page source to scan. ``None`` or an empty string yields
            an empty result.
        prefix: Text the playlist URL starts with.
        suffix: Text the playlist URL ends with.

    Returns:
        A one-element list holding the URL, or an empty list when either
        marker is missing.
    """
    if not searchText:
        return []

    # str.find reports "not found" as -1 and never returns None.
    start = searchText.find(prefix)
    if start == -1:
        return []

    # Look for the suffix only after the prefix so the two cannot overlap.
    end = searchText.find(suffix, start + len(prefix))
    if end == -1:
        return []

    return [searchText[start:end + len(suffix)]]
def getM3u8List(url):
    """Load ``url`` in a headless browser and return the m3u8 URLs found in it.

    Args:
        url: Address of the player page to load.

    Returns:
        The list produced by ``parselKanKanWuUrl`` for the page source
        (empty when no playlist URL is found).
    """
    browser = getMovePage.WebBrowser()
    try:
        content = browser.getContent(url)
    finally:
        # Always shut the browser down, even when loading the page raised,
        # so no browser/driver process is left running.
        browser.exit()
    return parselKanKanWuUrl(content)
# print(getM3u8List("https://www.kankanwu.com/Domestic/sifangguan/player-0-0.html"))
downM3u8.py
python
from ffmpy3 import FFmpeg
def downM3u8(m3u8list, ffmpeg_path=r"E:\program\ffmpeg\bin\ffmpeg.exe", start_index=0):
    """Download each m3u8 stream to an mp4 file in the working directory.

    One ffmpeg run is performed per URL; the output file is named
    ``第N集.mp4`` where N is the running episode number.

    Args:
        m3u8list: Iterable of m3u8 playlist URLs.
        ffmpeg_path: Path to the ffmpeg executable; override it to match the
            local installation.
        start_index: Number used for the first output file (default 0).
    """
    for number, url in enumerate(m3u8list, start=start_index):
        job = FFmpeg(
            executable=ffmpeg_path,
            inputs={url: None},
            outputs={'第{}集.mp4'.format(number): None},
        )
        job.run()
注: FFmpegDir 需替换为环境搭建时安装位置
saveVideo.py
python
import getM3u8
import downM3u8
# Player page of the episode to download.
url = "https://www.kankanwu.com/Domestic/sifangguan/player-0-0.html"


def main():
    """Resolve the stream URL(s) for ``url`` and download them."""
    m3u8 = getM3u8.getM3u8List(url)
    if not m3u8:
        # Without this message an unmatched page ends the script silently.
        print(f"No m3u8 stream URL found in {url}")
        return
    downM3u8.downM3u8(m3u8)


# Guarded so that importing this module does not start a browser or a download.
if __name__ == "__main__":
    main()
注:集数切换只需改后缀,如player-0-2.html表示第二集