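# Interactive scraper: walks dy2018.com listing pages and stores each movie's title and download link in movies.json.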
from datetime import datetime
import requests
from pyquery import PyQuery as pq
import os
import json
from json.decoder import JSONDecodeError
# Base URL of the site; it ends with a slash and page paths are appended to it.
host = 'https://www.dy2018.com/'

# Browser-like request headers passed to the requests.get calls in this file.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.40'
    ),
}
def load_existing_data():
    """Load the previously saved movie records from ``movies.json``.

    The file is looked up relative to the current working directory.

    Returns:
        The decoded list of records. An empty list is returned when the file
        does not exist, cannot be parsed as JSON, or holds something other
        than a list.
    """
    try:
        with open('movies.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet.
        return []
    except JSONDecodeError:
        print("Warning: Could not decode JSON in 'movies.json', starting with an empty list.")
        return []
    if not isinstance(data, list):
        # The result is appended to by its caller, so a dict/str/number that
        # happens to be valid JSON must not be handed back as-is.
        print("Warning: 'movies.json' does not contain a list, starting with an empty list.")
        return []
    return data
def savejson(url: str = ''):
    """Fetch one movie detail page and record its title and download link.

    The page at ``host`` joined with *url* is downloaded; the ``href`` of the
    link matched by ``#downlist table tbody tr td a`` and the text of the
    page ``<title>`` are extracted. When a link is found, a
    ``{'title': ..., 'link': ...}`` record is appended to ``movies.json``
    unless an identical record is already stored there.

    Args:
        url: Path of the detail page relative to ``host``. An empty value
            prints a message and returns without doing anything.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            a timeout).
    """
    if not url:
        print('URL 不能为空')
        return

    # Imported locally so this function does not rely on a file-level import.
    from urllib.parse import urljoin

    # ``host`` ends with '/', so plain concatenation produces '//' whenever
    # *url* starts with '/'; urljoin resolves both forms to a single slash
    # and leaves an already-absolute URL untouched.
    page_url = urljoin(host, url)
    # A timeout keeps one stalled connection from hanging the whole run.
    content = requests.get(page_url, headers=headers, timeout=30)
    content.encoding = 'gb2312'  # override the response encoding before reading .text

    document = pq(content.text)  # parse the HTML once and query it twice
    magnet = document.find("#downlist table tbody tr td a").attr('href')
    if not magnet:
        # No download link on this page: nothing to store.
        return
    title = document.find('title').text()

    movie_data = {'title': title, 'link': magnet}
    existing_movies = load_existing_data()
    if movie_data in existing_movies:
        return

    print(magnet, title)
    existing_movies.append(movie_data)
    with open('movies.json', 'w', encoding='utf-8') as f:
        json.dump(existing_movies, f, ensure_ascii=False, indent=4)
def getUrl(page: int = 1):
    """Download one listing page and save every movie linked from it.

    Each ``.co_content8 ul table`` element of the listing is inspected and the
    ``href`` of its link is passed to ``savejson``.

    Args:
        page: Listing page number. Values below 2 map to ``index.html``;
            any other value maps to ``index_{page}.html``.
    """
    if page < 2:
        url = f'{host}html/gndy/dyzz/index.html'
    else:
        url = f'{host}html/gndy/dyzz/index_{page}.html'

    try:
        # A timeout keeps a stalled connection from blocking the prompt forever.
        content = requests.get(url, headers=headers, timeout=30)
        co_content8 = pq(content.text).find('.co_content8 ul table')
    except Exception as e:
        # Kept broad on purpose (best-effort interactive tool), but the real
        # cause is reported instead of an unrelated "digits only" message.
        print(f'第 {page} 页获取失败: {e}')
        return

    for i in co_content8.items():
        href = i.find('a').attr('href')
        if not href:
            continue
        try:
            savejson(href)
        except Exception as e:
            # One broken detail page must not abort the rest of the listing.
            print(f'保存 {href} 失败: {e}')
# Interactive driver: repeatedly asks for a page number and downloads it.
downloaded_pages = set()  # page numbers that have already been downloaded
next_page = 1  # page used when the user just presses Enter

while True:
    # Show which pages have been downloaded so far.
    downloaded_str = ", ".join(map(str, sorted(downloaded_pages)))
    prompt_msg = f"请输入页码(已下载:[{downloaded_str}],直接回车默认为{next_page},输入q退出):"
    try:
        # Surrounding whitespace is ignored so "q " or " 3" behave as expected.
        user_input = input(prompt_msg).strip()
    except EOFError:
        # stdin was closed (Ctrl-D or exhausted piped input): leave like 'q'.
        print("退出程序")
        break

    if user_input == 'q':
        print("退出程序")
        break

    if user_input == '':
        page_number = next_page
    else:
        # Only the conversion is guarded, so a ValueError raised while
        # downloading is not misreported as bad input.
        try:
            page_number = int(user_input)
        except ValueError:
            print("只能输入数字或 'q' 退出")
            continue
        if page_number < 1:
            # getUrl maps every value below 2 to index.html, so 0 or a
            # negative number would re-download page 1 under another number.
            print("页码必须是大于等于 1 的数字")
            continue

    if page_number in downloaded_pages:
        print(f"页码 {page_number} 已经下载过。请输入新的页码或者下一页。")
        continue

    getUrl(page_number)
    downloaded_pages.add(page_number)

    # Advance the default, skipping pages that were already downloaded so
    # that pressing Enter never lands on a finished page.
    next_page = page_number + 1
    while next_page in downloaded_pages:
        next_page += 1