python搞资源来

python 复制代码
from datetime import datetime
import requests
from pyquery import PyQuery as pq
import os
import json
from json.decoder import JSONDecodeError

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.40'
}

host = 'https://www.dy2018.com/'

def load_existing_data():
    """Load the previously saved movie records from ``movies.json``.

    The file is read from the current working directory as UTF-8 JSON.

    Returns:
        The list stored in ``movies.json``. An empty list is returned when
        the file does not exist, cannot be decoded as JSON, or holds JSON
        whose top-level value is not a list.
    """
    try:
        with open('movies.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been saved yet.
        return []
    except JSONDecodeError:
        print("Warning: Could not decode JSON in 'movies.json', starting with an empty list.")
        return []

    # The result is appended to by the caller, so a non-list top-level value
    # (for example a hand-edited file holding an object) is handled the same
    # way as a corrupt file instead of failing later on ``.append``.
    if not isinstance(data, list):
        print("Warning: 'movies.json' does not contain a JSON list, starting with an empty list.")
        return []
    return data

def savejson(url: str = '', timeout: float = 10.0):
    """Fetch one movie detail page and record its download link in ``movies.json``.

    The page at ``host`` joined with ``url`` is downloaded, the ``href`` of
    the first matching link under ``#downlist`` and the page ``<title>`` text
    are extracted, and a ``{'title': ..., 'link': ...}`` record is appended to
    ``movies.json`` unless an identical record is already stored.

    Args:
        url: Path (or URL) of the detail page, resolved against ``host``.
            An empty value prints a message and does nothing.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Request failures are reported on stdout and the page is skipped.
    """
    if not url:
        print('URL 不能为空')
        return

    # Function-scope import so this block does not depend on a file-level one.
    from urllib.parse import urljoin

    # ``host`` ends with '/', so plain concatenation yields '//' whenever the
    # href itself starts with '/'; urljoin resolves both forms correctly.
    page_url = urljoin(host, url)
    try:
        response = requests.get(page_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f'请求失败: {page_url} ({e})')
        return

    # Force the decoding used for ``response.text``; adjust manually if the
    # page turns out to use a different charset.
    response.encoding = 'gb2312'

    # Parse the document once and query it twice.
    doc = pq(response.text)
    magnet = doc.find("#downlist table tbody tr td a").attr('href')
    title = doc.find('title').text()

    if not magnet:
        return

    movie_data = {'title': title, 'link': magnet}
    existing_movies = load_existing_data()
    if movie_data in existing_movies:
        return

    print(magnet, title)
    existing_movies.append(movie_data)

    # Only rewrite the file when a new record was actually added.
    with open('movies.json', 'w', encoding='utf-8') as f:
        json.dump(existing_movies, f, ensure_ascii=False, indent=4)

def getUrl(page: int = 1):
    """Scrape one listing page and save every movie linked from it.

    Pages below 2 map to ``index.html``; any other page maps to
    ``index_{page}.html`` under ``{host}html/gndy/dyzz/``. For each table in
    the listing, the ``href`` of its link is handed to ``savejson``.

    Args:
        page: Listing page number. Non-integer values are rejected with a
            message instead of raising.

    Returns:
        None. Failures are reported on stdout.
    """
    # Validate before building the URL: comparing a non-number with 2 raises
    # TypeError, and that comparison happens before any handler is active.
    if not isinstance(page, int):
        print('只能是数字哦')
        return

    url = f'{host}html/gndy/dyzz/index.html' if page < 2 else f'{host}html/gndy/dyzz/index_{page}.html'
    try:
        content = requests.get(url, headers=headers, timeout=10)
        content.raise_for_status()
        listing = pq(content.text).find('.co_content8 ul table')
    except Exception as e:
        # Kept broad on purpose (this is the script's best-effort boundary),
        # but the real cause is reported rather than a "numbers only" hint.
        print(f'获取第 {page} 页失败: {e}')
        return

    for entry in listing.items():
        href = entry.find('a').attr('href')
        if not href:
            continue
        try:
            savejson(href)
        except Exception as e:
            # One failing movie must not abort the rest of the page.
            print(f'保存 {href} 失败: {e}')

def main(read=input, fetch=None):
    """Run the interactive page-download prompt until the user quits.

    Each round shows the pages downloaded so far and asks for a page number.
    An empty answer selects the suggested next page, ``q`` (any case) or
    end-of-input exits, and anything else must be an integer >= 1.

    Args:
        read: Callable taking the prompt string and returning the user's
            answer; defaults to the built-in ``input``.
        fetch: Callable taking a page number and downloading that page;
            defaults to ``getUrl`` (resolved lazily at call time).
    """
    if fetch is None:
        fetch = getUrl

    downloaded_pages = set()  # page numbers already downloaded in this session
    next_page = 1  # page used when the user just presses Enter

    while True:
        # Show which pages have been downloaded so far.
        downloaded_str = ", ".join(map(str, sorted(downloaded_pages)))
        prompt_msg = f"请输入页码(已下载:[{downloaded_str}],直接回车默认为{next_page},输入q退出):"
        try:
            user_input = read(prompt_msg).strip()
        except EOFError:
            # Closed stdin (Ctrl-D / piped input exhausted) is a normal exit.
            print("退出程序")
            break

        if user_input.lower() == 'q':
            print("退出程序")
            break

        if user_input == '':
            page_number = next_page
        else:
            try:
                page_number = int(user_input)
            except ValueError:
                print("只能输入数字或 'q' 退出")
                continue
            # getUrl maps every page below 2 to index.html, so 0 and negative
            # numbers would re-download page 1 under a different label.
            if page_number < 1:
                print("页码必须是大于等于 1 的整数")
                continue

        if page_number in downloaded_pages:
            print(f"页码 {page_number} 已经下载过。请输入新的页码或者下一页。")
            continue

        fetch(page_number)
        downloaded_pages.add(page_number)

        # Advance the default past pages that are already done; otherwise
        # pressing Enter could keep proposing a page that is then rejected.
        next_page = page_number + 1
        while next_page in downloaded_pages:
            next_page += 1


if __name__ == "__main__":
    main()
相关推荐
拾光师1 分钟前
Java AIO 详解:异步非阻塞 IO 的实现与实践
后端
伊灵eLing11 分钟前
GoLang 语言基础
开发语言·后端·golang
techdashen16 分钟前
What is maintenance, anyway?
开发语言·后端·rust
用户298698530141 小时前
Java 实战:Word 文档中超链接的添加与自定义技巧
java·后端
铁皮饭盒1 小时前
用bunjs代码讲解XSS/CSRF/SQL注入/DDos等10种前后端安全防护
前端·后端
不考研当牛马1 小时前
Django 框架 深度学习 第二课程
后端·python·django
我登哥MVP1 小时前
SpringCloud 核心组件解析:服务注册与发现
java·spring boot·后端·spring·spring cloud·java-ee·maven
uhakadotcom1 小时前
什么是Mass Assignment(批量赋值)风险
后端·面试·github
XovH1 小时前
Redis 从入门到精通:Python 操作 Redis 进阶
后端