python获取快手账号列表数据

快手数据获取相对简单访问地址固定且不需要登录token

列表地址获取的固定接口

https://www.kuaishou.com/graphql

发送post请求注意每个快手账号对应的id

python 复制代码
import time
from datetime import datetime
import logging
import json
import pymysql
import requests

# 创建一个logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)  # 设置日志级别

# 创建一个handler,用于写入日志文件
fh = logging.FileHandler('ks.log', encoding='utf-8')  # 日志文件名

# 定义handler的输出格式
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)

# 给logger添加handler
logger.addHandler(fh)

#!!!数据库链接需要调整
mydatabase = pymysql.connect(host='localhost',
                             user='root',
                             password='123456',
                             database='ry',
                             charset='utf8mb4')
cursor = mydatabase.cursor()

cursor.execute(
    'SELECT id,base_media_name,dy_url FROM `media_account_manager2` where `type_id` = 484 AND `status` = 3 and dy_url is not null'
)
result = cursor.fetchall()
headers = {
    'Cookie': 'kpf=PC_WEB; clientid=3; did=web_8239e5591749f85a281700fcf0834715; didv=1719032992223; kpn=KUAISHOU_VISION',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
now = datetime.now()
now_formatted_date = now.strftime("%Y-%m-%d")

try:
    for row in result:
        m_id = row[0]
        ks_name = row[1]
        ks_url = row[2]
        time.sleep(5)
        print(ks_name)
        print('***************************************')
        urlKs = 'https://www.kuaishou.com/graphql'
        jsonObj = {
            "operationName":"visionProfilePhotoList",
            "variables":{
                "userId":ks_url,
                "pcursor":"",
                "page":"profile"
            },
            "query":"fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
        }
        response = requests.post(url=urlKs, json=jsonObj, headers=headers)
        if response.status_code == 200:
            response_data2 = response.json()
            data = response_data2.get('data',{}).get('visionProfilePhotoList',{}).get('feeds',[])
            print(data)
            for item in data:
                itemName = item.get('photo',{}).get('caption','null')
                photoUrl = item.get('photo',{}).get('photoUrl','null')
                timestamp = item.get('photo',{}).get('timestamp','null')
                timestamp_s = timestamp / 1000
                dt_object = datetime.fromtimestamp(timestamp_s)
                # dt_object = datetime.fromtimestamp(timestamp)
                # formatted_date = dt_object.strftime('%Y-%m-%d')
                create_time_str = dt_object.strftime("%Y-%m-%d %H:%M:%S")
                print(itemName)#视频名称
                print(photoUrl)#视频地址
                print(create_time_str)#视频发布时间
                text = ''
                insert_query = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
                # 执行插入操作
                cursor.execute(
                    insert_query,
                    (itemName, create_time_str, photoUrl, text, m_id, ks_name, '483', '快手自动抓取'))
                # 提交事务
                mydatabase.commit()
     
except Exception as e:
    logger.info('******快手获取发生错误********')
    logger.info(e)
    logger.info(item)
    logger.info('******快手账号:' + ks_name + ',数据获取异常******')
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()
相关推荐
曲幽8 小时前
FastAPI + PostgreSQL 实战:从入门到不踩坑,一次讲透
python·sql·postgresql·fastapi·web·postgres·db·asyncpg
用户83562907805113 小时前
使用 C# 在 Excel 中创建数据透视表
后端·python
码路飞16 小时前
FastMCP 实战:一个 .py 文件,给 Claude Code 装上 3 个超实用工具
python·ai编程·mcp
dev派18 小时前
AI Agent 系统中的常用 Workflow 模式(2) Evaluator-Optimizer模式
python·langchain
前端付豪20 小时前
AI 数学辅导老师项目构想和初始化
前端·后端·python
用户03321266636720 小时前
将 PDF 文档转换为图片【Python 教程】
python
悟空爬虫21 小时前
UV实战教程,我啥要从Anaconda切换到uv来管理包?
python
dev派21 小时前
AI Agent 系统中的常用 Workflow 模式(1)
python·langchain
明月_清风1 天前
从“能用”到“专业”:构建生产级装饰器与三层逻辑拆解
后端·python
曲幽1 天前
数据库实战:FastAPI + SQLAlchemy 2.0 + Alembic 从零搭建,踩坑实录
python·fastapi·web·sqlalchemy·db·asyncio·alembic