python获取快手账号列表数据

快手数据获取相对简单访问地址固定且不需要登录token

列表地址获取的固定接口

https://www.kuaishou.com/graphql

发送post请求注意每个快手账号对应的id

python 复制代码
import time
from datetime import datetime
import logging
import json
import pymysql
import requests

# 创建一个logger
logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)  # 设置日志级别

# 创建一个handler,用于写入日志文件
fh = logging.FileHandler('ks.log', encoding='utf-8')  # 日志文件名

# 定义handler的输出格式
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)

# 给logger添加handler
logger.addHandler(fh)

#!!!数据库链接需要调整
mydatabase = pymysql.connect(host='localhost',
                             user='root',
                             password='123456',
                             database='ry',
                             charset='utf8mb4')
cursor = mydatabase.cursor()

cursor.execute(
    'SELECT id,base_media_name,dy_url FROM `media_account_manager2` where `type_id` = 484 AND `status` = 3 and dy_url is not null'
)
result = cursor.fetchall()
headers = {
    'Cookie': 'kpf=PC_WEB; clientid=3; did=web_8239e5591749f85a281700fcf0834715; didv=1719032992223; kpn=KUAISHOU_VISION',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
now = datetime.now()
now_formatted_date = now.strftime("%Y-%m-%d")

try:
    for row in result:
        m_id = row[0]
        ks_name = row[1]
        ks_url = row[2]
        time.sleep(5)
        print(ks_name)
        print('***************************************')
        urlKs = 'https://www.kuaishou.com/graphql'
        jsonObj = {
            "operationName":"visionProfilePhotoList",
            "variables":{
                "userId":ks_url,
                "pcursor":"",
                "page":"profile"
            },
            "query":"fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
        }
        response = requests.post(url=urlKs, json=jsonObj, headers=headers)
        if response.status_code == 200:
            response_data2 = response.json()
            data = response_data2.get('data',{}).get('visionProfilePhotoList',{}).get('feeds',[])
            print(data)
            for item in data:
                itemName = item.get('photo',{}).get('caption','null')
                photoUrl = item.get('photo',{}).get('photoUrl','null')
                timestamp = item.get('photo',{}).get('timestamp','null')
                timestamp_s = timestamp / 1000
                dt_object = datetime.fromtimestamp(timestamp_s)
                # dt_object = datetime.fromtimestamp(timestamp)
                # formatted_date = dt_object.strftime('%Y-%m-%d')
                create_time_str = dt_object.strftime("%Y-%m-%d %H:%M:%S")
                print(itemName)#视频名称
                print(photoUrl)#视频地址
                print(create_time_str)#视频发布时间
                text = ''
                insert_query = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
                # 执行插入操作
                cursor.execute(
                    insert_query,
                    (itemName, create_time_str, photoUrl, text, m_id, ks_name, '483', '快手自动抓取'))
                # 提交事务
                mydatabase.commit()
     
except Exception as e:
    logger.info('******快手获取发生错误********')
    logger.info(e)
    logger.info(item)
    logger.info('******快手账号:' + ks_name + ',数据获取异常******')
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()
相关推荐
小糖学代码7 分钟前
LLM系列:1.python入门:15.JSON 数据处理与操作
开发语言·python·json·aigc
yejqvow1213 分钟前
CSS如何控制placeholder文字的颜色_使用--placeholder伪元素
jvm·数据库·python
handler0115 分钟前
从源码到二进制:深度拆解 Linux 下 C 程序的编译与链接全流程
linux·c语言·开发语言·c++·笔记·学习
m0_7436239222 分钟前
HTML怎么创建多语言切换器_HTML语言选择下拉结构【指南】
jvm·数据库·python
pele30 分钟前
Angular 表单中基于下拉选择动态启用字段必填校验的完整实现
jvm·数据库·python
HHHHH1010HHHHH31 分钟前
Redis怎样判断节点是否主观下线_哨兵基于down-after-milliseconds参数的心跳超时判定
jvm·数据库·python
小白学大数据1 小时前
现代Python爬虫开发范式:基于Asyncio的高可用架构实战
开发语言·爬虫·python·架构
渔舟小调1 小时前
P19 | 前端加密通信层 pikachuNetwork.js 完整实现
开发语言·前端·javascript
不爱吃炸鸡柳1 小时前
数据结构精讲:树 → 二叉树 → 堆 从入门到实战
开发语言·数据结构
网络安全许木1 小时前
自学渗透测试第21天(基础命令复盘与DVWA熟悉)
开发语言·网络安全·渗透测试·php