python获取快手账号列表数据

快手数据的获取相对简单:访问地址固定,且不需要登录 token。

获取列表数据的固定接口地址:

https://www.kuaishou.com/graphql

向该地址发送 POST 请求,注意请求参数 userId 需要填写每个快手账号对应的 id。

python 复制代码
import time
from datetime import datetime
import logging
import json
import pymysql
import requests

# Record layout: "timestamp - logger name - level - message".
formatter = logging.Formatter(
    fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# File handler that writes UTF-8 encoded records to ks.log.
fh = logging.FileHandler(filename='ks.log', encoding='utf-8')
fh.setFormatter(formatter)

# Logger used by this script; DEBUG lets every record through to the handler.
logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)
logger.addHandler(fh)

# NOTE: the connection settings below must be adjusted for the target database.
mydatabase = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    database='ry',
    charset='utf8mb4',
)
cursor = mydatabase.cursor()

# Select (id, base_media_name, dy_url) for every account row with
# type_id 484, status 3 and a non-null dy_url.
account_query = (
    'SELECT id,base_media_name,dy_url FROM `media_account_manager2` '
    'where `type_id` = 484 AND `status` = 3 and dy_url is not null'
)
cursor.execute(account_query)
result = cursor.fetchall()
# HTTP headers sent with each request: a fixed web cookie (no login token)
# and a desktop Chrome User-Agent string.
headers = {
    'Cookie': (
        'kpf=PC_WEB; '
        'clientid=3; '
        'did=web_8239e5591749f85a281700fcf0834715; '
        'didv=1719032992223; '
        'kpn=KUAISHOU_VISION'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
}

# Start time of this run and its YYYY-MM-DD form.
now = datetime.now()
now_formatted_date = f"{now:%Y-%m-%d}"

# ---------------------------------------------------------------------------
# Crawl loop: for every account row in `result`, POST the
# visionProfilePhotoList GraphQL query and insert each returned video into
# `ry`.`media_content`. Only the first page is requested (pcursor is always
# empty). Any exception stops the whole run; the connection is always closed.
# ---------------------------------------------------------------------------

# GraphQL endpoint that serves the profile video list.
KS_GRAPHQL_URL = 'https://www.kuaishou.com/graphql'

# Seconds to wait for a response; without a timeout a stalled connection
# would block the script forever.
KS_REQUEST_TIMEOUT = 30

# GraphQL document for the visionProfilePhotoList operation. It never changes
# between accounts, so it is built once instead of once per iteration.
KS_PROFILE_QUERY = "fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"

# Parameterized insert for one crawled video.
KS_INSERT_QUERY = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"


def _format_publish_time(timestamp_ms):
    """Format a millisecond epoch timestamp as 'YYYY-MM-DD HH:MM:SS'.

    The conversion uses ``datetime.fromtimestamp`` without a timezone, i.e.
    the local time of the machine running the script.

    Returns:
        The formatted string, or ``None`` when ``timestamp_ms`` is missing or
        cannot be interpreted as a timestamp.
    """
    try:
        moment = datetime.fromtimestamp(float(timestamp_ms) / 1000)
    except (TypeError, ValueError, OverflowError, OSError):
        return None
    return moment.strftime("%Y-%m-%d %H:%M:%S")


# Pre-bind the names the error handler reports, so a failure that happens
# before the first account / first feed item cannot raise NameError there.
ks_name = None
item = None

try:
    for row in result:
        m_id = row[0]
        ks_name = row[1]
        ks_url = row[2]  # the dy_url column is sent as the GraphQL userId
        item = None  # do not report the previous account's item on failure
        # Pause 5 s before each request (presumably to avoid hammering the API).
        time.sleep(5)
        print(ks_name)
        print('***************************************')
        jsonObj = {
            "operationName": "visionProfilePhotoList",
            "variables": {
                "userId": ks_url,
                "pcursor": "",
                "page": "profile",
            },
            "query": KS_PROFILE_QUERY,
        }
        response = requests.post(url=KS_GRAPHQL_URL,
                                 json=jsonObj,
                                 headers=headers,
                                 timeout=KS_REQUEST_TIMEOUT)
        if response.status_code != 200:
            # Previously skipped silently; leave a trace in the log.
            logger.warning('Kuaishou request for account %s returned HTTP %s',
                           ks_name, response.status_code)
            continue

        # `or {}` / `or []` also cover keys that are present but JSON null,
        # which dict.get's default argument does not.
        response_data2 = response.json() or {}
        profile = (response_data2.get('data') or {}).get('visionProfilePhotoList') or {}
        data = profile.get('feeds') or []
        print(data)
        for item in data:
            photo = item.get('photo') or {}
            itemName = photo.get('caption', 'null')
            photoUrl = photo.get('photoUrl', 'null')
            create_time_str = _format_publish_time(photo.get('timestamp'))
            if create_time_str is None:
                # Without a usable timestamp there is no pub_date to store;
                # skip the entry instead of crashing the whole run.
                logger.warning('Skipping feed item without a valid timestamp '
                               'for account %s: %s', ks_name, item)
                continue
            print(itemName)  # video title
            print(photoUrl)  # video URL
            print(create_time_str)  # video publish time
            text = ''
            # Insert the row and commit immediately, one transaction per video.
            cursor.execute(
                KS_INSERT_QUERY,
                (itemName, create_time_str, photoUrl, text, m_id, ks_name, '483', '快手自动抓取'))
            mydatabase.commit()

except Exception:
    # logger.exception records the message together with the traceback.
    logger.exception('******快手获取发生错误********')
    logger.error('%s', item)
    logger.error('******快手账号:%s,数据获取异常******', ks_name)
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()
相关推荐
杨荧1 分钟前
【JAVA毕业设计】基于Vue和SpringBoot的宠物咖啡馆平台
java·开发语言·jvm·vue.js·spring boot·spring cloud·开源
monkey_meng14 分钟前
【Rust中的项目管理】
开发语言·rust·源代码管理
喜欢打篮球的普通人16 分钟前
rust高级特征
开发语言·后端·rust
weixin_4786897620 分钟前
【回溯法】——组合总数
数据结构·python·算法
天天要nx23 分钟前
D68【python 接口自动化学习】- python基础之数据库
数据库·python
山山而川 潺潺如镜27 分钟前
杰控通过 OPCproxy 获取数据发送到服务器
python
ModelBulider33 分钟前
十三、注解配置SpringMVC
java·开发语言·数据库·sql·mysql
V搜xhliang024642 分钟前
基于深度学习的地物类型的提取
开发语言·人工智能·python·深度学习·神经网络·学习·conda
DK七七44 分钟前
多端校园圈子论坛小程序,多个学校同时代理,校园小程序分展示后台管理源码
开发语言·前端·微信小程序·小程序·php
苹果酱05671 小时前
C语言 char 字符串 - C语言零基础入门教程
java·开发语言·spring boot·mysql·中间件