Python 获取快手账号列表数据

快手的数据获取相对简单:访问地址固定,且不需要登录 token。

获取列表数据的固定接口:

https://www.kuaishou.com/graphql

发送 POST 请求,注意请求参数中的 userId 要填写每个快手账号对应的 id。

python 复制代码
import json
import logging
import os
import time
from datetime import datetime

import pymysql
import requests

# Output layout shared by every record: timestamp, logger name, level, message.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# File handler that writes records to ks.log as UTF-8, using the layout above.
fh = logging.FileHandler('ks.log', encoding='utf-8')
fh.setFormatter(formatter)

# Named logger for this script; DEBUG lets records of every level through.
logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)
logger.addHandler(fh)

# NOTE: the database connection must be adjusted for your environment.
# Host, user and password can be supplied through the KS_DB_HOST, KS_DB_USER
# and KS_DB_PASSWORD environment variables so real credentials do not have to
# live in the source; when a variable is unset the original hard-coded value
# is used. The database name stays fixed because the INSERT statement further
# down addresses the `ry` schema explicitly.
mydatabase = pymysql.connect(host=os.environ.get('KS_DB_HOST', 'localhost'),
                             user=os.environ.get('KS_DB_USER', 'root'),
                             password=os.environ.get('KS_DB_PASSWORD', '123456'),
                             database='ry',
                             charset='utf8mb4')
cursor = mydatabase.cursor()

# Load the accounts to scrape: rows of (id, base_media_name, dy_url) with
# type_id 484, status 3 and a non-null dy_url.
cursor.execute(
    'SELECT id,base_media_name,dy_url FROM `media_account_manager2` where `type_id` = 484 AND `status` = 3 and dy_url is not null'
)
result = cursor.fetchall()
# HTTP headers sent with every GraphQL request: a fixed cookie string and a
# desktop Chrome user agent.
headers = {}
headers['Cookie'] = '; '.join([
    'kpf=PC_WEB',
    'clientid=3',
    'did=web_8239e5591749f85a281700fcf0834715',
    'didv=1719032992223',
    'kpn=KUAISHOU_VISION',
])
headers['User-Agent'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/120.0.0.0 Safari/537.36'
)
# Start time of this run (naive local time) and its YYYY-MM-DD rendering.
now = datetime.now()
now_formatted_date = f"{now:%Y-%m-%d}"

# Endpoint, GraphQL document and INSERT statement are the same for every
# account, so they are built once here instead of once per loop iteration.
urlKs = 'https://www.kuaishou.com/graphql'
ks_profile_query = "fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
insert_query = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
# Seconds to wait for the HTTP response; without a timeout requests.post can
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 15


def _extract_video(feed):
    """Return ``(title, url, pub_date)`` for one feed entry, or ``None``.

    ``None`` is returned when the entry is not a dict or carries no numeric
    ``photo.timestamp``, so the caller can skip it instead of failing on the
    date arithmetic. A missing caption or photoUrl falls back to the string
    ``'null'``.
    """
    if not isinstance(feed, dict):
        return None
    # `or {}` also covers an explicit JSON null, which .get's default does not.
    photo = feed.get('photo') or {}
    timestamp = photo.get('timestamp')
    if isinstance(timestamp, bool) or not isinstance(timestamp, (int, float)):
        return None
    title = photo.get('caption', 'null')
    url = photo.get('photoUrl', 'null')
    # The value is divided by 1000 (presumably epoch milliseconds -- confirm
    # against the API) and rendered in the machine's local time zone.
    pub_date = datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S")
    return title, url, pub_date


# Bound before the try block so the except handler can always log them, even
# when the failure happens before the first account or first feed is reached.
ks_name = None
item = None

try:
    for row in result:
        m_id, ks_name, ks_url = row[0], row[1], row[2]
        # Reset so an error in this account never logs a feed from the previous one.
        item = None
        # Pause between accounts to keep the request rate low.
        time.sleep(5)
        print(ks_name)
        print('***************************************')
        jsonObj = {
            "operationName": "visionProfilePhotoList",
            "variables": {
                "userId": ks_url,
                "pcursor": "",
                "page": "profile"
            },
            "query": ks_profile_query
        }
        response = requests.post(url=urlKs, json=jsonObj, headers=headers,
                                 timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            # Previously skipped silently; record it so missing data is traceable.
            logger.warning('Kuaishou request for %s returned HTTP %s',
                           ks_name, response.status_code)
            continue
        response_data2 = response.json()
        # Every level may be JSON null, so fall back with `or` rather than
        # relying on .get defaults.
        profile = (response_data2.get('data') or {}).get('visionProfilePhotoList') or {}
        data = profile.get('feeds') or []
        print(data)
        for item in data:
            video = _extract_video(item)
            if video is None:
                logger.warning('Skipping feed without a usable timestamp: %s', item)
                continue
            itemName, photoUrl, create_time_str = video
            print(itemName)  # video title
            print(photoUrl)  # video URL
            print(create_time_str)  # video publish time
            text = ''
            # Parameterized insert; the driver handles the escaping.
            cursor.execute(
                insert_query,
                (itemName, create_time_str, photoUrl, text, m_id, ks_name, '483', '快手自动抓取'))
            # Commit per row so rows stored before a later failure are kept.
            mydatabase.commit()

except Exception:
    # logger.exception records the message together with the full traceback.
    logger.exception('******快手获取发生错误********')
    logger.info(item)
    # %s formatting instead of concatenation: ks_name may be None.
    logger.info('******快手账号:%s,数据获取异常******', ks_name)
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()
相关推荐
bst@微胖子11 分钟前
Python高级语法之selenium
开发语言·python·selenium
王小义笔记16 分钟前
Postman如何流畅使用DeepSeek
开发语言·测试工具·lua·postman·deepseek
查理零世1 小时前
【蓝桥杯集训·每日一题2025】 AcWing 6118. 蛋糕游戏 python
python·算法·蓝桥杯
魔尔助理顾问2 小时前
一个简洁高效的Flask用户管理示例
后端·python·flask
java1234_小锋2 小时前
一周学会Flask3 Python Web开发-request请求对象与url传参
开发语言·python·flask·flask3
流星白龙5 小时前
【C++】36.C++IO流
开发语言·c++
诚信爱国敬业友善6 小时前
常见排序方法的总结归类
开发语言·python·算法
nbsaas-boot7 小时前
Go 自动升级依赖版本
开发语言·后端·golang
架构默片7 小时前
【JAVA工程师从0开始学AI】,第五步:Python类的“七十二变“——当Java的铠甲遇见Python的液态金属
java·开发语言·python
不只会拍照的程序猿7 小时前
从插入排序到希尔排序
java·开发语言·数据结构·算法·排序算法