python获取快手账号列表数据

快手数据获取相对简单:访问地址固定,且不需要登录 token。

获取列表数据的固定接口地址:

https://www.kuaishou.com/graphql

向该地址发送 POST 请求,注意每个快手账号都有对应的 id(即请求参数中的 userId)。

python 复制代码
import time
from datetime import datetime
import logging
import json
import pymysql
import requests

# File-backed logger for this script: every record at DEBUG level or above
# is appended to ks.log (UTF-8) in the format
# "<time> - <logger name> - <level> - <message>".
logger = logging.getLogger('my_logger')

# Handler that writes the records to the log file.
fh = logging.FileHandler(filename='ks.log', encoding='utf-8')

# Layout of each log line.
formatter = logging.Formatter(
    fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)

# Wire the handler in and let all levels through.
logger.addHandler(fh)
logger.setLevel(logging.DEBUG)

# HTTP headers sent with every request to Kuaishou: a fixed web cookie and a
# desktop Chrome User-Agent string.
headers = {
    'Cookie': 'kpf=PC_WEB; clientid=3; did=web_8239e5591749f85a281700fcf0834715; didv=1719032992223; kpn=KUAISHOU_VISION',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# !!! The database connection settings below must be adjusted for your own
# environment.
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration before deploying.
mydatabase = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    database='ry',
    charset='utf8mb4',
)
cursor = mydatabase.cursor()

# Load the accounts to crawl: id, base_media_name and dy_url of every row in
# `media_account_manager2` with type_id = 484, status = 3 and a non-NULL
# dy_url. All matching rows are fetched into memory at once.
account_sql = 'SELECT id,base_media_name,dy_url FROM `media_account_manager2` where `type_id` = 484 AND `status` = 3 and dy_url is not null'
cursor.execute(account_sql)
result = cursor.fetchall()

# Current local time and its YYYY-MM-DD rendering.
now = datetime.now()
now_formatted_date = now.strftime("%Y-%m-%d")

# Crawl loop: for every account row in `result`, POST the
# visionProfilePhotoList GraphQL query (first page only, empty pcursor) to
# Kuaishou and insert one `media_content` row per returned feed.
#
# Error policy (unchanged from the original): the first unexpected exception
# is logged and stops the run; the database connection is always closed.
# A non-200 response or a feed without a numeric timestamp is logged and
# skipped instead of aborting the whole run.

# Loop-invariant request/insert constants, built once.
urlKs = 'https://www.kuaishou.com/graphql'
ks_query = "fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
insert_query = "INSERT INTO `ry`.`media_content`(`title`, `pub_date`, `url`, `content`, `media_id`, `media_name`,`type_id`,`platform`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"

# Pre-bind the names the error handler reports, so the handler cannot raise
# NameError when the failure happens before they are first assigned.
ks_name = None
item = None

try:
    for row in result:
        m_id = row[0]
        ks_name = row[1]
        ks_url = row[2]  # the dy_url column is sent as the Kuaishou userId
        item = None  # do not report a previous account's feed on failure
        # Wait 5 seconds before each request (presumably to avoid hammering
        # the endpoint).
        time.sleep(5)
        print(ks_name)
        print('***************************************')
        jsonObj = {
            "operationName": "visionProfilePhotoList",
            "variables": {
                "userId": ks_url,
                "pcursor": "",
                "page": "profile"
            },
            "query": ks_query
        }
        # Without a timeout a stalled connection would block the script
        # indefinitely.
        response = requests.post(url=urlKs, json=jsonObj, headers=headers,
                                 timeout=30)
        if response.status_code != 200:
            logger.warning('******快手账号:%s,接口返回状态码:%s,已跳过******',
                           ks_name, response.status_code)
            continue

        # `or {}` / `or []` also cover keys that are present but JSON null,
        # which a plain .get(key, default) does not.
        response_data2 = response.json() or {}
        profile = (response_data2.get('data') or {}).get('visionProfilePhotoList') or {}
        data = profile.get('feeds') or []
        print(data)
        for item in data:
            photo = item.get('photo') or {}
            itemName = photo.get('caption', 'null')
            photoUrl = photo.get('photoUrl', 'null')
            timestamp = photo.get('timestamp')
            if not isinstance(timestamp, (int, float)):
                # No usable publish time: skip this feed rather than crash
                # on the division below.
                logger.warning('******快手账号:%s,视频缺少发布时间,已跳过******',
                               ks_name)
                logger.warning(item)
                continue
            # The timestamp is divided by 1000 before conversion, i.e. it is
            # treated as milliseconds since the epoch; the result is
            # rendered in local time.
            dt_object = datetime.fromtimestamp(timestamp / 1000)
            create_time_str = dt_object.strftime("%Y-%m-%d %H:%M:%S")
            print(itemName)  # video title
            print(photoUrl)  # video URL
            print(create_time_str)  # video publish time
            text = ''
            # Parameterized insert; committed per row so rows stored before
            # a later failure are kept.
            cursor.execute(
                insert_query,
                (itemName, create_time_str, photoUrl, text, m_id, ks_name, '483', '快手自动抓取'))
            mydatabase.commit()

except Exception:
    # logger.exception records the message together with the traceback.
    logger.exception('******快手获取发生错误********')
    if item is not None:
        logger.info(item)
    # %s formatting instead of concatenation: ks_name may be None.
    logger.info('******快手账号:%s,数据获取异常******', ks_name)
else:
    logger.info('******快手数据结束********')
finally:
    mydatabase.close()
相关推荐
向宇it6 分钟前
【unity实战】使用Unity实现动作游戏的攻击 连击 轻重攻击和打击感
开发语言·游戏·unity·游戏引擎
Cpdr13 分钟前
pytorch自适应的调整特征图大小
pytorch·python·深度学习
写代码的中青年16 分钟前
Semantic Kernel:微软大模型开发框架——LangChain 替代
人工智能·python·microsoft·langchain·大模型·llm
ZJ_.20 分钟前
Node.js 使用 gRPC:从定义到实现
java·开发语言·javascript·分布式·rpc·架构·node.js
ljh_a134 分钟前
Django 和 Django REST framework 创建对外 API
python·http·django·flask·tornado
syluxhch34 分钟前
Pycharm的终端(Terminal)中切换到当前项目所在的虚拟环境
ide·python·pycharm
yjjpp230135 分钟前
Django REST Framework(四)DRF APIVIEW
后端·python·django
concisedistinct39 分钟前
大数据开发语言 Scala(四):面向对象编程
大数据·开发语言·后端·scala·编程语言·面向对象
铁匠匠匠40 分钟前
django学习入门系列之第三点《BootSrap初了解》
前端·经验分享·笔记·python·学习·django·前端框架