Python 爬取网页接口数据:以 Yearning 为例

模拟登录获取 token,携带 token 调用对应的接口获取数据,并将结果导出到 CSV 文件。

python 复制代码
import getpass
import os
import requests
import time
import csv
from datetime import datetime

class Yearning:
    """Small client for a Yearning server's HTTP API.

    The bearer token returned by a successful login is cached in
    ``token_file`` so later runs can reuse it. When the server rejects the
    token, the user is prompted on the console for credentials and the
    request is retried, at most ``max_login_attempts`` times per request.
    """

    # Plain-string response bodies that are treated as "not authenticated".
    _AUTH_FAILURE_MESSAGES = ('missing or malformed jwt', 'Token is expired', '')
    # Value of the response's 'code' field that marks a successful call.
    _SUCCESS_CODE = 1200
    # Upper bound on interactive login prompts while serving one request.
    max_login_attempts = 3
    # (connect, read) timeout in seconds handed to requests, so a stalled
    # server cannot hang the script forever. Override on the instance or a
    # subclass if queries legitimately run longer.
    request_timeout = (10, 300)

    def __init__(self):
        # Kept as a public attribute for existing callers; the requests below
        # still go through the module-level requests.get/post functions.
        self.session = requests.Session()
        self.host = 'http://yearning.xxxx.com:8000'
        self.token_file = './token.txt'
        self.authorization = ''
        # Make sure the cache file exists so the read below cannot fail.
        if not os.path.exists(self.token_file):
            with open(self.token_file, 'w'):
                pass
        with open(self.token_file, 'r') as token_fh:
            # Strip stray whitespace/newlines: they are not valid inside an
            # HTTP header value.
            token = token_fh.read().strip()
        if token:
            self.authorization = f'Bearer {token}'

    @classmethod
    def _is_auth_failure(cls, result_data):
        """Return True if the decoded body is one of the auth-failure strings."""
        return isinstance(result_data, str) and result_data in cls._AUTH_FAILURE_MESSAGES

    @classmethod
    def _is_success(cls, result_data):
        """Return True if the decoded body is a success envelope with a payload."""
        return (
            isinstance(result_data, dict)
            and result_data.get('code') == cls._SUCCESS_CODE
            and 'payload' in result_data
        )

    def _send(self, uri, body, isGet):
        """Issue one GET/POST with the current Authorization header.

        Returns the decoded JSON body. Network errors and JSON decoding
        errors propagate to the caller.
        """
        headers = {
            'Content-Type': 'application/json; charset=UTF-8',
            'cookie': '',
            'Authorization': self.authorization
        }
        if isGet:
            response = requests.get(uri, headers=headers, timeout=self.request_timeout)
        else:
            response = requests.post(uri, json=body, headers=headers, timeout=self.request_timeout)
        return response.json()

    def _unwrap(self, result_data):
        """Return the payload of a success envelope, else report and return None."""
        if self._is_success(result_data):
            return result_data['payload']
        print(f'其他异常,休眠1s!{result_data}')
        time.sleep(1)
        return None

    def query(self, database, source, sql, print_sql=False):
        """Run ``sql`` through the query endpoint and return the result rows.

        Args:
            database: value sent as ``data_base`` in the request body.
            source: value sent as ``source`` in the request body.
            sql: the SQL text to execute.
            print_sql: when true, echo the SQL to stdout before sending it.

        Returns:
            The ``data`` entry of the response payload, or ``[]`` when it is
            missing, empty, or the request failed (the failure is printed).
        """
        if print_sql:
            print(sql)
        uri = f'{self.host}/api/v2/query/results'
        body = {
            'data_base': database,
            'source': source,
            'sql': sql
        }
        try:
            payload = self.get_result_data(uri, body, False)
        except Exception as e:
            # Deliberate best-effort: report the failure and hand back no rows.
            print(f'query exception! database={database}, source={source}, sql={sql}')
            print(e)
            return []
        if isinstance(payload, dict) and payload.get('data'):
            return payload['data']
        return []

    def login(self, username, password):
        """Log in via the ``/ldap`` endpoint and cache the returned token.

        On success the token is stored in ``self.authorization`` and written
        to ``token_file``. Failures are printed, never raised.

        Returns:
            True when a token was obtained, False otherwise.
        """
        uri = f'{self.host}/ldap'
        body = {
            'username': username,
            'password': password
        }
        try:
            # Sent directly (not via get_result_data) so that a rejected
            # login can never trigger a nested credential prompt.
            payload = self._unwrap(self._send(uri, body, False))
            token = payload.get('token') if isinstance(payload, dict) else None
            if not token:
                print(f'login fail!')
                return False
            self.authorization = f'Bearer {token}'
            with open(self.token_file, 'w') as token_fh:
                token_fh.write(token)
            return True
        except Exception as e:
            print(f'login error! e={e}')
            return False

    def get_result_data(self, uri, body, isGet):
        """Call ``uri`` and return the response payload.

        Args:
            uri: full URL of the endpoint.
            body: JSON-serialisable request body (only sent for POST).
            isGet: true for GET, false for POST.

        Returns:
            The ``payload`` of a success response; ``None`` when the server
            answers with anything else (after printing it and sleeping one
            second) or when re-authentication keeps failing.
        """
        result_data = self._send(uri, body, isGet)
        attempts = 0
        while self._is_auth_failure(result_data):
            if attempts >= self.max_login_attempts:
                # Bounded on purpose: endless prompting/recursion would
                # otherwise occur when the credentials are simply wrong.
                print('登录失败次数过多,放弃本次请求!')
                return None
            attempts += 1
            print('登录授权已失效,请重新登录!')
            username = input('账号:')
            password = getpass.getpass('密码:')
            # Only repeat the request once a fresh token is available; a
            # failed login leaves result_data unchanged and prompts again.
            if self.login(username, password):
                result_data = self._send(uri, body, isGet)
        return self._unwrap(result_data)


# Module-level client used by export(). Constructing it reads ./token.txt
# (creating the file when it does not exist), so this happens at import time.
yearning = Yearning()


def export(sql="select   count(*), driver_id from   order_info group by driver_id",
           db="order", source="order_readonly", out_dir="res"):
    """Run a query through the module-level client and dump the rows to CSV.

    The file is written to ``<out_dir>/data-<YYYYmmddHHMMSS>.csv``. The header
    row is taken from the keys of the first result row; string values are
    stripped of surrounding whitespace, other values are written unchanged.

    Args:
        sql: SQL text to execute.
        db: database name passed to the query.
        source: data-source name passed to the query.
        out_dir: directory for the CSV file; created when missing.

    Returns:
        The path of the written file, or ``None`` when the query returned no
        rows (in which case no file is created).
    """
    data_list = yearning.query(db, source, sql, True)
    if not data_list:
        # query() returns [] for empty results and for failures alike;
        # indexing the first row would raise IndexError.
        print('no data returned, nothing exported')
        return None

    # Timestamp for the file name, e.g. "20230401143045".
    datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
    os.makedirs(out_dir, exist_ok=True)
    csv_path = os.path.join(out_dir, f'data-{datetime_str}.csv')

    key_list = list(data_list[0])
    # Encoding and newline handling are left at the platform defaults so the
    # produced files match those of earlier exports; the csv writer emits
    # '\n' itself to avoid blank lines between rows on Windows.
    with open(csv_path, 'w') as csv_fh:
        save_file = csv.writer(csv_fh, lineterminator='\n')
        save_file.writerow(key_list)
        print(key_list)
        for row in data_list:
            values = []
            for key in key_list:
                value = row.get(key)
                # Only strings can be stripped; NULLs and numbers pass through.
                values.append(value.strip() if isinstance(value, str) else value)
            print(values)
            save_file.writerow(values)
    return csv_path






# Script entry point: run the export only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    export()
相关推荐
2501_9216494912 小时前
2026个人量化交易免费数据API接入:从选型到实操
经验分享·python·金融·api·个人开发·量化交易
wgzrmlrm7412 小时前
如何解决ORA-28040没有匹配的验证协议_sqlnet.ora版本兼容设置
jvm·数据库·python
一江寒逸12 小时前
零基础从入门到精通MySQL(附加篇):面试八股文全集
数据库·mysql·面试
维度攻城狮12 小时前
pycallgraph2drawio:Python 调用链可视化 + Draw.io 自由编辑
开发语言·python·draw.io·graphviz
数厘12 小时前
2.6MySQL库表操作指南(电商数据分析专用)
数据库·mysql·数据分析
需要点灵感12 小时前
SQL Server 存储过程语法整理
数据库·sql
刘~浪地球12 小时前
数据库与缓存--分库分表实战指南
网络·数据库·缓存
蒙奇·D·路飞-12 小时前
大模型时代下 Java 后端开发的技术重构与工程实践
java·开发语言·重构
极光代码工作室12 小时前
基于NLP的智能客服系统设计与实现
python·深度学习·机器学习·ai·自然语言处理
wljy112 小时前
Qt入门(一)
开发语言·qt