Python 爬取网页接口数据:以 Yearning 为例

模拟登录获取 token,再携带 token 调用对应的接口获取数据,最后将结果保存到 CSV 文件中。

python 复制代码
import getpass
import os
import requests
import time
import csv
from datetime import datetime

class Yearning:
    """Small HTTP client for a Yearning server.

    The bearer token is cached in ``self.token_file`` so it can be reused
    between runs. When the server answers with one of the known
    "not authenticated" bodies, the user is prompted for credentials, a new
    token is fetched via :meth:`login`, and the request is sent again.
    """

    # JSON bodies (plain strings) that the server returns when the token is
    # absent, malformed or expired.
    _AUTH_FAILURES = ('missing or malformed jwt', 'Token is expired', '')
    # Value of the ``code`` field in a successful response envelope.
    _SUCCESS_CODE = 1200

    def __init__(self):
        """Set up connection settings and load a previously cached token."""
        self.session = requests.Session()
        self.host = 'http://yearning.xxxx.com:8000'
        self.token_file = './token.txt'
        self.authorization = ''
        token = self._load_token()
        if token:
            self.authorization = f'Bearer {token}'

    def _load_token(self):
        """Return the cached token ('' if none), creating the file if missing."""
        if not os.path.exists(self.token_file):
            with open(self.token_file, 'w'):
                pass
            return ''
        with open(self.token_file, 'r') as fh:
            # Strip so that a trailing newline (e.g. from manual editing)
            # never ends up inside the Authorization header.
            return fh.read().strip()

    def _save_token(self, token):
        """Overwrite the token cache file with ``token``."""
        with open(self.token_file, 'w') as fh:
            fh.write(token)

    def query(self, database, source, sql, print_sql=False):
        """Run ``sql`` through the query endpoint and return the result rows.

        Args:
            database: value sent as ``data_base`` in the request body.
            source: value sent as ``source`` in the request body.
            sql: the SQL statement to execute.
            print_sql: when true, print the statement before sending it.

        Returns:
            The ``data`` entry of the response payload, or ``[]`` when the
            request fails, raises, or yields no data.
        """
        if print_sql:
            print(sql)
        uri = f'{self.host}/api/v2/query/results'
        body = {
            'data_base': database,
            'source': source,
            'sql': sql
        }
        try:
            payload = self.get_result_data(uri, body, False)
        except Exception as e:
            print(f'query exception! database={database}, source={source}, sql={sql}')
            print(e)
            return []
        # Only a dict payload can carry a 'data' entry; anything else
        # (None, a bare string, a number) means "no rows".
        if isinstance(payload, dict) and payload.get('data'):
            return payload['data']
        return []

    def login(self, username, password):
        """Log in via the ``/ldap`` endpoint and cache the returned token.

        On success ``self.authorization`` is updated and the token is written
        to ``self.token_file``. Failures are reported on stdout; nothing is
        raised.
        """
        uri = f'{self.host}/ldap'
        body = {
            'username': username,
            'password': password
        }
        try:
            result_data = self.get_result_data(uri, body, False)
            if isinstance(result_data, dict) and 'token' in result_data:
                token = result_data['token']
                self.authorization = f'Bearer {token}'
                self._save_token(token)
            else:
                print('login fail!')
        except Exception as e:
            print(f'login error! e={e}')

    def get_result_data(self, uri, body, isGet):
        """Send one request and return the ``payload`` of the response.

        Args:
            uri: full URL to call.
            body: JSON body for POST requests (ignored for GET).
            isGet: send a GET when true, otherwise a POST.

        Returns:
            The ``payload`` field when the response is a JSON object whose
            ``code`` equals 1200; otherwise ``None`` (after printing the
            response and sleeping one second).

        Raises:
            Whatever ``requests`` or ``response.json()`` raise on network or
            decoding errors; callers in this class catch ``Exception``.
        """
        while True:
            # Rebuilt on every attempt because login() replaces the token.
            headers = {
                'Content-Type': 'application/json; charset=UTF-8',
                'cookie': '',
                'Authorization': self.authorization
            }
            if isGet:
                response = requests.get(uri, headers=headers)
            else:
                response = requests.post(uri, json=body, headers=headers)
            result_data = response.json()
            auth_failed = (isinstance(result_data, str)
                           and result_data in self._AUTH_FAILURES)
            if not auth_failed:
                break
            print('登录授权已失效,请重新登录!')
            username = input('账号:')
            password = getpass.getpass('密码:')
            self.login(username, password)

        # Guard with isinstance: on a str, `'code' in result_data` would be a
        # substring test, and on None/numbers it would raise TypeError.
        if (isinstance(result_data, dict)
                and result_data.get('code') == self._SUCCESS_CODE
                and 'payload' in result_data):
            return result_data['payload']
        print(f'其他异常,休眠1s!{result_data}')
        time.sleep(1)
        return None


# Module-level client instance used by export(). Constructing it runs
# Yearning.__init__, which creates ./token.txt if it does not exist and
# loads any token stored there.
yearning = Yearning()


def _strip_cell(value):
    """Trim surrounding whitespace from string cells; pass other values through."""
    return value.strip() if isinstance(value, str) else value


def export(sql="select   count(*), driver_id from   order_info group by driver_id",
           db="order", source="order_readonly", out_dir="res", client=None):
    """Run a query and save the result rows to a timestamped CSV file.

    Args:
        sql: statement to execute.
        db: database name passed to the client's ``query``.
        source: data source name passed to the client's ``query``.
        out_dir: directory for the CSV file; created if it does not exist.
        client: object exposing ``query(db, source, sql, print_sql)``;
            defaults to the module-level ``yearning`` instance.

    Returns:
        Path of the written CSV file, or ``None`` when the query returned no
        rows (in which case no file is created).
    """
    if client is None:
        client = yearning
    rows = client.query(db, source, sql, True)
    if not rows:
        # query() returns [] on any failure; bail out instead of indexing
        # into an empty list and leaving an empty file behind.
        print('no data returned, nothing exported')
        return None

    os.makedirs(out_dir, exist_ok=True)
    # Timestamp such as "20230401143045" keeps successive exports apart.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    path = os.path.join(out_dir, f'data-{stamp}.csv')

    # Column order follows the keys of the first row.
    columns = list(rows[0])
    with open(path, 'w') as fh:
        # The csv default terminator '\r\n' produces blank lines on Windows
        # when the file is opened in text mode, so write '\n' instead.
        writer = csv.writer(fh, lineterminator='\n')
        writer.writerow(columns)
        print(columns)
        for row in rows:
            cells = [_strip_cell(row.get(key)) for key in columns]
            print(cells)
            writer.writerow(cells)
    return path






# Run the export only when this file is executed as a script.
if __name__ == '__main__':
    export()
相关推荐
亽仒凣凣2 分钟前
Windows安装Redis图文教程
数据库·windows·redis
ELI_He9995 分钟前
PHP中替换某个包或某个类
开发语言·php
亦世凡华、11 分钟前
MySQL--》如何在MySQL中打造高效优化索引
数据库·经验分享·mysql·索引·性能分析
m0_7482361112 分钟前
Calcite Web 项目常见问题解决方案
开发语言·前端·rust
YashanDB13 分钟前
【YashanDB知识库】Mybatis-Plus调用YashanDB怎么设置分页
数据库·yashandb·崖山数据库
倔强的石头10620 分钟前
【C++指南】类和对象(九):内部类
开发语言·c++
老大白菜21 分钟前
Python 爬虫技术指南
python
ProtonBase24 分钟前
如何从 0 到 1 ,打造全新一代分布式数据架构
java·网络·数据库·数据仓库·分布式·云原生·架构
Watermelo61725 分钟前
详解js柯里化原理及用法,探究柯里化在Redux Selector 的场景模拟、构建复杂的数据流管道、优化深度嵌套函数中的精妙应用
开发语言·前端·javascript·算法·数据挖掘·数据分析·ecmascript
古希腊掌管学习的神2 小时前
[搜广推]王树森推荐系统——矩阵补充&最近邻查找
python·算法·机器学习·矩阵