Python 爬取网页接口数据:以 Yearning 为例

模拟登录获取 token,将 token 传给对应的接口获取数据,并把结果保存到 CSV 文件中。

python 复制代码
import getpass
import os
import requests
import time
import csv
from datetime import datetime

class Yearning:
    """Small HTTP client for a Yearning server.

    The client logs in through the ``/ldap`` endpoint, stores the returned
    token in ``token_file`` and sends it as a ``Bearer`` authorization header
    on every later request.  Replies are expected to be JSON envelopes of the
    form ``{"code": 1200, "payload": ...}``.
    """

    # Plain-string replies that mean "the request was not authenticated".
    AUTH_FAILURE_REPLIES = ('missing or malformed jwt', 'Token is expired', '')
    # Upper bound on interactive re-login rounds for a single request.
    MAX_LOGIN_ATTEMPTS = 3
    # Seconds to wait for the server; without a timeout a stalled server
    # would block the script forever.  Set to None to disable.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        """Create the HTTP session and load a previously cached token."""
        self.session = requests.Session()
        self.host = 'http://yearning.xxxx.com:8000'
        self.token_file = './token.txt'
        # Make sure the cache file exists so the read below cannot fail.
        if not os.path.exists(self.token_file):
            with open(self.token_file, 'w'):
                pass
        with open(self.token_file, 'r') as token_fh:
            # Strip so a trailing newline does not end up in the header.
            token = token_fh.read().strip()
        self.authorization = f'Bearer {token}' if token else ''

    def query(self, database, source, sql, print_sql=False):
        """Run ``sql`` through the query API and return the result rows.

        Args:
            database: Value sent as ``data_base``.
            source: Value sent as ``source``.
            sql: SQL text to execute.
            print_sql: When true, echo ``sql`` before sending it.

        Returns:
            The ``data`` entry of the reply payload, or ``[]`` when it is
            missing, empty, or the request failed (the error is printed).
        """
        if print_sql:
            print(sql)
        uri = f'{self.host}/api/v2/query/results'
        body = {
            'data_base': database,
            'source': source,
            'sql': sql,
        }
        try:
            result_data = self.get_result_data(uri, body, False)
        except Exception as e:
            # Best-effort API: report the failure and hand back no rows.
            print(f'query exception! database={database}, source={source}, sql={sql}')
            print(e)
            return []
        if isinstance(result_data, dict):
            return result_data.get('data') or []
        return []

    def login(self, username, password):
        """Log in and cache the returned token in memory and on disk.

        Failures are printed rather than raised; ``self.authorization`` is
        left untouched when the login does not succeed.
        """
        uri = f'{self.host}/ldap'
        body = {
            'username': username,
            'password': password,
        }
        try:
            # Talk to the server directly instead of via get_result_data():
            # a rejected login must not trigger another nested login prompt.
            result_data = self._unwrap(self._send(uri, body, False))
            if isinstance(result_data, dict) and 'token' in result_data:
                token = result_data['token']
                self.authorization = f'Bearer {token}'
                with open(self.token_file, 'w') as token_fh:
                    token_fh.write(token)
            else:
                print('login fail!')
        except Exception as e:
            print(f'login error! e={e}')

    def get_result_data(self, uri, body, isGet):
        """Send one request and return the ``payload`` of its reply.

        When the server answers with an authentication failure the user is
        asked for credentials, a login is performed and the request is sent
        again, at most ``MAX_LOGIN_ATTEMPTS`` times.

        Args:
            uri: Full URL to call.
            body: JSON body for POST requests (ignored for GET).
            isGet: True to send a GET, False to send a POST.

        Returns:
            The reply payload, or ``None`` when the reply is not a successful
            envelope or re-login kept failing.
        """
        result_data = self._send(uri, body, isGet)
        attempts = 0
        while self._is_auth_failure(result_data):
            if attempts >= self.MAX_LOGIN_ATTEMPTS:
                print(f're-login failed {attempts} times, giving up!')
                return None
            attempts += 1
            print('登录授权已失效,请重新登录!')
            username = input('账号:')
            password = getpass.getpass('密码:')
            self.login(username, password)
            result_data = self._send(uri, body, isGet)
        return self._unwrap(result_data)

    def _send(self, uri, body, is_get):
        """Perform the HTTP call on the shared session and decode the JSON reply."""
        headers = {
            'Content-Type': 'application/json; charset=UTF-8',
            'cookie': '',
            'Authorization': self.authorization,
        }
        if is_get:
            response = self.session.get(
                uri, headers=headers, timeout=self.REQUEST_TIMEOUT)
        else:
            response = self.session.post(
                uri, json=body, headers=headers, timeout=self.REQUEST_TIMEOUT)
        return response.json()

    def _is_auth_failure(self, result_data):
        """Return True when the reply is one of the known auth-failure strings."""
        return (isinstance(result_data, str)
                and result_data in self.AUTH_FAILURE_REPLIES)

    @staticmethod
    def _unwrap(result_data):
        """Return the payload of a successful envelope, else report and return None."""
        # The isinstance check keeps None, numbers and plain strings from
        # raising TypeError on the membership / subscript operations.
        if (isinstance(result_data, dict)
                and result_data.get('code') == 1200
                and 'payload' in result_data):
            return result_data['payload']
        print(f'其他异常,休眠1s!{result_data}')
        time.sleep(1)
        return None


# Shared module-level client used by export(); constructing it reads any
# previously cached token from ./token.txt.
yearning = Yearning()


def export(sql="select   count(*), driver_id from   order_info group by driver_id",
           db="order", source="order_readonly", out_dir="res", client=None):
    """Run a query and save its rows to a timestamped CSV file.

    The header row is taken from the keys of the first result row.  String
    cells are stripped of surrounding whitespace; other values are written
    as they are.  The header and every row are also printed.

    Args:
        sql: SQL text to execute.
        db: Database name passed to the client's ``query``.
        source: Data-source name passed to the client's ``query``.
        out_dir: Directory for the CSV file; created when missing.
        client: Object with a ``query(db, source, sql, print_sql)`` method.
            Defaults to the module-level ``yearning`` client.

    Returns:
        The path of the CSV file that was written, or ``None`` when the
        query returned no rows (no file is created in that case).
    """
    if client is None:
        client = yearning
    data_list = client.query(db, source, sql, True)
    if not data_list:
        # query() returns [] on any failure, so there may be nothing to write.
        print('no data returned, nothing to export')
        return None

    key_list = list(data_list[0])
    print(key_list)

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Compact timestamp for the file name, e.g. "20230401143045".
    datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
    save_path = os.path.join(out_dir, f'data-{datetime_str}.csv')

    with open(save_path, 'w') as csv_file:
        # The csv default line terminator is \r\n, which the text-mode file
        # turns into blank lines on Windows; write \n instead.
        writer = csv.writer(csv_file, lineterminator='\n')
        writer.writerow(key_list)
        for record in data_list:
            row = []
            for key in key_list:
                value = record.get(key, '')
                # Only strings can be stripped; numbers and None pass through.
                row.append(value.strip() if isinstance(value, str) else value)
            print(row)
            writer.writerow(row)
    return save_path






# Run the export only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    export()
相关推荐
小张贼嚣张5 分钟前
数据分析全流程实战:Python(Pandas/Matplotlib/Numpy)+ MySQL(附可下载数据源+多图形绘制)
python·数据分析·pandas
努力的小白o(^▽^)o13 分钟前
面向课堂考勤场景的桌面端人脸识别签到系统
python·人脸识别
运维 小白16 分钟前
2. 部署mysql服务并监控mysql
数据库·mysql·adb
myloveasuka34 分钟前
C++进阶:利用作用域解析运算符 :: 突破多态与变量隐藏
开发语言·c++
聪明人42 分钟前
macOS安装Redis
数据库·redis·macos
OxyTheCrack1 小时前
【C++】详细拆解std::mutex的底层原理
linux·开发语言·c++·笔记
weixin_505154461 小时前
Bowell Studio:重塑工业互联网时代的装配制造与运维检修
运维·数据库·人工智能·制造·数字孪生·3d产品配置器·3d交互展示
sa100271 小时前
淘宝商品详情 API 接口开发实战:item_detail 调用、参数与 Python 示例
linux·数据库·python
云栖梦泽1 小时前
易语言开发从入门到精通:进阶篇·网络爬虫与数据采集分析系统深度实战
开发语言
lsx2024061 小时前
XSLT `<sort>` 元素详解
开发语言