Python 爬取网页接口数据,以 Yearning 为例

模拟登录获取 token,再把 token 传给对应的接口获取数据,最后将结果保存到 CSV 文件中。

python 复制代码
import getpass
import os
import requests
import time
import csv
from datetime import datetime

class Yearning:
    """Small client for a Yearning server's HTTP API.

    The client logs in through the ``/ldap`` endpoint, keeps the returned
    token in ``self.token_file`` so later runs can reuse it, and sends it as
    a ``Bearer`` Authorization header on every request.

    Attributes:
        session: ``requests.Session`` used for all HTTP calls.
        host: Base URL of the Yearning server.
        token_file: Path of the file the login token is persisted in.
        authorization: Current ``Authorization`` header value ('' if none).
    """

    # Upper bound on interactive re-login prompts for a single request, so
    # wrong credentials cannot keep the client prompting/recursing forever.
    MAX_LOGIN_ATTEMPTS = 3

    # Bare JSON string bodies that are treated as "not authenticated".
    _AUTH_FAILURE_MESSAGES = ('missing or malformed jwt', 'Token is expired', '')

    def __init__(self):
        self.session = requests.Session()
        self.host = 'http://yearning.xxxx.com:8000'
        self.token_file = './token.txt'
        self.authorization = ''
        # A missing token file just means there is no saved login yet.
        try:
            with open(self.token_file, 'r', encoding='utf-8') as token_fp:
                # Strip so a stray newline cannot end up in the header value.
                token = token_fp.read().strip()
        except FileNotFoundError:
            token = ''
        if token:
            self.authorization = f'Bearer {token}'

    def query(self, database, source, sql, print_sql=False):
        """Run ``sql`` through the query endpoint and return the result rows.

        Args:
            database: Value sent as ``data_base`` in the request body.
            source: Value sent as ``source`` in the request body.
            sql: SQL text to execute.
            print_sql: When true, print the SQL before sending it.

        Returns:
            The ``data`` entry of the response payload, or ``[]`` when it is
            missing/empty or when any exception occurs (the exception is
            printed, not raised).
        """
        if print_sql:
            print(sql)
        uri = f'{self.host}/api/v2/query/results'
        body = {
            'data_base': database,
            'source': source,
            'sql': sql,
        }
        try:
            payload = self.get_result_data(uri, body, False)
            if isinstance(payload, dict) and payload.get('data'):
                return payload['data']
            return []
        except Exception as e:
            # Best-effort API: report the failure and hand back no rows.
            print(f'query exception! database={database}, source={source}, sql={sql}')
            print(e)
            return []

    def login(self, username, password):
        """Log in via ``/ldap`` and store the returned token.

        On success ``self.authorization`` is updated and the raw token is
        written to ``self.token_file``. Failures are printed, never raised.

        Args:
            username: Account name sent to the server.
            password: Password sent to the server.
        """
        uri = f'{self.host}/ldap'
        body = {
            'username': username,
            'password': password,
        }
        try:
            # Deliberately bypasses get_result_data(): a rejected login must
            # not trigger another credential prompt from inside login itself.
            payload = self._extract_payload(self._request(uri, body, False))
            if isinstance(payload, dict) and 'token' in payload:
                token = payload['token']
                self.authorization = f'Bearer {token}'
                # Mode 'w' already truncates the previous token.
                with open(self.token_file, 'w', encoding='utf-8') as token_fp:
                    token_fp.write(token)
            else:
                print('login fail!')
        except Exception as e:
            print(f'login error! e={e}')

    def get_result_data(self, uri, body, isGet):
        """Call ``uri`` and return the ``payload`` of a successful response.

        If the server answers with one of the known "not authenticated"
        strings, the user is prompted for credentials, ``login`` is called
        and the request is repeated, at most ``MAX_LOGIN_ATTEMPTS`` times.

        Args:
            uri: Full URL to call.
            body: JSON body for POST requests (ignored for GET).
            isGet: True to send a GET request, False to send a POST.

        Returns:
            The response's ``payload`` when its ``code`` is 1200, otherwise
            ``None`` (after printing the response and sleeping one second,
            or after the re-login attempts are exhausted).
        """
        result_data = self._request(uri, body, isGet)
        attempts = 0
        while self._is_auth_failure(result_data):
            if attempts >= self.MAX_LOGIN_ATTEMPTS:
                print('login retries exhausted, giving up!')
                return None
            attempts += 1
            print('登录授权已失效,请重新登录!')
            # Credentials could be hardcoded here instead of prompting.
            username = input('账号:')
            password = getpass.getpass('密码:')
            self.login(username, password)
            result_data = self._request(uri, body, isGet)
        return self._extract_payload(result_data)

    def _request(self, uri, body, isGet):
        """Send one GET/POST with the auth header and return the parsed JSON."""
        headers = {
            'Content-Type': 'application/json; charset=UTF-8',
            'cookie': '',
            'Authorization': self.authorization,
        }
        if isGet:
            response = self.session.get(uri, headers=headers)
        else:
            response = self.session.post(uri, json=body, headers=headers)
        return response.json()

    def _is_auth_failure(self, result_data):
        """Return True when the parsed body is a known auth-failure string."""
        return isinstance(result_data, str) and result_data in self._AUTH_FAILURE_MESSAGES

    @staticmethod
    def _extract_payload(result_data):
        """Return ``payload`` of a code-1200 response, else report and return None."""
        # The body may be any JSON value (string, null, list); only a dict
        # can carry code/payload, so check the type before indexing.
        if (
            isinstance(result_data, dict)
            and result_data.get('code') == 1200
            and 'payload' in result_data
        ):
            return result_data['payload']
        print(f'其他异常,休眠1s!{result_data}')
        time.sleep(1)
        return None


# Module-level client instance used by export(); constructing it reads any
# previously saved token from the client's token file.
yearning = Yearning()


def export():
    """Run a fixed query through the module-level client and save it as CSV.

    The rows are written to ``res/data-<timestamp>.csv`` with a header row
    taken from the keys of the first result row. Each written row is also
    printed. When the query returns no rows, nothing is written.
    """
    sql = "select   count(*), driver_id from   order_info group by driver_id"
    db = "order"
    source = "order_readonly"
    data_list = yearning.query(db, source, sql, True)
    if not data_list:
        # query() returns [] on errors and on empty results alike.
        print('no data returned, nothing to export')
        return

    # Compact timestamp for the file name, e.g. "20230401143045".
    datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
    os.makedirs('res', exist_ok=True)

    key_list = list(data_list[0])
    print(key_list)
    with open(f'res/data-{datetime_str}.csv', 'w') as csv_file:
        # The csv default line terminator is \r\n, which shows up as blank
        # lines on Windows in a text-mode file; emit \n instead.
        save_file = csv.writer(csv_file, lineterminator='\n')
        save_file.writerow(key_list)
        for data in data_list:
            # Only text values can be stripped; numbers (e.g. count(*)) and
            # None are written unchanged.
            row = [
                value.strip() if isinstance(value, str) else value
                for value in (data.get(key) for key in key_list)
            ]
            print(row)
            save_file.writerow(row)






# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    export()
相关推荐
❀͜͡傀儡师7 小时前
JDK 25 新特性速览
java·开发语言
兮动人7 小时前
主流JDK版本支持时间
java·开发语言·主流jdk版本支持时间
深蓝电商API7 小时前
0 基础入门爬虫:Python+requests 环境搭建保姆级教程
开发语言·爬虫·python
码河漫步7 小时前
win11安装mysql社区版数据库
数据库·mysql
2501_930707787 小时前
使用C#代码在 Word 文档中查找并替换文本
开发语言·c#·word
Wang's Blog7 小时前
MySQL: 存储引擎深度解析:Memory与Federated的特性与应用场景
数据库·mysql
学习中的程序媛~7 小时前
Spring 事务(@Transactional)与异步(@Async / CompletableFuture)结合的陷阱与最佳实践
java·数据库·sql
MediaTea7 小时前
Python 第三方库:PyTorch(动态计算图的深度学习框架)
开发语言·人工智能·pytorch·python·深度学习
kyle-fang7 小时前
pytorch-张量转换
人工智能·pytorch·python
Boop_wu7 小时前
[Java EE] 多线程 -- 初阶(3)
java·开发语言