python调用百度ai将图片识别为表格excel

python调用百度ai将图片识别为表格excel

ocr

百度ai官方文档:https://ai.baidu.com/ai-doc/OCR/Ik3h7y238

python 复制代码
import requests
import json
import base64
import time
'''
文档:https://ai.baidu.com/ai-doc/OCR/Ik3h7y238
'''

# 获取access_token
def get_access_token():
    client_id = "xxxxxxxxxxxxxxxxxx"
    client_secret = "xxxxxxxxxxxxxxxxxxxxxx"
    # client_id = '你的apikey'
    # client_secret = '你的Secret Key'
    # client_id 为官网获取的AK, client_secret 为官网获取的SK
    host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={}&client_secret={}'.format(
        client_id, client_secret)
    response = requests.get(host).text
    data = json.loads(response)
    access_token = data['access_token']
    return access_token


# 获取返回信息

# 获取识别结果
def get_info(access_token):
    request_url = "https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/request"
    # 二进制方式打开图片文件
    f = open('1.jpg', 'rb')
    img = base64.b64encode(f.read())  # base64编码
    params = {"image": img}
    request_url = request_url + "?access_token=" + access_token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    response = requests.post(request_url, data=params, headers=headers)
    # if response:
    #     print(response.json())
    data_1 = response.json()
    return data_1


# 获取excel


def get_excel(requests_id, access_token):
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    pargams = {
        'request_id': requests_id,
        'result_type': 'excel'
    }
    url = 'https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/get_request_result'
    url_all = url + "?access_token=" + access_token
    res = requests.post(url_all, headers=headers, params=pargams)  # 访问链接获取excel下载页
    info_1 = res.json()['result']['ret_msg']
    excel_url = res.json()['result']['result_data']
    excel_1 = requests.get(excel_url).content
    print(excel_1)
    with open('识别结果.xls', 'wb+') as f:
        f.write(excel_1)
    print(info_1)


def main():
    print('正在处理中请稍后')
    access_token = get_access_token()
    data_1 = get_info(access_token)
    try:
        requests_id = data_1['result'][0]['request_id']
        if requests_id != '':
            print('识别完成')
    except:
        print('识别错误')
    print('正在获取excel')
    time.sleep(10)  # 延时十秒让网页图片转excel完毕,excel量多的话,转化会慢,可以延时长一点
    get_excel(requests_id, access_token)


main()
相关推荐
Kenneth風车8 分钟前
【机器学习(五)】分类和回归任务-AdaBoost算法-Sentosa_DSML社区版
人工智能·算法·低代码·机器学习·数据分析
凌不了云11 分钟前
windows环境下安装python第三方包
开发语言·python
AI小白龙*11 分钟前
大模型团队招人(校招):阿里巴巴智能信息,2025届春招来了!
人工智能·langchain·大模型·llm·transformer
大熊程序猿12 分钟前
python 读取excel数据存储到mysql
数据库·python·mysql
生椰拿铁You14 分钟前
Python
python
鸽芷咕17 分钟前
【Python报错已解决】python setup.py bdist_wheel did not run successfully.
开发语言·python·机器学习·bug
空指针异常Null_Point_Ex33 分钟前
大模型LLM之SpringAI:Web+AI(一)
人工智能·chatgpt·nlp
清纯世纪43 分钟前
基于深度学习的图像分类或识别系统(含全套项目+PyQt5界面)
开发语言·python·深度学习
孤华暗香1 小时前
Python快速入门 —— 第三节:类与对象
开发语言·python
didiplus1 小时前
【趣学Python算法100例】百钱百鸡
python·算法·百钱百鸡