腾讯云录音文件识别 Python API 接口

官方文档:https://cloud.tencent.com/document/product/1093/37823

python 复制代码
# -*- coding: utf-8 -*-
import hashlib, hmac, json, time, base64, re
from datetime import datetime
import requests


# 录音文件识别
class TencentAsrRec(object):
    """Small client for Tencent Cloud ASR recorded-audio-file recognition.

    It signs requests with the TC3-HMAC-SHA256 scheme and exposes two API
    actions: ``CreateRecTask`` (submit an audio URL) and
    ``DescribeTaskStatus`` (fetch the recognition result of a task).
    """

    def __init__(self, secret_id="xxxxxxxxxxxx", secret_key="xxxxxxxxxxxx",
                 region="ap-shanghai", timeout=30):
        """Store credentials and endpoint settings.

        Args:
            secret_id: API SecretId; the default is a placeholder.
            secret_key: API SecretKey; the default is a placeholder.
            region: Value sent in the ``X-TC-Region`` header.
            timeout: Seconds passed as ``timeout`` to ``requests.post`` so a
                stalled connection cannot block forever.
        """
        # Credential parameters
        self.secret_id = secret_id
        self.secret_key = secret_key
        self.service = "asr"
        self.host = f"{self.service}.tencentcloudapi.com"
        self.endpoint = f"https://{self.host}"

        self.version = "2019-06-14"
        self.region = region
        self.timeout = timeout

    def file_to_base64(self, file_path):
        """Read the file at ``file_path`` and return its base64 text."""
        with open(file_path, "rb") as f:
            base64_data = base64.b64encode(f.read()).decode('utf-8')
        return base64_data

    def clean_text(self, text):
        """Strip ``[start,end]`` timestamp markers and join lines.

        Args:
            text: Recognition result text; ``None`` or ``""`` yields ``""``.

        Returns:
            The text with every timestamp marker removed, each remaining
            line stripped, blank lines dropped, and the lines concatenated
            without a separator.
        """
        if not text:
            return ""
        pattern = r"\[[\d:.]+,[\d:.]+\]"

        # Replace every matched timestamp marker with an empty string.
        cleaned_text = re.sub(pattern, "", text)

        # Drop blank lines and surrounding whitespace, then join the rest.
        cleaned_text = "".join([line.strip() for line in cleaned_text.splitlines() if line.strip()])
        return cleaned_text

    @staticmethod
    def _dump_payload(params):
        """Serialize the request body; signing and sending must agree byte-for-byte."""
        return json.dumps(params, separators=(',', ':'))

    def get_authorization(self, params, action, timestamp):
        """Build the ``Authorization`` header value (TC3-HMAC-SHA256).

        Args:
            params: Request body as a dict; it is serialized compactly and
                hashed into the canonical request.
            action: API action name, e.g. ``"CreateRecTask"``.
            timestamp: Unix timestamp (seconds) that is also sent in the
                ``X-TC-Timestamp`` header.

        Returns:
            The complete ``Authorization`` header string.
        """
        algorithm = "TC3-HMAC-SHA256"

        # The credential-scope date must be the UTC date of the timestamp.
        # Using local time breaks the signature whenever the local date
        # differs from the UTC date.
        date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))

        # ************* Step 1: build the canonical request *************
        http_request_method = "POST"
        canonical_uri = "/"
        canonical_querystring = ""
        ct = "application/json; charset=utf-8"
        payload = self._dump_payload(params)
        canonical_headers = "content-type:%s\nhost:%s\nx-tc-action:%s\n" % (ct, self.host, action.lower())
        signed_headers = "content-type;host;x-tc-action"
        hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
        canonical_request = "\n".join([
            http_request_method,
            canonical_uri,
            canonical_querystring,
            canonical_headers,
            signed_headers,
            hashed_request_payload,
        ])

        # ************* Step 2: build the string to sign *************
        credential_scope = date + "/" + self.service + "/" + "tc3_request"
        hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
        string_to_sign = "\n".join([
            algorithm,
            str(timestamp),
            credential_scope,
            hashed_canonical_request,
        ])

        # ************* Step 3: compute the signature *************
        def sign(key, msg):
            """Return the raw HMAC-SHA256 digest of ``msg`` under ``key``."""
            return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

        # The signing key is derived by chaining date -> service -> "tc3_request".
        secret_date = sign(("TC3" + self.secret_key).encode("utf-8"), date)
        secret_service = sign(secret_date, self.service)
        secret_signing = sign(secret_service, "tc3_request")
        signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()

        # ************* Step 4: assemble the Authorization value *************
        authorization = (algorithm + " " +
                         "Credential=" + self.secret_id + "/" + credential_scope + ", " +
                         "SignedHeaders=" + signed_headers + ", " +
                         "Signature=" + signature)
        return authorization

    def _call_api(self, action, payload_dict):
        """Sign and POST one API action; return the decoded JSON response."""
        timestamp = int(time.time())

        # Generate the authorization value for exactly this payload.
        authorization = self.get_authorization(payload_dict, action, timestamp)
        print("授权信息:", authorization)

        headers = {
            "Content-Type": "application/json; charset=utf-8",
            "Authorization": authorization,
            "Host": self.host,  # host is one of the signed headers
            "X-TC-Action": action,
            "X-TC-Timestamp": str(timestamp),
            "X-TC-Version": self.version,
            "X-TC-Region": self.region,
        }

        # Same serialization as used for signing, otherwise the payload
        # hash in the signature would not match the body that is sent.
        payload = self._dump_payload(payload_dict)

        response = requests.post(self.endpoint, headers=headers, data=payload,
                                 timeout=self.timeout)
        return response.json()

    @staticmethod
    def _response_data(json_data):
        """Return ``Response.Data`` as a dict, or ``{}`` when absent or null."""
        response = json_data.get("Response") or {}
        return response.get("Data") or {}

    # Submit a recorded-file recognition task
    def getCreateRecTask(self, url):
        """Create a recognition task for the audio at ``url``.

        Args:
            url: Audio URL sent in the ``Url`` field (``SourceType`` 0).

        Returns:
            ``Response.Data.TaskId`` from the reply, or ``""`` when the
            reply does not contain one.
        """
        payload_dict = {
            "EngineModelType": "16k_zh",  # 16k sample rate, Mandarin Chinese (required)
            # 1: mono (16k audio supports mono only, do not set stereo);
            # 2: stereo (8k telephone audio only, one channel per party)
            "ChannelNum": 1,
            "ResTextFormat": 0,
            "SourceType": 0,  # 1 = local audio as base64, 0 = audio URL (required)
            # Audio URL; must be downloadable from the public internet.
            # Required when SourceType is 0, omitted when it is 1.
            # NOTE(review): the original comment quoted a 60 s / 3 MB limit;
            # confirm the actual limits against the CreateRecTask docs.
            "Url": url
            # "Data": self.file_to_base64(audio_file_path),  # audio data; required when SourceType is 1
        }

        json_data = self._call_api("CreateRecTask", payload_dict)
        taskId = self._response_data(json_data).get("TaskId", "")
        return taskId

    # Query the result of a recorded-file recognition task
    def getDescribeTaskStatus(self, taskId):
        """Query a recognition task and return its result text.

        Args:
            taskId: Task identifier returned by ``getCreateRecTask``.

        Returns:
            ``Response.Data.Result`` from the reply, or ``""`` when the
            reply does not contain one.
        """
        payload_dict = {
            "TaskId": taskId
        }

        json_data = self._call_api("DescribeTaskStatus", payload_dict)
        print(json_data)

        result = self._response_data(json_data).get("Result", "")
        return result


if __name__ == '__main__':
    # Demo: submit an audio URL, then poll for the recognition result.
    tencent_asr = TencentAsrRec()
    url = "https://xxxxxxxxxxxx.mp4"
    taskId = tencent_asr.getCreateRecTask(url)
    print(f"TaskId:{taskId}")
    if not taskId:
        # An empty TaskId means task creation failed; querying it is pointless.
        raise SystemExit("CreateRecTask did not return a TaskId")

    # Recognition is asynchronous, so wait BEFORE each query (the original
    # queried first and slept afterwards, which printed an empty result).
    # Poll a bounded number of times so a failed task cannot loop forever.
    result = ""
    for _ in range(6):
        time.sleep(10)  # give the service time to produce a result
        result = tencent_asr.getDescribeTaskStatus(taskId)
        if result:
            break

    print(f"识别结果:{result}")
    print(f"格式化之后结果:{tencent_asr.clean_text(result)}")
相关推荐
易知微EasyV数据可视化11 分钟前
当AI开始理解物理与场景,数字孪生如何回归其价值本身?
人工智能·经验分享·数字孪生
大数据在线4 小时前
布局Agentic AI,亚马逊云科技组合拳再升级
人工智能·openai·亚马逊云科技·智能体·agentic ai
皮皮学姐分享-ppx8 小时前
政府绿色采购数据库(2015-2024.3)
大数据·网络·数据库·人工智能·制造
GIS数据转换器8 小时前
基于3D GIS的监控视频精准标定平台
人工智能·物联网·3d·音视频·无人机·知识图谱
珺毅同学8 小时前
YOLO生成预测json标签迁移问题
python·yolo·json
骑士雄师8 小时前
18.4 长期记忆可修改版
python
专注VB编程开发20年8 小时前
AI 生成C# WinForm 窗体 = 目前就是垃圾
开发语言·人工智能·c#
深小乐9 小时前
Claude Fable5 尝鲜,效果挺不错
人工智能
~小先生~9 小时前
Python从入门到放弃(一)
开发语言·python
Nayxxu9 小时前
Gemini + RAG 企业知识库教程:从文档切片到答案生成
运维·人工智能