JAVA麦克风实时录音调用听写并存储音频到本地

一、麦克风传可以实现对着电脑说话转成文字

语音听写是一种将说话内容转换为可读文字的技术,广泛应用于各种语音输入和语音操控的场景中。随着人工智能和语音识别技术的发展,语音听写服务提供了高识别率和高准确率,为用户带来便捷的交互体验。语音听写在各行各业都有广泛应用,从聊天输入到游戏娱乐,再到人机交互,它使人们的生活更加便利和高效。本技术不仅可以实现实时送流给听写能力,而且还能把录音的音频文件存储到本地,方便验证产生的音频格式是否正确、设备录音效果是否符合能力要求等等。

二、具体实现录音的代码

java 复制代码
package day;

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import day.constants.Constants;
import okhttp3.*;

import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import javax.sound.sampled.AudioInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.*;

// 麦克风传流听写

public class IatMic extends WebSocketListener {
    private static final String hostUrl = "https://iat-api.xfyun.cn/v2/iat"; //中英文,http url 不支持解析 ws/wss schema
    // private static final String hostUrl = "https://iat-niche-api.xfyun.cn/v2/iat";//小语种
    private static final String appid = ""; //在控制台-我的应用获取
    private static final String apiSecret = ""; //在控制台-我的应用-语音听写(流式版)获取
    private static final String apiKey = ""; //在控制台-我的应用-语音听写(流式版)获取

    //private static final String file = "./zMusic/pcm/科大讯飞.pcm"; // 中文
    public static final int StatusFirstFrame = 0;
    public static final int StatusContinueFrame = 1;
    public static final int StatusLastFrame = 2;
    public static final Gson json = new Gson();
    Decoder decoder = new Decoder();
    // 开始时间
    private static Date dateBegin = new Date();
    // 结束时间
    private static Date dateEnd = new Date();
    private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");
    static int status = 0;  // 音频的状态
    public static boolean IAT_FLAG = true;

    public static int len;
    public static byte[] audioDataByteArray;

    public static final String RECORD_FILE_PATH = "src/main/resources/record/" + System.currentTimeMillis() + ".pcm";
    public static final OutputStream outputStream; // 录音文件写出

    static {
        try {
            outputStream = new FileOutputStream(RECORD_FILE_PATH);
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }


    public static void main(String[] args) throws Exception {
        // 用线程方式启动听写
        Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
        Constants.IVW_ASR_TARGET_DATA_LINE.start();
        MyThread myThread = new MyThread();
        myThread.start();
    }


    static class MyThread extends Thread {
        public void run() {
            // 需要初始化的参数都在这里添加
            IatMic.IAT_FLAG = true;
            status = 0;
            // 结束初始化
            IatMic iatMic = new IatMic();
            // 构建鉴权url
            String authUrl = null;
            try {
                authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            OkHttpClient client = new OkHttpClient.Builder().build();
            //将url中的 schema http://和https://分别替换为ws:// 和 wss://
            String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");
            // System.err.println(url);
            Request request = new Request.Builder().url(url).build();
            WebSocket webSocket = client.newWebSocket(request, iatMic);
        }
    }

    @Override
    public void onOpen(WebSocket webSocket, Response response) {
        // System.out.println("建立连接成功");
        System.out.println(Constants.YELLOW_BACKGROUND + "机器人正在听,您请说:" + Constants.RESET);
        super.onOpen(webSocket, response);
        new Thread(() -> {
            //连接成功,开始发送数据
            //  int interval = 200;
            try {
              /*  Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
                Constants.IVW_ASR_TARGET_DATA_LINE.start();*/
                while (true) {
                    audioDataByteArray = new byte[Constants.IVW_FRAME_SIZE];
                    len = new AudioInputStream(Constants.IVW_ASR_TARGET_DATA_LINE).read(audioDataByteArray);

                    outputStream.write(Arrays.copyOf(audioDataByteArray, len));
                    outputStream.flush();
                    // System.err.println(AIMain.len + "" + AIMain.audioDataByteArray);
                    if (len == -1) {
                        status = 2;// 标志读取完毕
                    }
                    switch (status) {
                        case StatusFirstFrame:   // 第一帧音频status = 0
                            JsonObject frame = new JsonObject();
                            JsonObject business = new JsonObject();  //第一帧必须发送
                            JsonObject common = new JsonObject();  //第一帧必须发送
                            JsonObject data = new JsonObject();  //每一帧都要发送
                            // 填充common
                            common.addProperty("app_id", appid);
                            //填充business
                            business.addProperty("language", "zh_cn");//
                            //business.addProperty("language", "en_us");//英文
                            //business.addProperty("language", "ja_jp");//日语,在控制台可添加试用或购买
                            //business.addProperty("language", "ko_kr");//韩语,在控制台可添加试用或购买
                            //business.addProperty("language", "ru-ru");//俄语,在控制台可添加试用或购买
                            //business.addProperty("ptt", 1);
                            business.addProperty("domain", "iat");
                            //mandarin中文普通话  广东话cantonese
                            business.addProperty("accent", "mandarin");//中文方言请在控制台添加试用,添加后即展示相应参数值cantonese//mandarin
                            //business.addProperty("nunum", 0);
                            //business.addProperty("ptt", 1);//标点符号
                            //business.addProperty("rlang", "zh-hk"); // zh-cn :简体中文(默认值)zh-hk :繁体香港(若未授权不生效,在控制台可免费开通)
                            business.addProperty("vinfo", 1);
                            business.addProperty("dwa", "wpgs");//动态修正(若未授权不生效,在控制台可免费开通)
                            business.addProperty("vad_eos", 3000);
                            //business.addProperty("fa_nbest", true);
                            //business.addProperty("fa_sch", true);
                            //business.addProperty("vinfo", 1);
                            //business.addProperty("speex_size", 70);
                            //business.addProperty("nbest", 5);// 句子多候选(若未授权不生效,在控制台可免费开通)
                            //business.addProperty("wbest", 3);// 词级多候选(若未授权不生效,在控制台可免费开通)
                            //填充data
                            data.addProperty("status", StatusFirstFrame);
                            data.addProperty("format", "audio/L16;rate=16000");
                            //data.addProperty("encoding", "speex-wb");
                            data.addProperty("encoding", "raw");
                            data.addProperty("audio", Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len)));
                            //填充frame
                            frame.add("common", common);
                            frame.add("business", business);
                            frame.add("data", data);
                            // System.out.println("即将发送第一帧数据...");
                            // System.err.println(frame.toString());
                            webSocket.send(frame.toString());
                            status = StatusContinueFrame;  // 发送完第一帧改变status 为 1
                            break;
                        case StatusContinueFrame:  //中间帧status = 1
                            JsonObject frame1 = new JsonObject();
                            JsonObject data1 = new JsonObject();
                            data1.addProperty("status", StatusContinueFrame);
                            data1.addProperty("format", "audio/L16;rate=16000");
                            //data1.addProperty("encoding", "speex-wb");
                            data1.addProperty("encoding", "raw");
                            String temp = Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len));
                            data1.addProperty("audio", temp);
                            frame1.add("data", data1);
                            //System.out.println(temp);
                            webSocket.send(frame1.toString());
                            break;
                    }
                    try {
                        Thread.sleep(200);
                        if (!IAT_FLAG) {
                            //System.out.println("本次会话结束");
                            break;
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                //说明读完了
                status = StatusLastFrame;
                JsonObject frame2 = new JsonObject();
                JsonObject data2 = new JsonObject();
                data2.addProperty("status", StatusLastFrame);
                data2.addProperty("audio", "");
                data2.addProperty("format", "audio/L16;rate=16000");
                //data2.addProperty("encoding", "speex-wb");
                data2.addProperty("encoding", "raw");
                frame2.add("data", data2);
                webSocket.send(frame2.toString());
                // System.err.println(frame2.toString());
                // System.out.println("all data is send");
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }).start();
    }

    @Override
    public void onMessage(WebSocket webSocket, String text) {
        // System.out.println(text);
        super.onMessage(webSocket, text);
        ResponseData resp = json.fromJson(text, ResponseData.class);
        if (resp != null) {
            if (resp.getCode() != 0) {
                System.out.println("code=>" + resp.getCode() + " error=>" + resp.getMessage() + " sid=" + resp.getSid());
                System.out.println("错误码查询链接:https://www.xfyun.cn/document/error-code");
                return;
            }
            if (resp.getData() != null) {
                if (resp.getData().getResult() != null) {
                    Text te = resp.getData().getResult().getText();
                    //System.out.println(te.toString());
                    try {
                        decoder.decode(te);
                        dateEnd = new Date();
                        // System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
                        System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别中:" + decoder.toString() + Constants.RESET);
                        //System.err.println("中间识别JSON结果 ----" + text);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                if (resp.getData().getStatus() == 2) {
                    // todo  resp.data.status ==2 说明数据全部返回完毕,可以关闭连接,释放资源
                    //System.err.println("我的getStatus() == 2");
                    // System.out.println("session end ");
                    dateEnd = new Date();
                    // System.out.println(sdf.format(dateBegin) + "开始");
                    // System.out.println(sdf.format(dateEnd) + "结束");
                    //  System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
                    System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别最终结果:" + decoder.toString() + Constants.RESET);
                    // System.out.println("本次识别sid ==》" + resp.getSid());
                    decoder.discard();
                    webSocket.close(1000, "");
                    IatMic.IAT_FLAG = false;
                    // System.exit(0);
                } else {
                    // todo 根据返回的数据处理
                }
            }
        }
    }

    @Override
    public void onFailure(WebSocket webSocket, Throwable t, Response response) {
        super.onFailure(webSocket, t, response);
        try {
            if (null != response) {
                int code = response.code();
                System.out.println("onFailure code:" + code);
                System.out.println("onFailure body:" + response.body().string());
                if (101 != code) {
                    System.out.println("connection failed");
                    System.exit(0);
                }
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
        URL url = new URL(hostUrl);
        SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
        format.setTimeZone(TimeZone.getTimeZone("GMT"));
        String date = format.format(new Date());
        //String date = format.format(new Date());
        //System.err.println(date);
        StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").//
                append("date: ").append(date).append("\n").//
                append("GET ").append(url.getPath()).append(" HTTP/1.1");
        //System.err.println(builder);
        Charset charset = Charset.forName("UTF-8");
        Mac mac = Mac.getInstance("hmacsha256");
        SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");
        mac.init(spec);
        byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));
        String sha = Base64.getEncoder().encodeToString(hexDigits);
        //System.err.println(sha);
        String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
        //System.err.println(authorization);
        HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().//
                addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(charset))).//
                addQueryParameter("date", date).//
                addQueryParameter("host", url.getHost()).//
                build();
        return httpUrl.toString();
    }

    public static class ResponseData {
        private int code;
        private String message;
        private String sid;
        private Data data;

        public int getCode() {
            return code;
        }

        public String getMessage() {
            return this.message;
        }

        public String getSid() {
            return sid;
        }

        public Data getData() {
            return data;
        }
    }

    public static class Data {
        private int status;
        private Result result;

        public int getStatus() {
            return status;
        }

        public Result getResult() {
            return result;
        }
    }

    public static class Result {
        int bg;
        int ed;
        String pgs;
        int[] rg;
        int sn;
        Ws[] ws;
        boolean ls;
        JsonObject vad;

        public Text getText() {
            Text text = new Text();
            StringBuilder sb = new StringBuilder();
            for (Ws ws : this.ws) {
                sb.append(ws.cw[0].w);
            }
            text.sn = this.sn;
            text.text = sb.toString();
            text.sn = this.sn;
            text.rg = this.rg;
            text.pgs = this.pgs;
            text.bg = this.bg;
            text.ed = this.ed;
            text.ls = this.ls;
            text.vad = this.vad == null ? null : this.vad;
            return text;
        }
    }

    public static class Ws {
        Cw[] cw;
        int bg;
        int ed;
    }

    public static class Cw {
        int sc;
        String w;
    }

    public static class Text {
        int sn;
        int bg;
        int ed;
        String text;
        String pgs;
        int[] rg;
        boolean deleted;
        boolean ls;
        JsonObject vad;

        @Override
        public String toString() {
            return "Text{" + "bg=" + bg + ", ed=" + ed + ", ls=" + ls + ", sn=" + sn + ", text='" + text + '\'' + ", pgs=" + pgs + ", rg=" + Arrays.toString(rg) + ", deleted=" + deleted + ", vad=" + (vad == null ? "null" : vad.getAsJsonArray("ws").toString()) + '}';
        }
    }

    //解析返回数据,仅供参考
    public static class Decoder {
        private Text[] texts;
        private int defc = 10;

        public Decoder() {
            this.texts = new Text[this.defc];
        }

        public synchronized void decode(Text text) {
            if (text.sn >= this.defc) {
                this.resize();
            }
            if ("rpl".equals(text.pgs)) {
                for (int i = text.rg[0]; i <= text.rg[1]; i++) {
                    this.texts[i].deleted = true;
                }
            }
            this.texts[text.sn] = text;
        }

        public String toString() {
            StringBuilder sb = new StringBuilder();
            for (Text t : this.texts) {
                if (t != null && !t.deleted) {
                    sb.append(t.text);
                }
            }
            return sb.toString();
        }

        public void resize() {
            int oc = this.defc;
            this.defc <<= 1;
            Text[] old = this.texts;
            this.texts = new Text[this.defc];
            for (int i = 0; i < oc; i++) {
                this.texts[i] = old[i];
            }
        }

        public void discard() {
            for (int i = 0; i < this.texts.length; i++) {
                this.texts[i] = null;
            }
        }
    }
}
相关推荐
人工智能研究所44 分钟前
字节开源 OmniShow:文本,图片,音频,人体姿态多输入,一键成片
人工智能·神经网络·开源·音视频·开源软件·字节跳动·ai 视频
byte轻骑兵2 小时前
【LE Audio】CAP精讲[14]: BR/EDR传输连接实战,老设备兼容的核心流程解析
网络·音视频·le audio·音视频控制·车机蓝牙
jbk33112 小时前
谷哥找同片助手:相同视频片段自动寻找匹配功能使用说明
人工智能·音视频·剪辑软件·剪映自动化软件
南山有乔木7894 小时前
如何把视频转换成音频mp3格式?7种适合录屏、课程和素材整理的方法
音视频
华盛AI4 小时前
【视频物联网 App】RN 双端适配与原生核心实现深度剖析
物联网·音视频
胖祥4 小时前
Windows D3D12 DDI中的视频函数调用
音视频
sweetone4 小时前
小拆小修2例 (劲浪FOCAL耳机, ABRAZO 耦合器温控器)
经验分享·音视频
办公自动化软件定制化开发python4 小时前
开源!Edge TTS 音频转换工具 v2.1:批量文本转语音,支持段落拆分与多发音人
前端·edge·音视频
2601_9578848413 小时前
面向内容合规性的短视频矩阵分发机制:感知哈希去重与语义检索优化实践
矩阵·音视频·哈希算法
luoqice20 小时前
Windows下局域网rtsp流媒体服务器搭建-测试
服务器·windows·音视频