JAVA麦克风实时录音调用听写并存储音频到本地

一、麦克风传可以实现对着电脑说话转成文字

语音听写是一种将说话内容转换为可读文字的技术,广泛应用于各种语音输入和语音操控的场景中。随着人工智能和语音识别技术的发展,语音听写服务提供了高识别率和高准确率,为用户带来便捷的交互体验。语音听写在各行各业都有广泛应用,从聊天输入到游戏娱乐,再到人机交互,它使人们的生活更加便利和高效。本技术不仅可以实现实时送流给听写能力,而且还能把录音的音频文件存储到本地,方便验证产生的音频格式是否正确、设备录音效果是否符合能力要求等等。

二、具体实现录音的代码

java 复制代码
package day;

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import day.constants.Constants;
import okhttp3.*;

import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import javax.sound.sampled.AudioInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.*;

// 麦克风传流听写

public class IatMic extends WebSocketListener {
    private static final String hostUrl = "https://iat-api.xfyun.cn/v2/iat"; //中英文,http url 不支持解析 ws/wss schema
    // private static final String hostUrl = "https://iat-niche-api.xfyun.cn/v2/iat";//小语种
    private static final String appid = ""; //在控制台-我的应用获取
    private static final String apiSecret = ""; //在控制台-我的应用-语音听写(流式版)获取
    private static final String apiKey = ""; //在控制台-我的应用-语音听写(流式版)获取

    //private static final String file = "./zMusic/pcm/科大讯飞.pcm"; // 中文
    public static final int StatusFirstFrame = 0;
    public static final int StatusContinueFrame = 1;
    public static final int StatusLastFrame = 2;
    public static final Gson json = new Gson();
    Decoder decoder = new Decoder();
    // 开始时间
    private static Date dateBegin = new Date();
    // 结束时间
    private static Date dateEnd = new Date();
    private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");
    static int status = 0;  // 音频的状态
    public static boolean IAT_FLAG = true;

    public static int len;
    public static byte[] audioDataByteArray;

    public static final String RECORD_FILE_PATH = "src/main/resources/record/" + System.currentTimeMillis() + ".pcm";
    public static final OutputStream outputStream; // 录音文件写出

    static {
        try {
            outputStream = new FileOutputStream(RECORD_FILE_PATH);
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }


    public static void main(String[] args) throws Exception {
        // 用线程方式启动听写
        Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
        Constants.IVW_ASR_TARGET_DATA_LINE.start();
        MyThread myThread = new MyThread();
        myThread.start();
    }


    static class MyThread extends Thread {
        public void run() {
            // 需要初始化的参数都在这里添加
            IatMic.IAT_FLAG = true;
            status = 0;
            // 结束初始化
            IatMic iatMic = new IatMic();
            // 构建鉴权url
            String authUrl = null;
            try {
                authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            OkHttpClient client = new OkHttpClient.Builder().build();
            //将url中的 schema http://和https://分别替换为ws:// 和 wss://
            String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");
            // System.err.println(url);
            Request request = new Request.Builder().url(url).build();
            WebSocket webSocket = client.newWebSocket(request, iatMic);
        }
    }

    @Override
    public void onOpen(WebSocket webSocket, Response response) {
        // System.out.println("建立连接成功");
        System.out.println(Constants.YELLOW_BACKGROUND + "机器人正在听,您请说:" + Constants.RESET);
        super.onOpen(webSocket, response);
        new Thread(() -> {
            //连接成功,开始发送数据
            //  int interval = 200;
            try {
              /*  Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
                Constants.IVW_ASR_TARGET_DATA_LINE.start();*/
                while (true) {
                    audioDataByteArray = new byte[Constants.IVW_FRAME_SIZE];
                    len = new AudioInputStream(Constants.IVW_ASR_TARGET_DATA_LINE).read(audioDataByteArray);

                    outputStream.write(Arrays.copyOf(audioDataByteArray, len));
                    outputStream.flush();
                    // System.err.println(AIMain.len + "" + AIMain.audioDataByteArray);
                    if (len == -1) {
                        status = 2;// 标志读取完毕
                    }
                    switch (status) {
                        case StatusFirstFrame:   // 第一帧音频status = 0
                            JsonObject frame = new JsonObject();
                            JsonObject business = new JsonObject();  //第一帧必须发送
                            JsonObject common = new JsonObject();  //第一帧必须发送
                            JsonObject data = new JsonObject();  //每一帧都要发送
                            // 填充common
                            common.addProperty("app_id", appid);
                            //填充business
                            business.addProperty("language", "zh_cn");//
                            //business.addProperty("language", "en_us");//英文
                            //business.addProperty("language", "ja_jp");//日语,在控制台可添加试用或购买
                            //business.addProperty("language", "ko_kr");//韩语,在控制台可添加试用或购买
                            //business.addProperty("language", "ru-ru");//俄语,在控制台可添加试用或购买
                            //business.addProperty("ptt", 1);
                            business.addProperty("domain", "iat");
                            //mandarin中文普通话  广东话cantonese
                            business.addProperty("accent", "mandarin");//中文方言请在控制台添加试用,添加后即展示相应参数值cantonese//mandarin
                            //business.addProperty("nunum", 0);
                            //business.addProperty("ptt", 1);//标点符号
                            //business.addProperty("rlang", "zh-hk"); // zh-cn :简体中文(默认值)zh-hk :繁体香港(若未授权不生效,在控制台可免费开通)
                            business.addProperty("vinfo", 1);
                            business.addProperty("dwa", "wpgs");//动态修正(若未授权不生效,在控制台可免费开通)
                            business.addProperty("vad_eos", 3000);
                            //business.addProperty("fa_nbest", true);
                            //business.addProperty("fa_sch", true);
                            //business.addProperty("vinfo", 1);
                            //business.addProperty("speex_size", 70);
                            //business.addProperty("nbest", 5);// 句子多候选(若未授权不生效,在控制台可免费开通)
                            //business.addProperty("wbest", 3);// 词级多候选(若未授权不生效,在控制台可免费开通)
                            //填充data
                            data.addProperty("status", StatusFirstFrame);
                            data.addProperty("format", "audio/L16;rate=16000");
                            //data.addProperty("encoding", "speex-wb");
                            data.addProperty("encoding", "raw");
                            data.addProperty("audio", Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len)));
                            //填充frame
                            frame.add("common", common);
                            frame.add("business", business);
                            frame.add("data", data);
                            // System.out.println("即将发送第一帧数据...");
                            // System.err.println(frame.toString());
                            webSocket.send(frame.toString());
                            status = StatusContinueFrame;  // 发送完第一帧改变status 为 1
                            break;
                        case StatusContinueFrame:  //中间帧status = 1
                            JsonObject frame1 = new JsonObject();
                            JsonObject data1 = new JsonObject();
                            data1.addProperty("status", StatusContinueFrame);
                            data1.addProperty("format", "audio/L16;rate=16000");
                            //data1.addProperty("encoding", "speex-wb");
                            data1.addProperty("encoding", "raw");
                            String temp = Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len));
                            data1.addProperty("audio", temp);
                            frame1.add("data", data1);
                            //System.out.println(temp);
                            webSocket.send(frame1.toString());
                            break;
                    }
                    try {
                        Thread.sleep(200);
                        if (!IAT_FLAG) {
                            //System.out.println("本次会话结束");
                            break;
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                //说明读完了
                status = StatusLastFrame;
                JsonObject frame2 = new JsonObject();
                JsonObject data2 = new JsonObject();
                data2.addProperty("status", StatusLastFrame);
                data2.addProperty("audio", "");
                data2.addProperty("format", "audio/L16;rate=16000");
                //data2.addProperty("encoding", "speex-wb");
                data2.addProperty("encoding", "raw");
                frame2.add("data", data2);
                webSocket.send(frame2.toString());
                // System.err.println(frame2.toString());
                // System.out.println("all data is send");
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }).start();
    }

    @Override
    public void onMessage(WebSocket webSocket, String text) {
        // System.out.println(text);
        super.onMessage(webSocket, text);
        ResponseData resp = json.fromJson(text, ResponseData.class);
        if (resp != null) {
            if (resp.getCode() != 0) {
                System.out.println("code=>" + resp.getCode() + " error=>" + resp.getMessage() + " sid=" + resp.getSid());
                System.out.println("错误码查询链接:https://www.xfyun.cn/document/error-code");
                return;
            }
            if (resp.getData() != null) {
                if (resp.getData().getResult() != null) {
                    Text te = resp.getData().getResult().getText();
                    //System.out.println(te.toString());
                    try {
                        decoder.decode(te);
                        dateEnd = new Date();
                        // System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
                        System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别中:" + decoder.toString() + Constants.RESET);
                        //System.err.println("中间识别JSON结果 ----" + text);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                if (resp.getData().getStatus() == 2) {
                    // todo  resp.data.status ==2 说明数据全部返回完毕,可以关闭连接,释放资源
                    //System.err.println("我的getStatus() == 2");
                    // System.out.println("session end ");
                    dateEnd = new Date();
                    // System.out.println(sdf.format(dateBegin) + "开始");
                    // System.out.println(sdf.format(dateEnd) + "结束");
                    //  System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
                    System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别最终结果:" + decoder.toString() + Constants.RESET);
                    // System.out.println("本次识别sid ==》" + resp.getSid());
                    decoder.discard();
                    webSocket.close(1000, "");
                    IatMic.IAT_FLAG = false;
                    // System.exit(0);
                } else {
                    // todo 根据返回的数据处理
                }
            }
        }
    }

    @Override
    public void onFailure(WebSocket webSocket, Throwable t, Response response) {
        super.onFailure(webSocket, t, response);
        try {
            if (null != response) {
                int code = response.code();
                System.out.println("onFailure code:" + code);
                System.out.println("onFailure body:" + response.body().string());
                if (101 != code) {
                    System.out.println("connection failed");
                    System.exit(0);
                }
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
        URL url = new URL(hostUrl);
        SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
        format.setTimeZone(TimeZone.getTimeZone("GMT"));
        String date = format.format(new Date());
        //String date = format.format(new Date());
        //System.err.println(date);
        StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").//
                append("date: ").append(date).append("\n").//
                append("GET ").append(url.getPath()).append(" HTTP/1.1");
        //System.err.println(builder);
        Charset charset = Charset.forName("UTF-8");
        Mac mac = Mac.getInstance("hmacsha256");
        SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");
        mac.init(spec);
        byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));
        String sha = Base64.getEncoder().encodeToString(hexDigits);
        //System.err.println(sha);
        String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
        //System.err.println(authorization);
        HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().//
                addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(charset))).//
                addQueryParameter("date", date).//
                addQueryParameter("host", url.getHost()).//
                build();
        return httpUrl.toString();
    }

    public static class ResponseData {
        private int code;
        private String message;
        private String sid;
        private Data data;

        public int getCode() {
            return code;
        }

        public String getMessage() {
            return this.message;
        }

        public String getSid() {
            return sid;
        }

        public Data getData() {
            return data;
        }
    }

    public static class Data {
        private int status;
        private Result result;

        public int getStatus() {
            return status;
        }

        public Result getResult() {
            return result;
        }
    }

    public static class Result {
        int bg;
        int ed;
        String pgs;
        int[] rg;
        int sn;
        Ws[] ws;
        boolean ls;
        JsonObject vad;

        public Text getText() {
            Text text = new Text();
            StringBuilder sb = new StringBuilder();
            for (Ws ws : this.ws) {
                sb.append(ws.cw[0].w);
            }
            text.sn = this.sn;
            text.text = sb.toString();
            text.sn = this.sn;
            text.rg = this.rg;
            text.pgs = this.pgs;
            text.bg = this.bg;
            text.ed = this.ed;
            text.ls = this.ls;
            text.vad = this.vad == null ? null : this.vad;
            return text;
        }
    }

    public static class Ws {
        Cw[] cw;
        int bg;
        int ed;
    }

    public static class Cw {
        int sc;
        String w;
    }

    public static class Text {
        int sn;
        int bg;
        int ed;
        String text;
        String pgs;
        int[] rg;
        boolean deleted;
        boolean ls;
        JsonObject vad;

        @Override
        public String toString() {
            return "Text{" + "bg=" + bg + ", ed=" + ed + ", ls=" + ls + ", sn=" + sn + ", text='" + text + '\'' + ", pgs=" + pgs + ", rg=" + Arrays.toString(rg) + ", deleted=" + deleted + ", vad=" + (vad == null ? "null" : vad.getAsJsonArray("ws").toString()) + '}';
        }
    }

    //解析返回数据,仅供参考
    public static class Decoder {
        private Text[] texts;
        private int defc = 10;

        public Decoder() {
            this.texts = new Text[this.defc];
        }

        public synchronized void decode(Text text) {
            if (text.sn >= this.defc) {
                this.resize();
            }
            if ("rpl".equals(text.pgs)) {
                for (int i = text.rg[0]; i <= text.rg[1]; i++) {
                    this.texts[i].deleted = true;
                }
            }
            this.texts[text.sn] = text;
        }

        public String toString() {
            StringBuilder sb = new StringBuilder();
            for (Text t : this.texts) {
                if (t != null && !t.deleted) {
                    sb.append(t.text);
                }
            }
            return sb.toString();
        }

        public void resize() {
            int oc = this.defc;
            this.defc <<= 1;
            Text[] old = this.texts;
            this.texts = new Text[this.defc];
            for (int i = 0; i < oc; i++) {
                this.texts[i] = old[i];
            }
        }

        public void discard() {
            for (int i = 0; i < this.texts.length; i++) {
                this.texts[i] = null;
            }
        }
    }
}
相关推荐
原来458 小时前
rtpengine_mr12.0 基础建设&&容器运行
音视频·ims·rtpengine
万岳科技系统开发9 小时前
短视频商城系统源码揭秘:架构设计与实现
音视频
TSINGSEE15 小时前
视频技术助力智慧城市一网统管:视频资源整合与智能化管理
人工智能·ai·音视频·智慧城市·城市安防监控系统
简鹿办公15 小时前
M3U8 视频是一种什么格式,M3U8 视频怎么转成 MP4
音视频
打开官网16 小时前
如何压缩视频大小,怎么压缩视频
音视频
小周爱学习€16 小时前
OPENCV(视频入门笔记)
笔记·opencv·音视频
TSINGSEE16 小时前
智能视频监控如何助力体育场馆安全管理:安防监控EasyCVR视频综合管理方案
5g·安全·音视频·视频监控·视频监控技术
hlyling16 小时前
高效批量调整视频色彩:一键实现视频饱和与色度优化,提升视觉体验
音视频
Bubbliiiing16 小时前
AIGC专栏12——EasyAnimateV3发布详解 支持图&文生视频 最大支持960x960x144帧视频生成
深度学习·生成对抗网络·自然语言处理·aigc·音视频
玖尾猫16 小时前
04.ffmpeg打印音视频媒体信息
ffmpeg·音视频·媒体