一、麦克风传流听写:对着电脑说话即可实时转成文字
语音听写是一种将说话内容转换为可读文字的技术,广泛应用于各种语音输入和语音操控的场景中。随着人工智能和语音识别技术的发展,语音听写服务已具备较高的识别率和准确率,为用户带来便捷的交互体验。语音听写在各行各业都有广泛应用,从聊天输入到游戏娱乐,再到人机交互,它使人们的生活更加便利和高效。本示例不仅可以把麦克风采集的音频实时送流给听写能力,还能把录音的音频文件存储到本地,方便验证产生的音频格式是否正确、设备录音效果是否符合能力要求等。
二、具体实现录音的代码
java
package day;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import day.constants.Constants;
import okhttp3.*;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import javax.sound.sampled.AudioInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.*;
// 麦克风传流听写
public class IatMic extends WebSocketListener {
/**
 * Dictation endpoint for Chinese/English. It is kept as an https URL because the URL parser used
 * for signing does not understand the ws/wss schemes; the scheme is swapped just before connecting.
 */
private static final String hostUrl = "https://iat-api.xfyun.cn/v2/iat";
// private static final String hostUrl = "https://iat-niche-api.xfyun.cn/v2/iat"; // minority-language endpoint
/** Application id; obtain it in the console under "My Apps". */
private static final String appid = "";
/** API secret; obtain it in the console under "My Apps - Speech Dictation (streaming)". */
private static final String apiSecret = "";
/** API key; obtain it in the console under "My Apps - Speech Dictation (streaming)". */
private static final String apiKey = "";
//private static final String file = "./zMusic/pcm/科大讯飞.pcm"; // Chinese sample file
/** Frame status sent with the first audio frame. */
public static final int StatusFirstFrame = 0;
/** Frame status sent with every intermediate audio frame. */
public static final int StatusContinueFrame = 1;
/** Frame status sent with the closing (empty) frame. */
public static final int StatusLastFrame = 2;
/** Shared Gson instance used to parse server messages. */
public static final Gson json = new Gson();
/** Accumulates partial recognition results for this listener. */
Decoder decoder = new Decoder();
// Session start time.
private static Date dateBegin = new Date();
// Time of the most recent result.
private static Date dateEnd = new Date();
private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");
/** Status of the next audio frame to send (one of the Status* constants). */
static int status = 0;
/**
 * Session-running flag. It is cleared by the WebSocket callbacks and polled by the audio sender
 * thread, so it is volatile to guarantee the sender actually observes the update.
 */
public static volatile boolean IAT_FLAG = true;
/** Number of bytes returned by the most recent microphone read. */
public static int len;
/** Buffer holding the most recently read microphone frame. */
public static byte[] audioDataByteArray;
/** Path of the local PCM dump of the recording; the file name is the class-load timestamp. */
public static final String RECORD_FILE_PATH = "src/main/resources/record/" + System.currentTimeMillis() + ".pcm";
/** Stream the recorded audio is written to; assigned in the static initializer. */
public static final OutputStream outputStream;
static {
try {
outputStream = new FileOutputStream(RECORD_FILE_PATH);
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
/**
 * Program entry point: opens and starts the capture line held in {@code Constants}, then runs
 * the dictation session on a separate thread.
 *
 * @param args unused
 * @throws Exception if the capture line cannot be opened
 */
public static void main(String[] args) throws Exception {
    Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
    Constants.IVW_ASR_TARGET_DATA_LINE.start();
    // Dictation is driven from its own thread.
    Thread dictationThread = new MyThread();
    dictationThread.start();
}
/**
 * Thread that resets the session state, builds the signed URL and opens the WebSocket using a
 * fresh {@code IatMic} as the listener.
 */
static class MyThread extends Thread {
    @Override
    public void run() {
        // Per-session state that must be reset goes here.
        status = 0;
        IatMic.IAT_FLAG = true;

        IatMic listener = new IatMic();

        // Build the authenticated URL; any failure is rethrown unchecked.
        final String signedUrl;
        try {
            signedUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        OkHttpClient client = new OkHttpClient.Builder().build();
        // Swap the http:// and https:// schemes for ws:// and wss:// respectively.
        String wsUrl = signedUrl.replace("http://", "ws://").replace("https://", "wss://");
        Request request = new Request.Builder().url(wsUrl).build();
        client.newWebSocket(request, listener);
    }
}
/**
 * Called when the WebSocket connection is established. Prints the "listening" prompt and starts
 * a background thread that streams microphone audio to the service.
 *
 * @param webSocket the open socket frames are sent on
 * @param response  the handshake response
 */
@Override
public void onOpen(WebSocket webSocket, Response response) {
    System.out.println(Constants.YELLOW_BACKGROUND + "机器人正在听,您请说:" + Constants.RESET);
    super.onOpen(webSocket, response);
    new Thread(() -> streamMicrophone(webSocket)).start();
}

/**
 * Reads microphone frames, appends them to the local recording file and sends them to the
 * service until the stream ends, the thread is interrupted or {@code IAT_FLAG} is cleared; then
 * sends the closing frame.
 */
private static void streamMicrophone(WebSocket webSocket) {
    try {
        // One wrapper for the whole session instead of a new one per frame. It is deliberately
        // not closed: the line is opened by main(), and closing the wrapper would presumably
        // close that line as well.
        AudioInputStream micStream = new AudioInputStream(Constants.IVW_ASR_TARGET_DATA_LINE);
        while (true) {
            audioDataByteArray = new byte[Constants.IVW_FRAME_SIZE];
            len = micStream.read(audioDataByteArray);
            if (len == -1) {
                // End of stream: must be checked before copying, because Arrays.copyOf rejects
                // a negative length. Fall through to the closing frame.
                break;
            }
            byte[] chunk = Arrays.copyOf(audioDataByteArray, len);
            outputStream.write(chunk);
            outputStream.flush();

            String encodedAudio = Base64.getEncoder().encodeToString(chunk);
            if (status == StatusFirstFrame) {
                webSocket.send(buildFirstFrame(encodedAudio).toString());
                // After the first frame every following frame is a continuation frame.
                status = StatusContinueFrame;
            } else if (status == StatusContinueFrame) {
                webSocket.send(buildContinueFrame(encodedAudio).toString());
            }

            try {
                Thread.sleep(200);
            } catch (InterruptedException e) {
                // Preserve the interrupt and stop streaming instead of swallowing it.
                Thread.currentThread().interrupt();
                break;
            }
            if (!IAT_FLAG) {
                // The session was ended by a WebSocket callback.
                break;
            }
        }
        status = StatusLastFrame;
        webSocket.send(buildLastFrame().toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Builds the first frame, which carries the common and business sections plus the audio data. */
private static JsonObject buildFirstFrame(String encodedAudio) {
    JsonObject common = new JsonObject();
    common.addProperty("app_id", appid);

    JsonObject business = new JsonObject();
    // Options left disabled in the original sample: language en_us / ja_jp / ko_kr / ru-ru
    // (some need to be enabled in the console), ptt (punctuation), nunum, rlang (zh-cn / zh-hk),
    // fa_nbest, fa_sch, speex_size, nbest (sentence candidates), wbest (word candidates).
    business.addProperty("language", "zh_cn");
    business.addProperty("domain", "iat");
    // "mandarin" or e.g. "cantonese"; dialects must be enabled in the console first.
    business.addProperty("accent", "mandarin");
    business.addProperty("vinfo", 1);
    // Dynamic correction; has no effect unless enabled in the console.
    business.addProperty("dwa", "wpgs");
    // Trailing-silence threshold (presumably milliseconds; see the service documentation).
    business.addProperty("vad_eos", 3000);

    JsonObject data = new JsonObject();
    data.addProperty("status", StatusFirstFrame);
    data.addProperty("format", "audio/L16;rate=16000");
    // Alternative encoding left disabled in the original sample: "speex-wb".
    data.addProperty("encoding", "raw");
    data.addProperty("audio", encodedAudio);

    JsonObject frame = new JsonObject();
    frame.add("common", common);
    frame.add("business", business);
    frame.add("data", data);
    return frame;
}

/** Builds an intermediate frame, which carries only the data section. */
private static JsonObject buildContinueFrame(String encodedAudio) {
    JsonObject data = new JsonObject();
    data.addProperty("status", StatusContinueFrame);
    data.addProperty("format", "audio/L16;rate=16000");
    data.addProperty("encoding", "raw");
    data.addProperty("audio", encodedAudio);

    JsonObject frame = new JsonObject();
    frame.add("data", data);
    return frame;
}

/** Builds the closing frame: last-frame status with an empty audio payload. */
private static JsonObject buildLastFrame() {
    JsonObject data = new JsonObject();
    data.addProperty("status", StatusLastFrame);
    data.addProperty("audio", "");
    data.addProperty("format", "audio/L16;rate=16000");
    data.addProperty("encoding", "raw");

    JsonObject frame = new JsonObject();
    frame.add("data", data);
    return frame;
}
/**
 * Handles one text message from the service: reports errors, feeds partial results to the
 * decoder and prints them, and ends the session when the service reports data status 2.
 *
 * @param webSocket the socket the message arrived on
 * @param text      the JSON message
 */
@Override
public void onMessage(WebSocket webSocket, String text) {
    super.onMessage(webSocket, text);
    ResponseData resp = json.fromJson(text, ResponseData.class);
    if (resp == null) {
        return;
    }
    if (resp.getCode() != 0) {
        System.out.println("code=>" + resp.getCode() + " error=>" + resp.getMessage() + " sid=" + resp.getSid());
        System.out.println("错误码查询链接:https://www.xfyun.cn/document/error-code");
        // The session has failed: stop the audio sender, which polls IAT_FLAG, and release
        // the socket. Otherwise the sender would keep streaming into a dead session.
        IatMic.IAT_FLAG = false;
        webSocket.close(1000, "");
        return;
    }
    Data data = resp.getData();
    if (data == null) {
        return;
    }
    Result result = data.getResult();
    if (result != null) {
        try {
            // Converting the result happens inside the try so a malformed result is reported
            // here instead of escaping into the WebSocket machinery.
            Text te = result.getText();
            decoder.decode(te);
            dateEnd = new Date();
            System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别中:" + decoder.toString() + Constants.RESET);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if (data.getStatus() == 2) {
        // Data status 2: all results have been returned, so the connection can be closed.
        dateEnd = new Date();
        System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别最终结果:" + decoder.toString() + Constants.RESET);
        decoder.discard();
        webSocket.close(1000, "");
        IatMic.IAT_FLAG = false;
    }
}
/**
 * Called when the WebSocket fails. Stops the audio sender, reports the cause and, when a
 * response is available, prints its code and body; exits the JVM if the code is not 101.
 *
 * @param webSocket the failed socket
 * @param t         the cause of the failure
 * @param response  the HTTP response, or null if none was received
 */
@Override
public void onFailure(WebSocket webSocket, Throwable t, Response response) {
    super.onFailure(webSocket, t, response);
    // The sender thread polls this flag; without clearing it the sender would loop forever.
    IatMic.IAT_FLAG = false;
    if (t != null) {
        // Report the cause; otherwise a failure without a response would be completely silent.
        System.out.println("onFailure error:" + t);
    }
    if (response == null) {
        return;
    }
    try {
        int code = response.code();
        System.out.println("onFailure code:" + code);
        ResponseBody body = response.body();
        System.out.println("onFailure body:" + (body == null ? "null" : body.string()));
        if (101 != code) {
            System.out.println("connection failed");
            System.exit(0);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Builds the signed request URL. The string "host / date / request-line" is signed with
 * HMAC-SHA256 keyed by {@code apiSecret}; the Base64-encoded authorization string, the date and
 * the host are then appended as query parameters to an https URL for the same host and path.
 *
 * @param hostUrl   service URL; only its host and path are used
 * @param apiKey    API key placed in the authorization string
 * @param apiSecret secret used as the HMAC key
 * @return the https URL including the authorization, date and host query parameters
 * @throws Exception if the URL is malformed or the HMAC cannot be computed
 */
public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
    final URL endpoint = new URL(hostUrl);
    final String host = endpoint.getHost();
    final String path = endpoint.getPath();
    final Charset utf8 = Charset.forName("UTF-8");

    // Current time as an HTTP-style date in GMT.
    SimpleDateFormat httpDate = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
    httpDate.setTimeZone(TimeZone.getTimeZone("GMT"));
    final String date = httpDate.format(new Date());

    // Text to sign: host line, date line and request line.
    String signingInput = "host: " + host + "\n"
            + "date: " + date + "\n"
            + "GET " + path + " HTTP/1.1";

    Mac hmac = Mac.getInstance("hmacsha256");
    hmac.init(new SecretKeySpec(apiSecret.getBytes(utf8), "hmacsha256"));
    byte[] digest = hmac.doFinal(signingInput.getBytes(utf8));
    String signature = Base64.getEncoder().encodeToString(digest);

    String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", signature);
    String encodedAuthorization = Base64.getEncoder().encodeToString(authorization.getBytes(utf8));

    HttpUrl.Builder signed = HttpUrl.parse("https://" + host + path).newBuilder();
    signed.addQueryParameter("authorization", encodedAuthorization);
    signed.addQueryParameter("date", date);
    signed.addQueryParameter("host", host);
    return signed.build().toString();
}
/**
 * Top-level envelope of a server message. Instances are created by Gson from the message JSON,
 * so the field names must match the JSON keys.
 */
public static class ResponseData {
    private int code;
    private String message;
    private String sid;
    private Data data;

    /** @return the result code; the message handler treats any non-zero value as an error */
    public int getCode() {
        return this.code;
    }

    /** @return the message text accompanying the code */
    public String getMessage() {
        return this.message;
    }

    /** @return the session id reported by the service */
    public String getSid() {
        return this.sid;
    }

    /** @return the payload section, or null if the message has none */
    public Data getData() {
        return this.data;
    }
}
/** Payload section of a server message: a data status plus an optional recognition result. */
public static class Data {
    private int status;
    private Result result;

    /** @return the data status; the message handler treats 2 as "all results returned" */
    public int getStatus() {
        return this.status;
    }

    /** @return the recognition result, or null if this message carries none */
    public Result getResult() {
        return this.result;
    }
}
/**
 * One recognition result as delivered in the {@code result} section of a server message. The
 * fields are filled in by Gson, so their names must match the JSON keys.
 */
public static class Result {
    int bg;
    int ed;
    String pgs;
    int[] rg;
    int sn;
    Ws[] ws;
    boolean ls;
    JsonObject vad;

    /**
     * Flattens this result into a {@link Text}: the text is the concatenation of the first
     * candidate of every word, and the remaining fields are copied over unchanged.
     *
     * @return a new {@code Text}; its text is empty when the result carries no words
     */
    public Text getText() {
        StringBuilder sb = new StringBuilder();
        // A result may arrive without words or candidates; skip whatever is missing instead of
        // failing with a NullPointerException or ArrayIndexOutOfBoundsException.
        if (this.ws != null) {
            for (Ws word : this.ws) {
                if (word == null || word.cw == null || word.cw.length == 0) {
                    continue;
                }
                Cw first = word.cw[0];
                if (first != null && first.w != null) {
                    sb.append(first.w);
                }
            }
        }
        Text text = new Text();
        text.sn = this.sn;
        text.text = sb.toString();
        text.rg = this.rg;
        text.pgs = this.pgs;
        text.bg = this.bg;
        text.ed = this.ed;
        text.ls = this.ls;
        text.vad = this.vad;
        return text;
    }
}
/**
 * One element of {@code Result.ws}. {@code Result.getText()} takes the first entry of
 * {@code cw} as the text of this element.
 */
public static class Ws {
// Candidates for this element; only index 0 is read by Result.getText().
Cw[] cw;
// NOTE(review): presumably begin/end positions of this element; not read anywhere in this file.
int bg;
int ed;
}
/** One candidate inside {@code Ws.cw}. */
public static class Cw {
// NOTE(review): presumably a score for this candidate; not read anywhere in this file.
int sc;
// Text of the candidate; appended to the sentence by Result.getText().
String w;
}
/**
 * Flattened recognition result stored by the {@code Decoder}: the sentence text plus the
 * fields copied from the originating {@code Result}.
 */
public static class Text {
    int sn;
    int bg;
    int ed;
    String text;
    String pgs;
    int[] rg;
    // Set by the Decoder when a later "rpl" result replaces this entry.
    boolean deleted;
    boolean ls;
    JsonObject vad;

    /**
     * Returns a debug representation. The {@code vad} part shows the object's {@code "ws"}
     * member, or {@code null} when {@code vad} or that member is absent.
     */
    @Override
    public String toString() {
        // vad.get("ws") is null when the member is missing; String.valueOf turns that into
        // "null" instead of failing with a NullPointerException.
        String vadWs = (vad == null) ? "null" : String.valueOf(vad.get("ws"));
        return "Text{" + "bg=" + bg + ", ed=" + ed + ", ls=" + ls + ", sn=" + sn + ", text='" + text + '\'' + ", pgs=" + pgs + ", rg=" + Arrays.toString(rg) + ", deleted=" + deleted + ", vad=" + vadWs + '}';
    }
}
/**
 * Assembles the recognized sentence from partial results (reference implementation only).
 * Results are stored by their {@code sn}; a result whose {@code pgs} is {@code "rpl"} marks
 * the entries in its {@code rg} range as deleted before it is stored. All methods synchronize
 * on the instance.
 */
public static class Decoder {
    private Text[] texts;
    private int defc = 10;

    /** Creates a decoder with the default initial capacity. */
    public Decoder() {
        this.texts = new Text[this.defc];
    }

    /**
     * Stores {@code text} at index {@code text.sn}, growing the storage as needed and applying
     * the replacement range first when the result is a replacement.
     *
     * @param text the partial result to merge
     */
    public synchronized void decode(Text text) {
        // A single doubling is not enough when sn jumps past twice the capacity.
        while (text.sn >= this.defc) {
            this.resize();
        }
        if ("rpl".equals(text.pgs) && text.rg != null && text.rg.length >= 2) {
            // Clamp the range to the storage and skip slots that were never filled.
            int from = Math.max(text.rg[0], 0);
            int to = Math.min(text.rg[1], this.texts.length - 1);
            for (int i = from; i <= to; i++) {
                if (this.texts[i] != null) {
                    this.texts[i].deleted = true;
                }
            }
        }
        this.texts[text.sn] = text;
    }

    /** Returns the concatenated text of all stored results that are not marked deleted. */
    @Override
    public synchronized String toString() {
        StringBuilder sb = new StringBuilder();
        for (Text t : this.texts) {
            if (t != null && !t.deleted) {
                sb.append(t.text);
            }
        }
        return sb.toString();
    }

    /** Doubles the storage capacity, keeping the existing entries. */
    public synchronized void resize() {
        this.defc <<= 1;
        this.texts = Arrays.copyOf(this.texts, this.defc);
    }

    /** Clears all stored results; the capacity is kept. */
    public synchronized void discard() {
        Arrays.fill(this.texts, null);
    }
}
}