# 监听文件夹生成的双声道wav文件实时转文字
想要把软电话生成的双声道录音文件实时转成文字，用于内容质检。思路是：监听某个文件夹的文件创建事件，拆分双声道wav，再调用语音转文字接口。
使用的是阿里云的实时语音识别（ASR）服务（需要阿里云账号）
## 程序入口类
程序入口
StartFolderWatcherByHand.java
package xxx;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Manual entry point: starts a {@link FileWatchTask} on a single background thread.
 *
 * <p>The directory to watch may be passed as the first command-line argument; when no
 * argument is given the original hard-coded default is used.
 */
public class StartFolderWatcherByHand {
    /** Directory watched when no command-line argument is supplied. */
    private static final String DEFAULT_WATCH_DIRECTORY = "C:\\wav\\";

    /**
     * Launches the folder watcher.
     *
     * @param args optional; {@code args[0]} overrides the directory to watch
     * @throws Exception declared for backward compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        String watchDirectory =
                (args != null && args.length > 0) ? args[0] : DEFAULT_WATCH_DIRECTORY;

        ExecutorService watcherExecutor = Executors.newSingleThreadExecutor();
        // Start the folder watcher.
        watcherExecutor.execute(new FileWatchTask(watchDirectory));
        // shutdown() does not cancel the task that was just submitted; it only lets the
        // worker thread terminate once the watcher returns, so the JVM can exit instead
        // of being kept alive forever by an idle non-daemon pool thread.
        watcherExecutor.shutdown();
    }
}
## 文件夹监控类
文件夹监控线程 文件事件有多个 这里只监听文件创建
FileWatchTask.java
package xxx;
import java.nio.file.FileSystems;
import java.nio.file.Paths;
import java.nio.file.StandardWatchEventKinds;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
/**
 * Watches one directory for newly created {@code .wav} files and starts a
 * {@code SpeechTranscriberTask.Task} on a new thread for each of them.
 *
 * <p>Only {@code ENTRY_CREATE} events are registered. The loop ends when the watch key
 * becomes invalid, when the thread is interrupted, or when an unexpected exception
 * occurs while watching.
 */
public class FileWatchTask implements Runnable {
    /** File-name suffix (matched case-insensitively) that triggers transcription. */
    private static final String WAV_SUFFIX = ".wav";

    private final String fileDirectory;

    /**
     * @param fileDirectory directory to watch; a trailing separator is optional
     */
    public FileWatchTask(String fileDirectory) {
        this.fileDirectory = fileDirectory;
    }

    @Override
    public void run() {
        // try-with-resources so the WatchService (and its OS handles) is always released.
        try (WatchService watchService = FileSystems.getDefault().newWatchService()) {
            // Listen for file-creation events only.
            Paths.get(fileDirectory).register(watchService, StandardWatchEventKinds.ENTRY_CREATE);
            while (true) {
                // Block until a key is signalled.
                WatchKey key = watchService.take();
                // Process the pending events.
                for (WatchEvent<?> event : key.pollEvents()) {
                    // OVERFLOW can be delivered even though it was not registered; it has
                    // no context, so the original code would fail with a NullPointerException.
                    if (event.kind() == StandardWatchEventKinds.OVERFLOW || event.context() == null) {
                        continue;
                    }
                    String filename = event.context().toString();
                    if (isWavFile(filename)) {
                        startTranscription(filename);
                    }
                }
                // Reset the key so further events are queued; an invalid key means the
                // directory can no longer be watched.
                boolean valid = key.reset();
                if (!valid) {
                    break;
                }
                // Short pause between polling rounds (kept from the original).
                Thread.sleep(500);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the owning executor can observe it.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns true when the name ends with ".wav" in any letter case. */
    private static boolean isWavFile(String filename) {
        int suffixStart = filename.length() - WAV_SUFFIX.length();
        // regionMatches returns false for a negative offset, i.e. for names that are too short.
        return filename.regionMatches(true, suffixStart, WAV_SUFFIX, 0, WAV_SUFFIX.length());
    }

    /** Starts the ASR task for one file; a failure here must not stop the watcher loop. */
    private void startTranscription(String filename) {
        // Paths.get joins the parts with the platform separator, so the configured
        // directory works with or without a trailing separator.
        String fullPath = Paths.get(fileDirectory, filename).toString();
        try {
            new Thread(new SpeechTranscriberTask.Task(fullPath)).start();
        } catch (RuntimeException e) {
            System.err.println("Failed to start transcription for " + fullPath);
            e.printStackTrace();
        }
    }
}
## 阿里实时录音识别接口
语音识别使用了阿里实时录音识别JAVA SDK里面有对getTranscriberListener中回调函数的详细介绍
其中用到的Token的获取方式请参考“获取Token”
整体思路为：拆分wav文件、解码语音流、语音流转文字。具体的wav拆分详情请参考我的另一篇文章《java处理μ-law压缩的双声道wav分离解压》
SpeechTranscriberTask.java
package xxx;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
/**
 * Splits a two-channel, mu-law encoded WAV recording into two 16-bit little-endian PCM
 * streams and sends each stream to its own real-time {@link SpeechTranscriber}.
 *
 * <p>The first interleaved byte of every frame is treated as the left channel
 * ("EMPLOYEE") and the second as the right channel ("CUSTOMER").
 */
public class SpeechTranscriberTask {
    /** Bytes read from the file per iteration (interleaved: one mu-law byte per channel per frame). */
    private static final int CHUNK_BYTES = 3200;
    /** Number of leading bytes skipped as the WAV header. NOTE(review): assumes a 44-byte header - confirm for your recorder. */
    private static final int WAV_HEADER_BYTES = 44;
    /**
     * Pause before each chunk is sent. A full chunk is 1600 frames, i.e. 200 ms of audio
     * at the configured 8 kHz sample rate, so sending is paced roughly in real time.
     */
    private static final long SEND_INTERVAL_MILLIS = 200;

    // mu-law decompression table (for A-law, substitute an A-law decompression table).
    private static final short[] muLawDecompressTable = new short[]{
            -32124, -31100, -30076, -29052, -28028, -27004, -25980, -24956,
            -23932, -22908, -21884, -20860, -19836, -18812, -17788, -16764,
            -15996, -15484, -14972, -14460, -13948, -13436, -12924, -12412,
            -11900, -11388, -10876, -10364, -9852, -9340, -8828, -8316,
            -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
            -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
            -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
            -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
            -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
            -1372, -1308, -1244, -1180, -1116, -1052, -988, -924,
            -876, -844, -812, -780, -748, -716, -684, -652,
            -620, -588, -556, -524, -492, -460, -428, -396,
            -372, -356, -340, -324, -308, -292, -276, -260,
            -244, -228, -212, -196, -180, -164, -148, -132,
            -120, -112, -104, -96, -88, -80, -72, -64,
            -56, -48, -40, -32, -24, -16, -8, 0,
            32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956,
            23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764,
            15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412,
            11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316,
            7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140,
            5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092,
            3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004,
            2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980,
            1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436,
            1372, 1308, 1244, 1180, 1116, 1052, 988, 924,
            876, 844, 812, 780, 748, 716, 684, 652,
            620, 588, 556, 524, 492, 460, 428, 396,
            372, 356, 340, 324, 308, 292, 276, 260,
            244, 228, 212, 196, 180, 164, 148, 132,
            120, 112, 104, 96, 88, 80, 72, 64,
            56, 48, 40, 32, 24, 16, 8, 0
    };

    // Sample rate announced to the recognition service.
    private static final SampleRateEnum sampleRateEnum = SampleRateEnum.SAMPLE_RATE_8K;

    /** Transcribes one WAV file: one recognizer per audio channel. */
    static class Task implements Runnable {
        // APPKEY, ACCESSKEYID and ACCESSKEYSECRET are obtained from the Alibaba Cloud console.
        // See https://help.aliyun.com/document_detail/72153.html
        private static final String APPKEY = "你的APPKEY";
        private static final String URL = "wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1";
        private static final String ACCESSKEYID = "你的ACCESSKEYID";
        private static final String ACCESSKEYSECRET = "你的ACCESSKEYSECRET";

        private final NlsClient client;
        private final String audioFile;

        /**
         * Requests an access token and creates the client used by both channel recognizers.
         *
         * @param audioFile full path of the WAV file to transcribe
         */
        public Task(String audioFile) {
            // Authentication.
            AccessToken accessToken = new AccessToken(ACCESSKEYID, ACCESSKEYSECRET);
            try {
                accessToken.apply();
            } catch (IOException e) {
                // Kept from the original: the failure is reported and construction continues
                // with whatever token (possibly null) the AccessToken object holds.
                e.printStackTrace();
            }
            String token = accessToken.getToken();
            client = new NlsClient(URL, token);
            this.audioFile = audioFile;
        }

        /**
         * Reads the file chunk by chunk, decodes both channels and sends them to the two
         * recognizers. The recognizers, the client and the file are released on every
         * exit path, including failures.
         */
        @Override
        public void run() {
            SpeechTranscriber leftTranscriber = null;
            SpeechTranscriber rightTranscriber = null;
            try (InputStream wholefile = new BufferedInputStream(new FileInputStream(audioFile))) {
                long dateTime = new Date().getTime();
                // Left channel.
                leftTranscriber = newTranscriber(getTranscriberListener("EMPLOYEE", dateTime));
                // Right channel.
                rightTranscriber = newTranscriber(getTranscriberListener("CUSTOMER", dateTime));
                // Start both recognizers.
                leftTranscriber.start();
                rightTranscriber.start();

                // Skip the WAV header.
                skipFully(wholefile, WAV_HEADER_BYTES);

                // Split the interleaved stream into two PCM streams.
                byte[] chunk = new byte[CHUNK_BYTES];
                int len;
                while ((len = readFully(wholefile, chunk)) > 0) {
                    // readFully only returns a short count at end of file, so frames can
                    // never get misaligned between chunks; a dangling odd byte at the very
                    // end has no partner sample and is dropped.
                    int frames = len / 2;
                    if (frames == 0) {
                        break;
                    }
                    // mu-law decoding; buffers are sized to the data actually read so no
                    // stale or zero padding is sent to the recognizer.
                    byte[] leftPart = decodeChannel(chunk, frames, 0);
                    byte[] rightPart = decodeChannel(chunk, frames, 1);
                    // Pace the upload to imitate a recording that is produced in real time.
                    Thread.sleep(SEND_INTERVAL_MILLIS);
                    leftTranscriber.send(leftPart);
                    rightTranscriber.send(rightPart);
                }
                leftTranscriber.stop();
                rightTranscriber.stop();
            } catch (InterruptedException e) {
                // Restore the interrupt flag for whoever owns this thread.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                // Keep the full stack trace instead of only the message.
                e.printStackTrace();
            } finally {
                closeQuietly(leftTranscriber);
                closeQuietly(rightTranscriber);
                client.shutdown();
            }
        }

        /** Creates a recognizer configured identically for either channel. */
        private SpeechTranscriber newTranscriber(SpeechTranscriberListener listener) throws Exception {
            SpeechTranscriber transcriber = new SpeechTranscriber(client, listener);
            transcriber.setAppKey(APPKEY);
            transcriber.setFormat(InputFormatEnum.PCM);
            transcriber.setSampleRate(sampleRateEnum);
            transcriber.setEnableIntermediateResult(false);
            transcriber.setEnablePunctuation(true);
            transcriber.setEnableITN(true);
            return transcriber;
        }
    }

    /**
     * Decodes one channel of an interleaved two-channel mu-law buffer to 16-bit
     * little-endian PCM.
     *
     * @param interleaved buffer holding {@code frames} pairs of mu-law bytes
     * @param frames      number of complete frames to decode
     * @param channel     0 for the first byte of each frame, 1 for the second
     * @return a new array of exactly {@code frames * 2} PCM bytes
     */
    private static byte[] decodeChannel(byte[] interleaved, int frames, int channel) {
        byte[] pcm = new byte[frames * 2];
        for (int frame = 0; frame < frames; frame++) {
            short sample = muLawDecompressTable[interleaved[frame * 2 + channel] & 0xff];
            pcm[frame * 2] = (byte) sample;
            pcm[frame * 2 + 1] = (byte) (sample >> 8);
        }
        return pcm;
    }

    /** Reads until the buffer is full or the stream ends; returns the number of bytes read (0 at end of stream). */
    private static int readFully(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /** Skips exactly {@code count} bytes unless the stream ends first (skip() alone may skip fewer). */
    private static void skipFully(InputStream in, long count) throws IOException {
        long remaining = count;
        while (remaining > 0) {
            long skipped = in.skip(remaining);
            if (skipped <= 0) {
                // skip() made no progress: consume one byte to tell "busy" from end of stream.
                if (in.read() < 0) {
                    return;
                }
                skipped = 1;
            }
            remaining -= skipped;
        }
    }

    /** Closes a recognizer, reporting but not propagating any failure; null is ignored. */
    private static void closeQuietly(SpeechTranscriber transcriber) {
        if (transcriber == null) {
            return;
        }
        try {
            transcriber.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the recognition callback for one channel.
     *
     * @param inputName label printed with every recognized sentence
     * @param datetime  epoch milliseconds added to the sentence offsets reported by the
     *                  service to print wall-clock style times
     * @return a listener that prints results to standard output
     */
    private static SpeechTranscriberListener getTranscriberListener(final String inputName,long datetime) {
        SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss");
        SpeechTranscriberListener listener = new SpeechTranscriberListener() {
            // Intermediate result: sent when the server recognizes a character or word.
            // Only delivered when setEnableIntermediateResult(true) is set.
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                // For debugging only.
                System.out.println("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        // Status code 20000000 means normal recognition.
                        ", status: " + response.getStatus() +
                        // Sentence number, increasing from 1.
                        ", index: " + response.getTransSentenceIndex() +
                        // Current recognition result.
                        ", result: " + response.getTransSentenceText() +
                        // Audio duration processed so far, in milliseconds.
                        ", time: " + response.getTransSentenceTime());
            }
            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                // Called when the recognition session starts.
            }
            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                // Called at the beginning of every sentence.
            }
            // A complete sentence: the server segments sentences itself and sends this
            // message when it detects the end of one.
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                System.out.println(sdf.format(new Date(datetime+response.getSentenceBeginTime()))+
                        " - "+sdf.format(new Date(datetime+response.getTransSentenceTime()))+
                        ","+inputName+
                        ",第"+response.getTransSentenceIndex()+
                        "句话:"+response.getTransSentenceText());
            }
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                // Called when recognition has finished.
            }
            @Override
            public void onFail(SpeechTranscriberResponse response) {
                // Important: task_id is the unique ID shared by caller and server; provide
                // it when reporting a problem.
                System.out.println(
                        "task_id: " + response.getTaskId() +
                                // Status code 20000000 means success.
                                ", status: " + response.getStatus() +
                                // Error message.
                                ", status_text: " + response.getStatusText());
            }
        };
        return listener;
    }
}
最后启动StartFolderWatcherByHand.java就可以执行监听程序了
注：使用阿里实时语音识别SDK需要在pom.xml中引入以下依赖
<dependency>
<groupId>com.alibaba.nls</groupId>
<artifactId>nls-sdk-transcriber</artifactId>
<version>2.1.6</version>
</dependency>