监听实时生成的双声道wav文件转文字

# 监听文件夹生成的双声道wav文件实时转文字

想要把软电话生成的双声道录音文件实时转文字,用于内容质检。思路是:监听某文件夹的文件创建事件,拆分双声道 wav,再调用转文字接口。
使用的是阿里云的实时语音识别(ASR)服务(需要阿里云账号)。

## 程序入口类

程序入口
StartFolderWatcherByHand.java

package xxx;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Manual entry point that starts the folder watcher on a background thread.
 */
public class StartFolderWatcherByHand {

    /**
     * Submits a single {@link FileWatchTask} for the directory {@code C:\wav\}.
     *
     * @param args unused
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        // A single worker thread is enough: only one watcher task is ever submitted.
        ExecutorService watcherExecutor = Executors.newFixedThreadPool(1);
        // Start watching the folder.
        watcherExecutor.execute(new FileWatchTask("C:\\wav\\"));
        // shutdown() still runs the already-submitted watcher, but lets the worker
        // thread terminate once the watcher returns, so the executor no longer
        // keeps the JVM alive after the watch loop has ended.
        watcherExecutor.shutdown();
    }
}

## 文件夹监控类

文件夹监控线程 文件事件有多个 这里只监听文件创建
FileWatchTask.java

package xxx;

import java.nio.file.FileSystems;
import java.nio.file.Paths;
import java.nio.file.StandardWatchEventKinds;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;

/**
 * Watches one directory for newly created files and starts a
 * {@code SpeechTranscriberTask.Task} thread for every new WAV file.
 *
 * <p>Only {@code ENTRY_CREATE} events are registered; modifications and
 * deletions are ignored.
 */
public class FileWatchTask implements Runnable {
    /** Directory to watch; also used as the parent of every reported file name. */
    private final String fileDirectory;

    /**
     * @param fileDirectory path of the directory to watch (with or without a trailing separator)
     */
    public FileWatchTask(String fileDirectory) {
        this.fileDirectory = fileDirectory;
    }

    /**
     * Runs the watch loop until the watch key becomes invalid, the thread is
     * interrupted, or an error occurs. Errors are printed, not rethrown.
     */
    public void run() {
        // try-with-resources releases the watch service when the loop ends.
        try (WatchService watchService = FileSystems.getDefault().newWatchService()) {
            // Listen for file creation only.
            Paths.get(fileDirectory).register(watchService, StandardWatchEventKinds.ENTRY_CREATE);
            while (true) {
                // Block until a key with pending events is available.
                WatchKey key = watchService.take();
                for (WatchEvent<?> event : key.pollEvents()) {
                    // OVERFLOW may be delivered even though it was not registered,
                    // and it carries no context; skip it instead of failing on null.
                    if (event.kind() == StandardWatchEventKinds.OVERFLOW || event.context() == null) {
                        continue;
                    }
                    String filename = event.context().toString();
                    if (isWavFile(filename)) {
                        // Build the full path with the platform separator so the
                        // directory does not need to end with one.
                        String fullPath = Paths.get(fileDirectory, filename).toString();
                        // Start the Alibaba ASR task for this file.
                        new Thread(new SpeechTranscriberTask.Task(fullPath)).start();
                    }
                }
                // Re-arm the key; an invalid key means the directory is no longer watchable.
                boolean valid = key.reset();
                if (!valid) {
                    break;
                }
                // Short pause between batches, kept from the original implementation.
                Thread.sleep(500);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt status for whoever owns this thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns whether the file name ends with {@code .wav}, ignoring case.
     */
    private static boolean isWavFile(String filename) {
        String suffix = ".wav";
        // A negative offset (name shorter than the suffix) makes regionMatches return false.
        return filename.regionMatches(true, filename.length() - suffix.length(), suffix, 0, suffix.length());
    }
}

## 阿里实时录音识别接口

语音识别使用了阿里实时录音识别 Java SDK,里面有对 getTranscriberListener 中回调函数的详细介绍。

其中用到的 token 获取请参考获取Token

整体思路为:拆分 wav 文件、解码语音流、语音流转文字。具体 wav 拆分详情请参考我另一篇文章《java处理μ-law压缩的双声道wav分离解压》

SpeechTranscriberTask.java

package xxx;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;

import com.alibaba.nls.client.AccessToken;
import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;

/**
 * Splits a two-channel, μ-law encoded WAV file into two 16-bit PCM streams and
 * feeds each stream to its own Alibaba real-time speech transcriber.
 *
 * <p>The even-indexed bytes of the audio data are treated as the left channel
 * (labelled {@code EMPLOYEE}) and the odd-indexed bytes as the right channel
 * (labelled {@code CUSTOMER}).
 */
public class SpeechTranscriberTask {

    // μ-law decompression table, indexed by the unsigned encoded byte.
    // For A-law input, look up an ALawDecompressTable instead.
    private static final short[] muLawDecompressTable = new short[]{
            -32124, -31100, -30076, -29052, -28028, -27004, -25980, -24956,
            -23932, -22908, -21884, -20860, -19836, -18812, -17788, -16764,
            -15996, -15484, -14972, -14460, -13948, -13436, -12924, -12412,
            -11900, -11388, -10876, -10364, -9852, -9340, -8828, -8316,
            -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
            -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
            -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
            -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
            -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
            -1372, -1308, -1244, -1180, -1116, -1052, -988, -924,
            -876, -844, -812, -780, -748, -716, -684, -652,
            -620, -588, -556, -524, -492, -460, -428, -396,
            -372, -356, -340, -324, -308, -292, -276, -260,
            -244, -228, -212, -196, -180, -164, -148, -132,
            -120, -112, -104, -96, -88, -80, -72, -64,
            -56, -48, -40, -32, -24, -16, -8, 0,
            32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956,
            23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764,
            15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412,
            11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316,
            7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140,
            5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092,
            3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004,
            2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980,
            1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436,
            1372, 1308, 1244, 1180, 1116, 1052, 988, 924,
            876, 844, 812, 780, 748, 716, 684, 652,
            620, 588, 556, 524, 492, 460, 428, 396,
            372, 356, 340, 324, 308, 292, 276, 260,
            244, 228, 212, 196, 180, 164, 148, 132,
            120, 112, 104, 96, 88, 80, 72, 64,
            56, 48, 40, 32, 24, 16, 8, 0
    };

    // Sample rate passed to both transcribers.
    private static final SampleRateEnum sampleRateEnum = SampleRateEnum.SAMPLE_RATE_8K;

    /**
     * Transcribes one audio file: authenticates, opens one transcriber per
     * channel, streams the decoded audio, then releases everything.
     */
    static class Task implements Runnable {
        // APPKEY, ACCESSKEYID and ACCESSKEYSECRET are obtained from the Alibaba Cloud console.
        // See https://help.aliyun.com/document_detail/72153.html
        private final String APPKEY = "你的APPKEY";
        private final String URL = "wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1";
        private final String ACCESSKEYID = "你的ACCESSKEYID";
        private final String ACCESSKEYSECRET = "你的ACCESSKEYSECRET";

        /** Number of bytes skipped at the start of the file as the WAV header. */
        private static final int WAV_HEADER_BYTES = 44;
        /** Number of interleaved input bytes processed per iteration. */
        private static final int CHUNK_BYTES = 3200;
        /** Pause before each chunk is sent, in milliseconds. */
        private static final long CHUNK_INTERVAL_MILLIS = 200;

        private NlsClient client;
        private String audioFile;

        /**
         * Requests an access token and creates the NLS client.
         *
         * <p>A failure to obtain the token is printed and not rethrown, as in
         * the original implementation; the client is then created with
         * whatever {@code getToken()} returns.
         *
         * @param audioFile full path of the WAV file to transcribe
         */
        public Task(String audioFile) {
            // Authentication.
            AccessToken accessToken = new AccessToken(ACCESSKEYID, ACCESSKEYSECRET);
            try {
                accessToken.apply();
            } catch (IOException e) {
                e.printStackTrace();
            }
            String token = accessToken.getToken();
            client = new NlsClient(URL, token);

            this.audioFile = audioFile;
        }

        /**
         * Streams the file to the two transcribers chunk by chunk.
         *
         * <p>Errors are printed and not rethrown. The transcribers, the client
         * and the file are released in all cases.
         */
        @Override
        public void run() {
            SpeechTranscriber leftTranscriber = null;
            SpeechTranscriber rightTranscriber = null;
            InputStream wholefile = null;
            try {
                long dateTime = new Date().getTime();
                // Left channel.
                leftTranscriber = newTranscriber("EMPLOYEE", dateTime);
                // Right channel.
                rightTranscriber = newTranscriber("CUSTOMER", dateTime);

                // Start both sessions.
                leftTranscriber.start();
                rightTranscriber.start();

                wholefile = new BufferedInputStream(new FileInputStream(audioFile));
                // Skip the WAV header; skip() alone may skip fewer bytes than asked.
                skipFully(wholefile, WAV_HEADER_BYTES);

                // Split the interleaved data into two PCM streams.
                byte[] ts = new byte[CHUNK_BYTES];
                int len;
                while ((len = readChunk(wholefile, ts)) > 0) {
                    // Use whole left/right pairs only, so the channels never swap
                    // and no stale byte from a previous chunk is decoded.
                    int frameBytes = len & ~1;
                    if (frameBytes == 0) {
                        break;
                    }
                    // Each encoded byte becomes one 16-bit sample, so each channel
                    // yields exactly frameBytes bytes; sizing the arrays to that
                    // avoids sending zero padding after the last chunk.
                    byte[] leftPart = new byte[frameBytes];
                    byte[] rightPart = new byte[frameBytes];

                    // μ-law decoding, written low byte first; skip if the data is not encoded.
                    for (int i = 0; i < frameBytes; i += 2) {
                        short s = muLawDecompressTable[ts[i] & 0xff];
                        leftPart[i] = (byte) s;
                        leftPart[i + 1] = (byte) (s >> 8);
                        s = muLawDecompressTable[ts[i + 1] & 0xff];
                        rightPart[i] = (byte) s;
                        rightPart[i + 1] = (byte) (s >> 8);
                    }
                    // Slow down log output to simulate a recording generated in real time.
                    Thread.sleep(CHUNK_INTERVAL_MILLIS);
                    leftTranscriber.send(leftPart);
                    rightTranscriber.send(rightPart);
                }

                leftTranscriber.stop();
                rightTranscriber.stop();
            } catch (InterruptedException e) {
                // Keep the interrupt status visible to the owner of this thread.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Close the transcribers on every path, not only after a clean run.
                closeQuietly(leftTranscriber);
                closeQuietly(rightTranscriber);
                client.shutdown();

                if (wholefile != null) {
                    try {
                        wholefile.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

        /**
         * Creates a transcriber configured for PCM input at the class sample
         * rate, with intermediate results off and punctuation and ITN on.
         *
         * @param channelName label printed with every recognized sentence
         * @param startMillis epoch milliseconds used as the base for printed times
         */
        private SpeechTranscriber newTranscriber(String channelName, long startMillis) throws Exception {
            SpeechTranscriberListener listener = getTranscriberListener(channelName, startMillis);
            SpeechTranscriber transcriber = new SpeechTranscriber(client, listener);
            transcriber.setAppKey(APPKEY);
            transcriber.setFormat(InputFormatEnum.PCM);
            transcriber.setSampleRate(sampleRateEnum);
            transcriber.setEnableIntermediateResult(false);
            transcriber.setEnablePunctuation(true);
            transcriber.setEnableITN(true);
            return transcriber;
        }

        /** Skips {@code count} bytes, stopping early only at end of stream. */
        private static void skipFully(InputStream in, long count) throws IOException {
            long remaining = count;
            while (remaining > 0) {
                long skipped = in.skip(remaining);
                if (skipped <= 0) {
                    // skip() may return 0 without being at EOF; read one byte to find out.
                    if (in.read() < 0) {
                        return;
                    }
                    skipped = 1;
                }
                remaining -= skipped;
            }
        }

        /**
         * Fills {@code buffer} as far as possible.
         *
         * @return number of bytes read; less than {@code buffer.length} only at end of stream
         */
        private static int readChunk(InputStream in, byte[] buffer) throws IOException {
            int total = 0;
            while (total < buffer.length) {
                int n = in.read(buffer, total, buffer.length - total);
                if (n < 0) {
                    break;
                }
                total += n;
            }
            return total;
        }

        /** Closes a transcriber if it was created, printing any failure. */
        private static void closeQuietly(SpeechTranscriber transcriber) {
            if (transcriber == null) {
                return;
            }
            try {
                transcriber.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Builds the callback listener for one channel.
     *
     * @param inputName label printed with every recognized sentence
     * @param datetime  epoch milliseconds added to the sentence offsets before formatting
     */
    private static SpeechTranscriberListener getTranscriberListener(final String inputName, final long datetime) {
        final SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss");
        SpeechTranscriberListener listener = new SpeechTranscriberListener() {
            // Intermediate result: sent when the server recognizes a character or word.
            // Only delivered when setEnableIntermediateResult(true) is used.
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                // Debug only.
                System.out.println("task_id: " + response.getTaskId() +
                    ", name: " + response.getName() +
                    // Status code 20000000 means normal recognition.
                    ", status: " + response.getStatus() +
                    // Sentence number, increasing from 1.
                    ", index: " + response.getTransSentenceIndex() +
                    // Current recognition result.
                    ", result: " + response.getTransSentenceText() +
                    // Audio duration processed so far, in milliseconds.
                    ", time: " + response.getTransSentenceTime());
            }

            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                // Called when the recognition session starts.
            }

            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                // Called at the start of every sentence.
            }

            // A sentence was recognized: the server segments the audio and sends
            // this message when it detects the end of a sentence.
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                System.out.println(sdf.format(new Date(datetime+response.getSentenceBeginTime()))+
                        " - "+sdf.format(new Date(datetime+response.getTransSentenceTime()))+
                        ","+inputName+
                        ",第"+response.getTransSentenceIndex()+
                        "句话:"+response.getTransSentenceText());
            }

            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                // Called when recognition has finished.
            }

            @Override
            public void onFail(SpeechTranscriberResponse response) {
                // Important: task_id is the unique ID shared by the caller and the
                // server; provide it when asking for help with a problem.
                System.out.println(
                    "task_id: " + response.getTaskId() +
                        // Status code 20000000 means recognition succeeded.
                        ", status: " + response.getStatus() +
                        // Error message.
                        ", status_text: " + response.getStatusText());
            }
        };

        return listener;
    }

}

最后启动StartFolderWatcherByHand.java就可以执行监听程序了

注:使用阿里语音识别(ASR)SDK 需要引用以下依赖:

<dependency>
    <groupId>com.alibaba.nls</groupId>
    <artifactId>nls-sdk-transcriber</artifactId>
    <version>2.1.6</version>
</dependency>
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值