一、能力与场景说明
同声传译,又称同步口译或同声翻译,是一种专业的口译形式,指的是在讲话者发言时,口译员几乎同时将讲话内容翻译成目标语言。这种翻译方式通常用于国际会议、高级别政治或商业会谈、研讨会和其他需要即时多语言交流的场合。本文介绍如何用 Java 调用原生麦克风,实现边说中文边实时翻译成英文并播放译文语音(示例代码将单次录音时长限制为一分钟,可按需调整)。
二、同传主调用代码
package main.com.iflytek;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import main.com.util.VideoPlayerService;
import okhttp3.*;
import main.com.util.AuthUtils;
import main.com.util.PcmToWav;
import javax.sound.sampled.AudioInputStream;
import java.io.*;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
/**
* 1、同声传译接口,可以将音频流实时翻译为不同语种的文本,并输出对应的音频内容,广泛应用于国际论坛、智能会议、智慧教育、跨国交流等场景。
*/
public class SimultaneousTranslationMain extends WebSocketListener {
private static String requestUrl = "wss://ws-api.xf-yun.com/v1/private/simult_interpretation";
// Credentials — obtain these from the iFlytek console
private static String APPID = "";
private static String apiSecret = "";
private static String apiKey = "";
// Recognition (IST) parameters
private static final String domain = "ist_ed_open";
private static final String language = "zh_cn";
private static final String accent = "mandarin";
// Translation direction: Chinese -> English
private static final String from = "cn"; // source language
private static final String to = "en"; // target language
// TTS voice
private static final String vcn = "x2_catherine";
// Output audio encoding ("raw" = PCM)
private static final String encoding = "raw";
// Source audio file (kept for reference; microphone capture is used instead)
private static final String inputAudioPcm = "input/audio/original.pcm";
// Output audio and text files
private static final String outPutPcm = "output/audio/trans.pcm";
private static final String outPutWav = "output/audio/trans.wav";
private static final String asr_result = "output/text/asr.txt";
private static final String trans_result = "output/text/trans.txt";
// Frame status values defined by the protocol
public static final int StatusFirstFrame = 0;
public static final int StatusContinueFrame = 1;
public static final int StatusLastFrame = 2;
public static final Gson gson = new Gson();
// Base64 TTS audio chunks queued for the playback thread
private static BlockingQueue<String> queue = new LinkedBlockingQueue<>();
// volatile: written by the WebSocket callback thread, read by the playback thread
private static volatile boolean overFlag = false;
public static byte[] audioDataByteArray;
public static Long ivwStartTime;
public static Long ivwEndTime;
/**
 * Entry point: opens the microphone, streams 40 ms PCM frames to the
 * simultaneous-interpretation websocket, and stops on EOF or after one minute.
 */
public static void main(String[] args) {
// Clear previous results
clearDir();
// Start the thread that plays translated TTS audio as it arrives
Thread thread = new Thread(new videoPlayer());
thread.start();
// Build the signed websocket URL
String authUrl = AuthUtils.assembleRequestUrl(requestUrl, apiKey, apiSecret);
OkHttpClient client = new OkHttpClient.Builder().build();
Request request = new Request.Builder().url(authUrl).build();
WebSocket webSocket = client.newWebSocket(request, new SimultaneousTranslationMain());
try {
final int frameSize = 1280; // bytes per frame: 1280 bytes = 40 ms at 16 kHz 16-bit mono
final int interval = 40; // ms between frames, pacing the upload at real-time speed
int status = StatusFirstFrame; // current frame status
int count = 0; // frame sequence number
Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
Constants.IVW_ASR_TARGET_DATA_LINE.start();
// Wrap the microphone line ONCE instead of allocating a new stream per frame
AudioInputStream micStream = new AudioInputStream(Constants.IVW_ASR_TARGET_DATA_LINE);
ivwStartTime = System.currentTimeMillis(); // capture start time
// Send audio frames
sendLoop:
while (true) {
audioDataByteArray = new byte[Constants.IVW_FRAME_SIZE];
int len = micStream.read(audioDataByteArray);
ivwEndTime = System.currentTimeMillis();
// Short read (or EOF, len == -1) or more than one minute elapsed: send the last frame
if (len < frameSize || (ivwEndTime - ivwStartTime) > 60000) {
status = StatusLastFrame;
}
switch (status) {
case StatusFirstFrame:
// The first frame must carry app_id and the full parameter set
webSocket.send(buildFirstFrame(len, count).toString());
status = StatusContinueFrame;
System.out.println("send first 请开始说出中文:");
break;
case StatusContinueFrame:
webSocket.send(buildFrame(StatusContinueFrame, len, count).toString());
break;
case StatusLastFrame:
// Marks the end of the audio stream
webSocket.send(buildFrame(StatusLastFrame, len, count).toString());
System.out.println("send last 中文讲话结束!");
break sendLoop;
}
count++;
Thread.sleep(interval); // simulate real-time audio sampling delay
}
System.out.println("all data is send 所有音频数据发送完毕!");
} catch (Exception e) {
e.printStackTrace();
}
}
/** Builds the protocol "header" object with the given frame status. */
private static JsonObject buildHeader(int status) {
JsonObject header = new JsonObject();
header.addProperty("app_id", APPID);
header.addProperty("status", status);
return header;
}
/** Builds the IST / streamtrans / tts "parameter" object, sent only on the first frame. */
private static JsonObject buildParameter() {
JsonObject ist = new JsonObject();
ist.addProperty("eos", 600000);
ist.addProperty("vto", 15000);
ist.addProperty("accent", accent);
ist.addProperty("language", language);
ist.addProperty("language_type", 1);
ist.addProperty("domain", domain);
JsonObject streamtrans = new JsonObject();
streamtrans.addProperty("from", from);
streamtrans.addProperty("to", to);
JsonObject tts_results = new JsonObject();
tts_results.addProperty("encoding", "raw");
tts_results.addProperty("sample_rate", 16000);
tts_results.addProperty("channels", 1);
tts_results.addProperty("bit_depth", 16);
JsonObject tts = new JsonObject();
tts.addProperty("vcn", vcn);
tts.add("tts_results", tts_results);
JsonObject parameter = new JsonObject();
parameter.add("ist", ist);
parameter.add("streamtrans", streamtrans);
parameter.add("tts", tts);
return parameter;
}
/** Builds the "payload" carrying one base64-encoded audio chunk. */
private static JsonObject buildPayload(int status, int len, int seq) {
JsonObject data = new JsonObject();
// len may be 0 or -1 (EOF) on the last frame; send an empty chunk then.
// The original used `len != 0`, so an EOF read of -1 crashed Arrays.copyOf.
String audio = len > 0
? Base64.getEncoder().encodeToString(Arrays.copyOf(audioDataByteArray, len))
: "";
data.addProperty("audio", audio);
data.addProperty("encoding", encoding);
data.addProperty("sample_rate", 16000);
data.addProperty("status", status);
data.addProperty("seq", seq);
JsonObject payload = new JsonObject();
payload.add("data", data);
return payload;
}
/** First frame: header + parameter + payload. */
private static JsonObject buildFirstFrame(int len, int seq) {
JsonObject frame = new JsonObject();
frame.add("header", buildHeader(StatusFirstFrame));
frame.add("parameter", buildParameter());
frame.add("payload", buildPayload(StatusFirstFrame, len, seq));
return frame;
}
/** Continue / last frame: header + payload only. */
private static JsonObject buildFrame(int status, int len, int seq) {
JsonObject frame = new JsonObject();
frame.add("header", buildHeader(status));
frame.add("payload", buildPayload(status, len, seq));
return frame;
}
@Override
public void onOpen(WebSocket webSocket, Response response) {
super.onOpen(webSocket, response);
}
/** Handles server responses: ASR text, translated text and TTS audio. */
@Override
public void onMessage(WebSocket webSocket, String text) {
super.onMessage(webSocket, text);
ResponseData resp = gson.fromJson(text, ResponseData.class);
// Fix: check header for null BEFORE dereferencing it (original read
// resp.header.code first and only null-checked header afterwards)
if (resp == null || resp.header == null) {
return;
}
if (resp.header.code != 0) {
System.out.println("error=>" + resp.header.message + " sid=" + resp.header.sid + " 错误码=" + resp.header.code);
return;
}
if (resp.payload != null) {
// Recognition result -> asr.txt
if (resp.payload.recognition_results != null) {
saveBase64Text(resp.payload.recognition_results.text, asr_result);
}
// Translation result -> trans.txt
if (resp.payload.streamtrans_results != null) {
saveBase64Text(resp.payload.streamtrans_results.text, trans_result);
}
// TTS audio -> playback queue and trans.pcm
if (resp.payload.tts_results != null) {
String s = resp.payload.tts_results.audio;
queue.add(s);
try {
writeBytesToFile(Base64.getDecoder().decode(s), outPutPcm);
} catch (IOException e) {
e.printStackTrace();
}
}
}
if (resp.header.status == 2) {
// status == 2: the server has returned everything; release resources
System.out.println("session end 同声传译返回完毕!");
System.out.println("本次请求的sid==》 " + resp.header.sid);
System.out.println("数据处理完毕,等待实时转译结束!");
overFlag = true;
try {
// Convert the accumulated PCM output to a playable WAV file
PcmToWav.convertAudioFiles(outPutPcm, outPutWav);
} catch (IOException e) {
e.printStackTrace();
}
webSocket.close(1000, "");
if (queue.size() == 0) {
System.exit(0);
}
}
}
/** Decodes a base64 UTF-8 string and appends it to the given file; errors are logged. */
private static void saveBase64Text(String base64, String path) {
try {
// Explicit charset instead of the platform default
writeStringToFile(new String(Base64.getDecoder().decode(base64), "UTF-8"), path);
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void onFailure(WebSocket webSocket, Throwable t, Response response) {
super.onFailure(webSocket, t, response);
System.out.println(t.getMessage());
try {
System.out.println("错误信息:" + response);
if (response == null) {
return;
}
System.out.println("错误信息" + response.code());
System.out.println(response.body().string());
} catch (IOException e) {
e.printStackTrace();
}
}
/** Playback thread: drains the TTS queue and plays audio in real time. */
static class videoPlayer implements Runnable {
@Override
public void run() {
while (true) {
// Exit once the server session is over and the queue is drained
if (overFlag && queue.isEmpty()) {
break;
}
try {
// Timed poll instead of busy-spinning on queue.size()
String chunk = queue.poll(20, java.util.concurrent.TimeUnit.MILLISECONDS);
if (chunk != null) {
VideoPlayerService.videoPlay(Base64.getDecoder().decode(chunk));
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
System.out.println("实时转译结束!!!");
// Exit code 0 for normal completion (1000 would be truncated to 232 by the OS)
System.exit(0);
}
}
/** Deletes all files inside the sub-directories of "output". */
public static void clearDir() {
File root = new File("output");
File[] dirs = root.listFiles();
if (dirs != null) { // null when "output" is missing or not a directory
for (File dir : dirs) {
if (dir.isDirectory()) {
File[] files = dir.listFiles();
if (files == null) {
continue;
}
for (File f : files) {
f.delete();
}
}
}
}
System.out.println("结果集初始化成功------");
}
/** Appends raw bytes to the given file. */
public static void writeBytesToFile(byte[] bs, String path) throws IOException {
// try-with-resources closes the stream even when write() fails
try (OutputStream out = new FileOutputStream(path, true)) {
out.write(bs);
}
}
/** Appends UTF-8 text to the given file. */
public static void writeStringToFile(String content, String path) throws IOException {
try (OutputStream out = new FileOutputStream(path, true)) {
out.write(content.getBytes("UTF-8"));
}
}
// ---- Response DTOs mirroring the server's JSON schema (populated by Gson) ----
public static class ResponseData {
header header;
payload payload;
}
public static class payload {
streamtrans_results streamtrans_results;
recognition_results recognition_results;
tts_results tts_results;
@Override
public String toString() {
return "payload{" + "streamtrans_results=" + streamtrans_results + ", recognition_results=" + recognition_results + ", tts_results=" + tts_results + '}';
}
}
public static class header {
int code;
String message;
String sid;
int status;
}
public static class recognition_results {
String encoding;
String format;
String text; // base64-encoded recognized text
int status;
@Override
public String toString() {
return "recognition_results{" + "encoding='" + encoding + '\'' + ", format='" + format + '\'' + ", text='" + text + '\'' + ", status=" + status + '}';
}
}
public static class streamtrans_results {
String encoding;
String format;
String text; // base64-encoded translated text
int status;
@Override
public String toString() {
return "streamtrans_results{" + "encoding='" + encoding + '\'' + ", format='" + format + '\'' + ", text='" + text + '\'' + ", status=" + status + '}';
}
}
public static class tts_results {
String encoding;
String audio; // base64-encoded PCM audio
int sample_rate;
int channels;
int bit_depth;
int status;
int seq;
int frame_size;
@Override
public String toString() {
return "tts_results{" + "encoding='" + encoding + '\'' + ", audio='" + audio + '\'' + ", sample_rate=" + sample_rate + ", channels=" + channels + ", bit_depth=" + bit_depth + ", status=" + status + ", seq=" + seq + ", frame_size=" + frame_size + '}';
}
}
}
三、鉴权代码
package main.com.util;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Base64;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
/**
* @Author:sjliu7
* 鉴权使用
* @Date:2019/7/31 15:23
*/
public class AuthUtils {
    private static final String serviceId = "simult_interpretation";
    /**
     * Builds the authenticated websocket URL (HMAC-SHA256 signature over
     * host, date and request-line, per the iFlytek signing scheme).
     *
     * @param requestUrl the ws:// or wss:// endpoint to sign
     * @param apiKey     console-issued API key
     * @param apiSecret  console-issued API secret used as the HMAC key
     * @return the endpoint with authorization, host, date and serviceId query params
     * @throws RuntimeException wrapping any failure during URL assembly
     */
    public static String assembleRequestUrl(String requestUrl, String apiKey, String apiSecret) {
        // java.net.URL cannot parse ws(s) schemes, so map to http(s) first
        String httpRequestUrl = requestUrl.replace("ws://", "http://").replace("wss://", "https://");
        try {
            URL url = new URL(httpRequestUrl);
            // RFC 1123 date; GMT is required by the signature specification
            SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
            format.setTimeZone(TimeZone.getTimeZone("GMT"));
            String date = format.format(new Date());
            String host = url.getHost();
            // Canonical string: "host: …\ndate: …\nGET <path> HTTP/1.1"
            StringBuilder builder = new StringBuilder("host: ").append(host).append("\n")
                    .append("date: ").append(date).append("\n")
                    .append("GET ").append(url.getPath()).append(" HTTP/1.1");
            Charset charset = Charset.forName("UTF-8");
            Mac mac = Mac.getInstance("HmacSHA256");
            mac.init(new SecretKeySpec(apiSecret.getBytes(charset), "HmacSHA256"));
            byte[] digest = mac.doFinal(builder.toString().getBytes(charset));
            String sha = Base64.getEncoder().encodeToString(digest);
            String authorization = String.format(
                    "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"",
                    apiKey, "hmac-sha256", "host date request-line", sha);
            String authBase = Base64.getEncoder().encodeToString(authorization.getBytes(charset));
            // Fix: the one-arg URLEncoder.encode is deprecated and uses the
            // platform charset; encode explicitly as UTF-8
            return String.format("%s?authorization=%s&host=%s&date=%s&serviceId=%s",
                    requestUrl,
                    URLEncoder.encode(authBase, "UTF-8"),
                    URLEncoder.encode(host, "UTF-8"),
                    URLEncoder.encode(date, "UTF-8"),
                    serviceId);
        } catch (Exception e) {
            // Preserve the original cause instead of discarding it
            throw new RuntimeException("assemble requestUrl error:" + e.getMessage(), e);
        }
    }
}
四、PCM转成WAV
package main.com.util;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
public class PcmToWav {
    /**
     * Prepends a 44-byte WAV header to a raw PCM file.
     * Defaults to 16 kHz / mono / 16-bit, matching the TTS audio this project
     * produces (tts_results: sample_rate 16000, channels 1, bit_depth 16).
     * The previous hard-coded 8000 Hz / 2-channel header made the resulting
     * WAV play at the wrong speed and pitch.
     *
     * @param src    path of the raw PCM input file
     * @param target path of the WAV output file
     * @throws IOException if either file cannot be read or written
     */
    public static void convertAudioFiles(String src, String target) throws IOException {
        convertAudioFiles(src, target, 16000, (short) 1, (short) 16);
    }
    /**
     * Same conversion with explicit audio parameters.
     *
     * @param src           path of the raw PCM input file
     * @param target        path of the WAV output file
     * @param sampleRate    samples per second of the PCM data
     * @param channels      channel count
     * @param bitsPerSample bits per sample
     * @throws IOException if either file cannot be read or written
     */
    public static void convertAudioFiles(String src, String target, int sampleRate, short channels, short bitsPerSample) throws IOException {
        byte[] buf = new byte[1024 * 4];
        // First pass: measure the PCM payload size for the header fields
        int pcmSize = 0;
        try (FileInputStream fis = new FileInputStream(src)) {
            int n;
            while ((n = fis.read(buf)) != -1) {
                pcmSize += n;
            }
        }
        WaveHeader header = new WaveHeader();
        // RIFF length = payload + header size, excluding the 8 bytes of the
        // "RIFF" tag and the length field itself
        header.fileLength = pcmSize + (44 - 8);
        header.FmtHdrLeth = 16;
        header.BitsPerSample = bitsPerSample;
        header.Channels = channels;
        header.FormatTag = 0x0001; // uncompressed PCM
        header.SamplesPerSec = sampleRate;
        header.BlockAlign = (short) (channels * bitsPerSample / 8);
        header.AvgBytesPerSec = header.BlockAlign * sampleRate;
        header.DataHdrLeth = pcmSize;
        byte[] h = header.getHeader(); // always 44 bytes per the WAV spec
        // Second pass: write the header followed by the PCM payload;
        // try-with-resources closes both streams even on failure
        try (FileInputStream fis = new FileInputStream(src);
             FileOutputStream fos = new FileOutputStream(target)) {
            fos.write(h, 0, h.length);
            int n;
            while ((n = fis.read(buf)) != -1) {
                fos.write(buf, 0, n);
            }
        }
        System.out.println("Convert OK!");
    }
}
五、音频播放
package main.com.util;
import javax.sound.sampled.*;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
public class VideoPlayerService {
public static SourceDataLine auline = null;
static {
AudioFormat audioFormat=new AudioFormat(16000F, 16, 1,true,false);
DataLine.Info info = new DataLine.Info(SourceDataLine.class, audioFormat);
try {
auline = (SourceDataLine) AudioSystem.getLine(info);
auline.open(audioFormat);
auline.start();
} catch (LineUnavailableException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
public static byte[] byteArray(String file) throws IOException {
BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
ByteArrayOutputStream out = new ByteArrayOutputStream(1024);
System.out.println("Available bytes:" + in.available());
byte[] temp = new byte[1024];
int size = 0;
while ((size = in.read(temp)) != -1) {
out.write(temp, 0, size);
}
in.close();
byte[] content = out.toByteArray();
return content;
}
public static void videoPlay(byte[] video){
auline.write(video,0,video.length);
}
}
六、WAV头添加
package main.com.util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
public class WaveHeader {
    // RIFF/WAVE header fields, written in file order by getHeader()
    public final char fileID[] = {'R', 'I', 'F', 'F'};
    public int fileLength;
    public char wavTag[] = {'W', 'A', 'V', 'E'};
    public char FmtHdrID[] = {'f', 'm', 't', ' '};
    public int FmtHdrLeth;
    public short FormatTag;
    public short Channels;
    public int SamplesPerSec;
    public int AvgBytesPerSec;
    public short BlockAlign;
    public short BitsPerSample;
    public char DataHdrID[] = {'d', 'a', 't', 'a'};
    public int DataHdrLeth;
    /**
     * Serializes this header to its 44-byte on-disk representation:
     * four-character tags as ASCII, all numeric fields little-endian.
     *
     * @return the 44-byte WAV header
     * @throws IOException never in practice (in-memory stream), kept for compatibility
     */
    public byte[] getHeader() throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream(44);
        writeTag(out, fileID);
        writeLeInt(out, fileLength);
        writeTag(out, wavTag);
        writeTag(out, FmtHdrID);
        writeLeInt(out, FmtHdrLeth);
        writeLeShort(out, FormatTag);
        writeLeShort(out, Channels);
        writeLeInt(out, SamplesPerSec);
        writeLeInt(out, AvgBytesPerSec);
        writeLeShort(out, BlockAlign);
        writeLeShort(out, BitsPerSample);
        writeTag(out, DataHdrID);
        writeLeInt(out, DataHdrLeth);
        return out.toByteArray();
    }
    // Writes a 16-bit value, least-significant byte first.
    private static void writeLeShort(ByteArrayOutputStream out, int value) {
        out.write(value & 0xFF);
        out.write((value >> 8) & 0xFF);
    }
    // Writes a 32-bit value, least-significant byte first.
    private static void writeLeInt(ByteArrayOutputStream out, int value) {
        out.write(value & 0xFF);
        out.write((value >> 8) & 0xFF);
        out.write((value >> 16) & 0xFF);
        out.write((value >> 24) & 0xFF);
    }
    // Writes a four-character ASCII tag, one byte per char.
    private static void writeTag(ByteArrayOutputStream out, char[] tag) {
        for (char c : tag) {
            out.write(c);
        }
    }
}