通过阿里云平台工具实现文字转语音功能

package com.comwinwin.project.speech;

import com.alibaba.nls.client.AccessToken;
import com.ruoyi.common.config.RuoYiConfig;
import com.ruoyi.common.core.domain.Response;
import com.ruoyi.common.utils.RandomUtil;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Value;
import lombok.extern.log4j.Log4j2;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Log4j2
@Api(value = "文字识别controller", tags = {"b文字识别接口"})
@RestController
public class SpeechSynthesizerLongTextController {

    @ApiOperation("识别文字转语音")
    @PostMapping("/api/SpeechSynthesizer/longText.do")
    public Response<String> SpeechSynthesizerLongText(HttpServletRequest request,@RequestParam(required = false) String ttsTextLong){
        try {
            if(ttsTextLong!=null&&!"".equals(ttsTextLong)){
                ttsTextLong = textUtil.getText(ttsTextLong);
            }
            //参数自己设置
            AccessToken token = new AccessToken("", "");
            token.apply();
            String accessToken = token.getToken();
            long expireTime = token.getExpireTime();

            String appKey = "";
            String url = ""; // 默认即可,默认值:wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1

            String videoPath = "/mp3/" + new SimpleDateFormat("yyyyMMdd").format(new Date()) + "/";
            String basePath = getContextPath(request) + "/profile" + videoPath;
            String path = RuoYiConfig.getProfile() + videoPath;

            String filename = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()) + RandomUtil.getRandom(6);
            String urlPath = basePath + filename + ".mp3";
            String file = path + filename + ".mp3";

            Path pathDir = Paths.get(path);
            try {
                if (!Files.exists(pathDir))
                    Files.createDirectories(pathDir);
            } catch (IOException e) {
                e.printStackTrace();
            }
            File out = new File(file);
            FileOutputStream fout = new FileOutputStream(out);
            // 初期并不知道wav文件实际长度,假设为0,最后再校正
            int pcmSize = 0;
            WavHeader header = new WavHeader();
            // 长度字段 = 内容的大小(PCMSize) + 头部字段的大小(不包括前面4字节的标识符RIFF以及fileLength本身的4字节)
            header.fileLength = pcmSize + (44 - 8);
            header.fmtHdrLeth = 16;
            header.bitsPerSample = 16;
            header.channels = 1;
            header.formatTag = 0x0001;
            header.samplesPerSec = 16000;
            header.blockAlign = (short) (header.channels * header.bitsPerSample / 8);
            header.avgBytesPerSec = header.blockAlign * header.samplesPerSec;
            header.dataHdrLeth = pcmSize;
            byte[] h = header.getHeader();
            assert h.length == 44;
            // 先写入44字节的wav头,如果合成的不是wav,比如是pcm,则不需要此步骤
            fout.write(h);

            SpeechSynthesizerLongTextDemo demo = new SpeechSynthesizerLongTextDemo(appKey, accessToken, url);
            demo.process(ttsTextLong, fout);
            demo.shutdown();
            // 更新44字节的wav头,如果合成的不是wav,比如是pcm,则不需要此步骤
            RandomAccessFile wavFile = new RandomAccessFile(file, "rw");
            int fileLength = (int)wavFile.length();
            int dataSize = fileLength - 44;
            System.out.println("filelength = " + fileLength +", datasize = " + dataSize);
            header.fileLength = fileLength - 8;
            header.dataHdrLeth = fileLength - 44;
            wavFile.write(header.getHeader());
            wavFile.close();
            return Response.success(urlPath);
        }catch (IOException e){
            log.info("文字识别失败! "+e);
            return Response.error("");
        }
    }

    private String getContextPath(HttpServletRequest request) {
        return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getContextPath();
    }

}
package com.comwinwin.project.speech;

import com.alibaba.nls.client.protocol.NlsClient;
import com.alibaba.nls.client.protocol.OutputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizer;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerListener;
import com.alibaba.nls.client.protocol.tts.SpeechSynthesizerResponse;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

/**
 * Synthesizes text that is longer than a single TTS request allows (the service accepts at
 * most 300 characters per call) by cutting it into short pieces on the caller side and
 * issuing one synthesis request per piece.
 *
 * <p>This is not the same as the separate "long text speech synthesis" product, where the
 * whole text is sent to the server in one go.
 */
public class SpeechSynthesizerLongTextDemo {
    /** Punctuation marks at which the text may be cut. */
    private static final String SENTENCE_DELIMITERS = "[、,。;?!,!\\?]";
    /** Size in bytes of the WAV header written by {@link #main}. */
    private static final int WAV_HEADER_SIZE = 44;

    private String appKey;
    NlsClient client;

    /**
     * Creates the demo with an already obtained access token.
     *
     * @param appKey application key of the NLS project
     * @param token  access token
     * @param url    gateway URL; {@code null} or empty selects the SDK default
     */
    public SpeechSynthesizerLongTextDemo(String appKey, String token, String url) {
        this.appKey = appKey;
        // TODO important: one NlsClient per application is enough; its life cycle can match the
        // application's. The default address is the Alibaba Cloud public service.
        if (url == null || url.isEmpty()) {
            client = new NlsClient(token);
        } else {
            client = new NlsClient(url, token);
        }
    }

    /** Creates a listener that appends every received audio buffer to {@code fout}. */
    private static SpeechSynthesizerListener getSynthesizerListener(final FileOutputStream fout) {
        return new SpeechSynthesizerListener() {
            int totalSize = 0;

            // Called when one synthesis request has finished.
            @Override
            public void onComplete(SpeechSynthesizerResponse response) {
                System.out.println("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() + ", status: " + response.getStatus());
                System.out.println("onComplete, totalsize = " + totalSize);
            }

            // Called with a buffer of binary audio data.
            @Override
            public void onMessage(ByteBuffer message) {
                try {
                    byte[] bytesArray = new byte[message.remaining()];
                    message.get(bytesArray, 0, bytesArray.length);
                    System.out.println("write arrya:" + bytesArray.length);
                    totalSize += bytesArray.length;
                    fout.write(bytesArray);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            @Override
            public void onFail(SpeechSynthesizerResponse response) {
                // The task_id is the identifier shared between caller and service; it has to be
                // supplied when a problem is investigated.
                System.out.println(
                    "task_id: " + response.getTaskId() +
                        // status code 20000000 means success
                        ", status: " + response.getStatus() +
                        // error description
                        ", status_text: " + response.getStatusText());
            }
        };
    }

    /**
     * Splits {@code longText} into pieces of at most 100 characters and synthesizes them one
     * after another; the audio is appended to {@code fout} by the listener.
     *
     * <p>Failures are printed and not rethrown. The caller keeps ownership of {@code fout}.
     *
     * @param longText text to synthesize; {@code null} or empty does nothing
     * @param fout     destination for the PCM data
     */
    public void process(final String longText, final FileOutputStream fout) {
        List<String> textArr = splitLongText(longText, 100);
        if (textArr.isEmpty()) {
            // Nothing to say: do not set up a synthesizer at all.
            return;
        }
        SpeechSynthesizer synthesizer = null;
        try {
            // Create the instance and establish the connection.
            synthesizer = new SpeechSynthesizer(client, getSynthesizerListener(fout));
            synthesizer.setAppKey(appKey);
            // PCM is mandatory here: only raw samples of several requests can be concatenated.
            synthesizer.setFormat(OutputFormatEnum.PCM);
            // Sample rate of the returned audio.
            synthesizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);

            for (String part : textArr) {
                synthesizer.setText(part);
                // Serializes the settings to JSON, sends them and waits for the acknowledgement.
                synthesizer.start();
                // Wait until this piece has been synthesized.
                synthesizer.waitForComplete();
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the connection.
            if (null != synthesizer) {
                synthesizer.close();
            }
        }
    }

    /**
     * Cuts a long text into pieces of at most {@code size} characters.
     *
     * <p>The text is first cut at punctuation marks; neighbouring sentences are then merged
     * again as long as the piece stays within {@code size}, so that pieces do not get needlessly
     * short. Each sentence keeps the punctuation mark that follows it. A single sentence longer
     * than {@code size} is cut at the size limit. Empty pieces are never returned.
     *
     * @param text text to split; {@code null} or empty yields an empty list
     * @param size maximum number of characters per piece, at least 1
     * @return the pieces in original order
     * @throws IllegalArgumentException if {@code size} is smaller than 1
     */
    public static List<String> splitLongText(String text, int size) {
        if (size < 1) {
            throw new IllegalArgumentException("size must be at least 1: " + size);
        }
        List<String> result = new ArrayList<String>();
        if (text == null || text.isEmpty()) {
            return result;
        }
        String[] texts = text.split(SENTENCE_DELIMITERS);
        StringBuilder textPart = new StringBuilder();
        // Position in the original text right behind the current sentence.
        int len = 0;
        for (String piece : texts) {
            len += piece.length();
            String sentence = piece;
            if (len < text.length()) {
                // Re-attach the delimiter that split() removed.
                sentence = piece + text.charAt(len);
                len += 1;
            }
            // Flush only a non-empty buffer; an empty one would produce an empty request.
            if (textPart.length() > 0 && textPart.length() + piece.length() + 1 > size) {
                result.add(textPart.toString());
                textPart.setLength(0);
            }
            // The buffer is empty whenever the sentence alone exceeds the limit: cut it hard.
            int start = 0;
            while (textPart.length() + sentence.length() - start > size) {
                int end = start + size - textPart.length();
                // Do not separate a surrogate pair when there is room to step back.
                if (end - start > 1 && Character.isHighSurrogate(sentence.charAt(end - 1))) {
                    end--;
                }
                textPart.append(sentence, start, end);
                result.add(textPart.toString());
                textPart.setLength(0);
                start = end;
            }
            textPart.append(sentence, start, sentence.length());
        }
        if (textPart.length() > 0) {
            result.add(textPart.toString());
        }

        return result;
    }

    /** Shuts the underlying {@code NlsClient} down. */
    public void shutdown() {
        client.shutdown();
    }

    /** Encodes {@code intData} as 4 bytes, most significant byte first. */
    public static byte[] int2byte(int intData) {
        byte[] byteData = new byte[4];
        byteData[0] = (byte) (0xff & (intData >> 24));
        byteData[1] = (byte) (0xff & (intData >> 16));
        byteData[2] = (byte) (0xff & (intData >> 8));
        byteData[3] = (byte) (0xff & intData);
        return byteData;
    }

    /** Encodes {@code s} as 2 bytes, most significant byte first. */
    public static byte[] short2byte(short s) {
        byte[] byteData = new byte[2];
        byteData[0] = (byte) (0xff & (s >> 8));
        byteData[1] = (byte) (0xff & s);
        return byteData;
    }

    /** Builds the header for 16 kHz, 16-bit, mono PCM with the length fields set for zero audio bytes. */
    private static WavHeader newPcmHeader() {
        WavHeader header = new WavHeader();
        // RIFF length = payload + header size, not counting the "RIFF" tag and the length field itself.
        header.fileLength = WAV_HEADER_SIZE - 8;
        header.fmtHdrLeth = 16;
        header.bitsPerSample = 16;
        header.channels = 1;
        header.formatTag = 0x0001;
        header.samplesPerSec = 16000;
        header.blockAlign = (short) (header.channels * header.bitsPerSample / 8);
        header.avgBytesPerSec = header.blockAlign * header.samplesPerSec;
        header.dataHdrLeth = 0;
        return header;
    }

    /**
     * Command line demo: synthesizes a sample text into {@code longText4TTS.wav}.
     * App key and token have to be replaced with real values.
     */
    public static void main(String[] args) throws Exception {
        String appKey = "你的appkey";
        String token = "你的token";
        String url = ""; // empty selects the default: wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1

        String ttsTextLong = "百草堂与三味书屋 鲁迅 \n" +
            "我家的后面有一个很大的园,相传叫作百草园。现在是早已并屋子一起卖给朱文公的子孙了,连那最末次的相见也已经隔了七八年,其中似乎确凿只有一些野草;但那时却是我的乐园。\n" +
            "不必说碧绿的菜畦,光滑的石井栏,高大的皂荚树,紫红的桑葚;也不必说鸣蝉在树叶里长吟,肥胖的黄蜂伏在菜花上,轻捷的叫天子(云雀)忽然从草间直窜向云霄里去了。\n" +
            "单是周围的短短的泥墙根一带,就有无限趣味。油蛉在这里低唱,蟋蟀们在这里弹琴。翻开断砖来,有时会遇见蜈蚣;还有斑蝥,倘若用手指按住它的脊梁,便会啪的一声,\n" +
            "从后窍喷出一阵烟雾。何首乌藤和木莲藤缠络着,木莲有莲房一般的果实,何首乌有臃肿的根。有人说,何首乌根是有像人形的,吃了便可以成仙,我于是常常拔它起来,牵连不断地拔起来,\n" +
            "也曾因此弄坏了泥墙,却从来没有见过有一块根像人样! 如果不怕刺,还可以摘到覆盆子,像小珊瑚珠攒成的小球,又酸又甜,色味都比桑葚要好得远......";

        String path = "longText4TTS.wav";

        // The real length is unknown up front: write a header for zero bytes of audio and
        // correct it after synthesis. Not needed when the output is plain PCM instead of WAV.
        WavHeader header = newPcmHeader();
        byte[] h = header.getHeader();
        assert h.length == WAV_HEADER_SIZE;

        // Closing the stream before the header is patched makes sure all audio is on disk.
        try (FileOutputStream fout = new FileOutputStream(new File(path))) {
            fout.write(h);

            SpeechSynthesizerLongTextDemo demo = new SpeechSynthesizerLongTextDemo(appKey, token, url);
            try {
                demo.process(ttsTextLong, fout);
            } finally {
                demo.shutdown();
            }
        }

        // Rewrite the header with the final sizes.
        try (RandomAccessFile wavFile = new RandomAccessFile(path, "rw")) {
            int fileLength = (int) wavFile.length();
            int dataSize = fileLength - WAV_HEADER_SIZE;
            System.out.println("filelength = " + fileLength +", datasize = " + dataSize);
            header.fileLength = fileLength - 8;
            header.dataHdrLeth = dataSize;
            wavFile.write(header.getHeader());
        }
    }
}
package com.comwinwin.project.speech;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

/**
 * Mutable model of a 44-byte WAV file header. The fields are filled in by the caller and
 * {@link #getHeader()} serializes them in declaration order, multi-byte numbers with the
 * least significant byte first.
 */
class WavHeader {
    /** 4 bytes: resource interchange file tag ("RIFF"). */
    public final char[] fileID = {'R', 'I', 'F', 'F'};
    /** 4 bytes: total byte count. */
    public int fileLength;
    /** 4 bytes: WAV file tag ("WAVE"). */
    public char[] wavTag = {'W', 'A', 'V', 'E'};
    /** 4 bytes: format chunk tag ("fmt " - the last character is a blank). */
    public char[] fmtHdrID = {'f', 'm', 't', ' '};
    /** 4 bytes: usually 00000010H; 00000012H indicates that the header carries extra information. */
    public int fmtHdrLeth;
    /** 2 bytes: format category (1 means linear PCM). */
    public short formatTag;
    /** 2 bytes: channel count, 1 for mono and 2 for stereo. */
    public short channels;
    /** 4 bytes: sampling frequency. */
    public int samplesPerSec;
    /** 4 bytes: data rate (average number of bytes per second). */
    public int avgBytesPerSec;
    /** 2 bytes: length of a data block, in bytes. */
    public short blockAlign;
    /** 2 bytes: PCM bit width. */
    public short bitsPerSample;
    /** 4 bytes: data chunk tag ("data"). */
    public char[] dataHdrID = {'d', 'a', 't', 'a'};
    /** 4 bytes: total length of the data, in bytes. */
    public int dataHdrLeth;

    /**
     * Serializes the current field values.
     *
     * @return the header bytes; 44 bytes as long as the tag arrays keep their four characters
     * @throws IOException declared for callers; writing to the in-memory buffer does not fail
     */
    public byte[] getHeader() throws IOException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        putTag(sink, fileID);
        putInt(sink, fileLength);
        putTag(sink, wavTag);
        putTag(sink, fmtHdrID);
        putInt(sink, fmtHdrLeth);
        putShort(sink, formatTag);
        putShort(sink, channels);
        putInt(sink, samplesPerSec);
        putInt(sink, avgBytesPerSec);
        putShort(sink, blockAlign);
        putShort(sink, bitsPerSample);
        putTag(sink, dataHdrID);
        putInt(sink, dataHdrLeth);
        return sink.toByteArray();
    }

    /** Appends the low 16 bits of {@code value}, low byte first. */
    private void putShort(ByteArrayOutputStream sink, int value) {
        // write(int) stores only the lowest eight bits of its argument.
        sink.write(value);
        sink.write(value >>> 8);
    }

    /** Appends all 32 bits of {@code value}, low byte first. */
    private void putInt(ByteArrayOutputStream sink, int value) {
        for (int shift = 0; shift < 32; shift += 8) {
            sink.write(value >>> shift);
        }
    }

    /** Appends every character of {@code tag} as a single byte. */
    private void putTag(ByteArrayOutputStream sink, char[] tag) {
        for (char c : tag) {
            sink.write(c);
        }
    }
}

Maven 依赖引用:

<!--        文字转语音-->
        <dependency>
            <groupId>com.alibaba.nls</groupId>
            <artifactId>nls-sdk-tts</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba.nls</groupId>
            <artifactId>nls-sdk-common</artifactId>
            <version>2.1.6</version>
        </dependency>

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
您可以使用阿里云的语音合成服务来实现Java文字转语音功能。以下是基本步骤: 1. 登录阿里云控制台,进入语音合成服务页面,创建一个语音合成应用,获取accessKeyId和accessKeySecret。 2. 下载并引入阿里云提供的Java SDK,配置accessKeyId和accessKeySecret。 3. 调用SDK提供的方法,设置需要转换的文本内容和音频格式等参数,调用语音合成接口,获取语音文件。 4. 播放语音文件或保存语音文件到本地。 以下是一个简单的示例代码: ``` import com.aliyuncs.DefaultAcsClient; import com.aliyuncs.exceptions.ClientException; import com.aliyuncs.exceptions.ServerException; import com.aliyuncs.profile.DefaultProfile; import com.aliyuncs.green.model.v20170112.VoiceIdentityCheckRequest; import com.aliyuncs.green.model.v20170112.VoiceIdentityCheckResponse; import com.aliyuncs.green.model.v20170112.VoiceIdentityUnbindRequest; import com.aliyuncs.green.model.v20170112.VoiceIdentityUnbindResponse; import com.aliyuncs.green.model.v20170112.VoiceStartCheckRequest; import com.aliyuncs.green.model.v20170112.VoiceStartCheckResponse; import com.aliyuncs.green.model.v20170112.VoiceStartRegisterRequest; import com.aliyuncs.green.model.v20170112.VoiceStartRegisterResponse; import com.aliyuncs.green.model.v20170112.VoiceSubmitRequest; import com.aliyuncs.green.model.v20170112.VoiceSubmitResponse; import com.aliyuncs.http.MethodType; public class AliyunVoiceSDKSample { /** * accessKeyId和accessKeySecret是阿里云账号的访问密钥,可以在阿里云控制台的AccessKey管理页面创建和查看。 * 访问密钥信息需要严格保密,只有获得者才能访问阿里云资源和API,请勿泄露。 */ private static final String ACCESS_KEY_ID = "<your_access_key_id>"; private static final String ACCESS_KEY_SECRET = "<your_access_key_secret>"; public static void main(String[] args) { // 创建DefaultAcsClient实例并配置Endpoint DefaultProfile profile = DefaultProfile.getProfile("cn-hangzhou", ACCESS_KEY_ID, ACCESS_KEY_SECRET); DefaultAcsClient client = new DefaultAcsClient(profile); // 创建VoiceStartRegisterRequest并设置参数 VoiceStartRegisterRequest request = new VoiceStartRegisterRequest(); request.setMethod(MethodType.POST); request.setIdentityType(1); // 设置身份类型,1为手机号码 request.setIdentity("13800000000"); // 设置手机号码 request.setBizType("default"); // 设置业务类型 request.setSource("default"); // 设置注册来源 try { // 
调用VoiceStartRegister接口进行语音验证码注册 VoiceStartRegisterResponse response = client.getAcsResponse(request); String registerId = response.getRegisterId(); // 获取注册ID // 创建VoiceStartCheckRequest并设置参数 VoiceStartCheckRequest checkRequest = new VoiceStartCheckRequest(); checkRequest.setMethod(MethodType.POST); checkRequest.setIdentityType(1); // 设置身份类型,1为手机号码 checkRequest.setIdentity("13800000000"); // 设置手机号码 checkRequest.setBizType("default"); // 设置业务类型 checkRequest.setSource("default"); // 设置注册来源 checkRequest.setRegisterId(registerId); // 设置注册ID // 调用VoiceStartCheck接口进行语音验证 VoiceStartCheckResponse checkResponse = client.getAcsResponse(checkRequest); String checkCode = checkResponse.getCheckCode(); // 获取语音验证码 // 提交语音验证码 VoiceSubmitRequest submitRequest = new VoiceSubmitRequest(); submitRequest.setMethod(MethodType.POST); submitRequest.setIdentityType(1); // 设置身份类型,1为手机号码 submitRequest.setIdentity("13800000000"); // 设置手机号码 submitRequest.setCheckCode(checkCode); // 设置语音验证码 submitRequest.setBizType("default"); // 设置业务类型 submitRequest.setSource("default"); // 设置注册来源 // 调用VoiceSubmit接口提交语音验证码 VoiceSubmitResponse submitResponse = client.getAcsResponse(submitRequest); boolean success = submitResponse.getSuccess(); // 获取提交结果 if (success) { System.out.println("语音验证码验证成功!"); } else { System.out.println("语音验证码验证失败!"); } } catch (ServerException e) { e.printStackTrace(); } catch (ClientException e) { e.printStackTrace(); } } } ``` 注意:以上示例代码仅为演示语音验证码注册和验证的流程,具体的文字转语音功能实现需要参考阿里语音合成服务的API文档,并按照文档说明调用相关接口。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值