java实现阿里云的一句话录音转文字demo

最新推荐文章于 2024-06-26 09:31:26 发布

SSH_Han

最新推荐文章于 2024-06-26 09:31:26 发布

阅读量1.6k

点赞数

分类专栏：阿里云语音识别一句话录音转文字文章标签： java 语音识别

本文链接：https://blog.csdn.net/SSH_Han/article/details/113178707

版权

阿里云语音识别同时被 2 个专栏收录

1 篇文章 0 订阅

订阅专栏

一句话录音转文字

1 篇文章 0 订阅

订阅专栏

java实现阿里云的一句话录音[1分钟之内的语音]转文字demo

官方文档

https://help.aliyun.com/document_detail/92131.html?spm=a2c4g.11186623.6.574.1e1672194f1IFG

效果展示

在这里插入图片描述

具体步骤

1.获取accessToken
1.1管理端（阿里云智能语音交互）获取（容易失效）
在这里插入图片描述

1.2 代码获取
依赖文件如下：

<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>aliyun-java-sdk-core</artifactId>
    <version>3.7.1</version>
</dependency>

<!--注意fastjson包是否重复引入 -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.49</version>
</dependency>

具体获取accessToken代码：

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.aliyuncs.CommonRequest;
import com.aliyuncs.CommonResponse;
import com.aliyuncs.DefaultAcsClient;
import com.aliyuncs.IAcsClient;
import com.aliyuncs.exceptions.ClientException;
import com.aliyuncs.http.MethodType;
import com.aliyuncs.http.ProtocolType;
import com.aliyuncs.profile.DefaultProfile;

import java.text.SimpleDateFormat;
import java.util.Date;

public class CreateTokenDemo {

    // 地域ID
    private static final String REGIONID = "cn-shanghai";
    // 获取Token服务域名
    private static final String DOMAIN = "nls-meta.cn-shanghai.aliyuncs.com";
    // API版本
    private static final String API_VERSION = "2019-02-28";
    // API名称
    private static final String REQUEST_ACTION = "CreateToken";

    // 响应参数
    private static final String KEY_TOKEN = "Token";
    private static final String KEY_ID = "Id";
    private static final String KEY_EXPIRETIME = "ExpireTime";


    public static void main(String args[]) throws ClientException {
        // 改用自己账号的accessKeyId和accessKeySecret！！！
        String accessKeyId = "2QWEAFS231qewga24qw";
        String accessKeySecret = "21q3wrfar2q23WQDSWawf2qqewd2";

        // 创建DefaultAcsClient实例并初始化
        DefaultProfile profile = DefaultProfile.getProfile(
                REGIONID,
                accessKeyId,
                accessKeySecret);

        IAcsClient client = new DefaultAcsClient(profile);
        CommonRequest request = new CommonRequest();
        request.setDomain(DOMAIN);
        request.setVersion(API_VERSION);
        request.setAction(REQUEST_ACTION);
        request.setMethod(MethodType.POST);
        request.setProtocol(ProtocolType.HTTPS);

        CommonResponse response = client.getCommonResponse(request);
        System.out.println(response.getData());
        if (response.getHttpStatus() == 200) {
            JSONObject result = JSON.parseObject(response.getData());
            String token = result.getJSONObject(KEY_TOKEN).getString(KEY_ID);
            long expireTime = result.getJSONObject(KEY_TOKEN).getLongValue(KEY_EXPIRETIME);
            System.out.println("获取到的Token： " + token + "，有效期时间戳(单位：秒): " + expireTime);
            // 将10位数的时间戳转换为北京时间
            String expireDate = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date(expireTime * 1000));
            System.out.println("Token有效期的北京时间：" + expireDate);
        }
        else {
            System.out.println("获取Token失败！");
        }
    }
}

2.直接使用RESTFUL API 方式调用
在这里插入图片描述
依赖如下：

<dependency>
    <groupId>com.squareup.okhttp3</groupId>
    <artifactId>okhttp</artifactId>
    <version>3.9.1</version>
</dependency>

<!-- http://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.42</version>
</dependency>

具体语音转文字代码：

import com.alibaba.fastjson.JSONPath;
import com.voicecomm.socket.utils.HttpUtil;

import java.util.HashMap;

public class SpeechRecognizerRESTfulDemo {
    private String accessToken;
    private String appkey;

    public SpeechRecognizerRESTfulDemo(String appkey, String token) {
        this.appkey = appkey;
        this.accessToken = token;
    }

    public void process(String fileName, String format, int sampleRate,
                        boolean enablePunctuationPrediction,
                        boolean enableInverseTextNormalization,
                        boolean enableVoiceDetection) {

        /**
         * 设置HTTP RESTful POST请求：
         * 1.使用HTTP协议。
         * 2.语音识别服务域名：nls-gateway.cn-shanghai.aliyuncs.com。
         * 3.语音识别接口请求路径：/stream/v1/asr。
         * 4.设置必选请求参数：appkey、format、sample_rate。
         * 5.设置可选请求参数：enable_punctuation_prediction、enable_inverse_text_normalization、enable_voice_detection。
         */
        String url = "http://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr";
        String request = url;
        request = request + "?appkey=" + appkey;
        request = request + "&format=" + format;
        request = request + "&sample_rate=" + sampleRate;
        if (enablePunctuationPrediction) {
            request = request + "&enable_punctuation_prediction=" + true;
        }
        if (enableInverseTextNormalization) {
            request = request + "&enable_inverse_text_normalization=" + true;
        }
        if (enableVoiceDetection) {
            request = request + "&enable_voice_detection=" + true;
        }

        System.out.println("Request: " + request);

        /**
         * 设置HTTP头部字段：
         * 1.鉴权参数。
         * 2.Content-Type：application/octet-stream。
         */
        HashMap<String, String> headers = new HashMap<String, String>();
        headers.put("X-NLS-Token", this.accessToken);
        headers.put("Content-Type", "application/octet-stream");

        /**
         * 发送HTTP POST请求，返回服务端的响应。
         */
        String response = HttpUtil.sendPostFile(request, headers, fileName);

        if (response != null) {
            System.out.println("Response: " + response);
            String result = JSONPath.read(response, "result").toString();
            System.out.println("识别结果：" + result);
        } else {
            System.err.println("识别失败!");
        }

    }

    public static void main(String[] args) {
		// 改用自己账号的appkey和获取的最新accessToken !!!
        String token = "22fcdb54bb29456cbd3e9c86625f304b";
        String appkey = "DWQ12dwe132fas2f";

        SpeechRecognizerRESTfulDemo demo = new SpeechRecognizerRESTfulDemo(appkey, token);

//        String fileName = SpeechRecognizerRESTfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
        // 录音文件全路径，一定是全路径!!!不然会报"The filePath is not a file:"
        String fileName = "C:\\Users\\Administrator\\Downloads\\demo_8k\\wav\\020390496.wav";
        String format = "pcm";
        // 看你的录音是8k还是16k!!!
        int sampleRate = 8000;
        boolean enablePunctuationPrediction = true;
        boolean enableInverseTextNormalization = true;
        boolean enableVoiceDetection = false;

        demo.process(fileName, format, sampleRate, enablePunctuationPrediction, enableInverseTextNormalization, enableVoiceDetection);
    }
}

HttpUtil：

import okhttp3.*;
import java.io.File;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

public class HttpUtil {

    private static String getResponseWithTimeout(Request q) {
        String ret = null;

        OkHttpClient.Builder httpBuilder = new OkHttpClient.Builder();
        OkHttpClient client = httpBuilder.connectTimeout(10, TimeUnit.SECONDS)
                .readTimeout(60, TimeUnit.SECONDS)
                .writeTimeout(60, TimeUnit.SECONDS)
                .build();

        try {
            Response s = client.newCall(q).execute();
            ret = s.body().string();
            s.close();
        } catch (SocketTimeoutException e) {
            ret = null;
            System.err.println("get result timeout");
        } catch (IOException e) {
            System.err.println("get result error " + e.getMessage());
        }

        return ret;
    }

    public static String sendPostFile(String url, HashMap<String, String> headers, String fileName) {
        RequestBody body;

        File file = new File(fileName);
        if (!file.isFile()) {
                System.err.println("The filePath is not a file: " + fileName);
            return null;
        } else {
            body = RequestBody.create(MediaType.parse("application/octet-stream"), file);
        }

        Headers.Builder hb = new Headers.Builder();
        if (headers != null && !headers.isEmpty()) {
            for (Map.Entry<String, String> entry : headers.entrySet()) {
                hb.add(entry.getKey(), entry.getValue());
            }
        }

        Request request = new Request.Builder()
                .url(url)
                .headers(hb.build())
                .post(body)
                .build();

        return getResponseWithTimeout(request);
    }

    public static String sendPostData(String url, HashMap<String, String> headers, byte[] data) {
        RequestBody body;

        if (data.length == 0) {
            System.err.println("The send data is empty.");
            return null;
        } else {
            body = RequestBody.create(MediaType.parse("application/octet-stream"), data);
        }

        Headers.Builder hb = new Headers.Builder();
        if (headers != null && !headers.isEmpty()) {
            for (Map.Entry<String, String> entry : headers.entrySet()) {
                hb.add(entry.getKey(), entry.getValue());
            }
        }

        Request request = new Request.Builder()
                .url(url)
                .headers(hb.build())
                .post(body)
                .build();

        return getResponseWithTimeout(request);
    }
}