使用腾讯云语音识别（一句话识别）示例

Chained1001

已于 2022-08-04 09:44:21 修改

阅读量5.5k

点赞数 1

文章标签： java 腾讯云语音识别

于 2020-06-16 15:53:16 首次发布

本文链接：https://blog.csdn.net/Chained1001/article/details/106781925

版权

人生若只如初见，何事秋风悲画扇。

整体思路

整体思路大致为：前端页面上传音频文件；后端接收文件后将其上传至腾讯云COS，上传成功后得到该音频的访问地址（即Url）；随后将该Url传给调用腾讯云一句话识别接口（Url识别方式）的方法，最终得到语音识别结果。

前期准备

1.注册腾讯云账号
2.获取SecretId和SecretKey
3.开通腾讯云语音识别及COS业务
4.新建一个Maven工程

代码实现

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the speech-to-text demo: a Spring Boot web application that pulls in the
     Tencent Cloud COS SDK, the Tencent Cloud API SDK and fastjson. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>DemoVoiceToWords</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- Spring Boot starter parent; spring-boot-starter-web below declares no version of its own
         and relies on the version managed here. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.0.4.RELEASE</version>
    </parent>
    <dependencies>
        <!-- Spring MVC web starter used by the upload controller. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Tencent Cloud COS (object storage) dependency. -->
        <dependency>
            <groupId>com.qcloud</groupId>
            <artifactId>cos_api</artifactId>
            <version>5.6.8</version>
        </dependency>
        <!-- Speech recognition dependency (provides the com.tencentcloudapi.asr client classes). -->
        <dependency>
            <groupId>com.tencentcloudapi</groupId>
            <artifactId>tencentcloud-sdk-java</artifactId>
            <version>3.1.62</version>
        </dependency>
        <!-- NOTE(review): none of the Java classes shown alongside this POM appear to import from
             this artifact; confirm whether it is still required. -->
        <dependency>
            <groupId>com.qcloud</groupId>
            <artifactId>qcloud-java-sdk</artifactId>
            <version>2.0.1</version>
        </dependency>
        <!-- JSONObject dependency (fastjson), used to parse the recognition response. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.66</version>
        </dependency>
    </dependencies>
</project>

前端Html代码

<!DOCTYPE html>
<!-- Minimal upload page: submits a single file as multipart/form-data to the backend. -->
<html xmlns="http://www.w3.org/1999/html">
<head>
    <meta charset="UTF-8">
    <title>上传测试</title>
</head>
<body>
    <!-- Posts to /demo/upload (DemoController.upload). enctype must be multipart/form-data so the
         file content is sent, and the field name "file" must match @RequestParam("file"). -->
    <form action="http://localhost:8080/demo/upload" method="post" enctype="multipart/form-data">
        <input type="file" name="file"/>
        <input type="submit" value="提交"/>
    </form>
</body>
</html>

后端Controller层代码

package demorecognition.controller;

import demorecognition.util.COSUpLoadUtil;
import demorecognition.recognition.URLRecognition;
import com.alibaba.fastjson.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;

/**
 * Web entry point of the demo: receives an uploaded audio file, stores it in Tencent Cloud COS
 * and runs one-sentence speech recognition on the resulting URL.
 */
@Controller
@CrossOrigin
@RequestMapping("/demo")
public class DemoController {
    private static final Logger logger = LoggerFactory.getLogger(DemoController.class);

    /** Folder inside the COS bucket where uploaded audio is stored. */
    private static final String COS_FOLDER = "voice";

    /**
     * Uploads the audio file to COS and recognizes the speech it contains.
     *
     * <p>The recognized text is printed to standard output; nothing is written to the HTTP
     * response body. {@code @ResponseBody} is required because a {@code void} return on a plain
     * {@code @Controller} would otherwise make Spring MVC look for a view named after the
     * request path once the work is done.
     *
     * <p>Processing stops early (with a log entry) when the upload is empty, when the COS upload
     * fails, or when no recognition result is available, instead of continuing with a
     * {@code null} value.
     *
     * @param multipartFile the audio file sent in the multipart field named {@code file}
     */
    @PostMapping("upload")
    @ResponseBody
    public void upload(@RequestParam("file") MultipartFile multipartFile) {
        logger.info("开始处理文件。");
        // A submitted form without a chosen file yields a non-null but empty part.
        if (multipartFile == null || multipartFile.isEmpty()) {
            logger.info("上传的文件不存在。");
            return;
        }

        logger.info("开始文件上传至腾讯云COS。");
        String audioUrl;
        try {
            // Arguments: size in bytes (content length of the stream), original file name
            // (appended to the generated object key), the file itself, and the target folder
            // in the bucket.
            audioUrl = COSUpLoadUtil.upLoadFile2COS(
                    multipartFile.getSize(),
                    multipartFile.getOriginalFilename(),
                    multipartFile,
                    COS_FOLDER);
        } catch (IOException e) {
            logger.error("文件上传至腾讯云COS失败。", e);
            return;
        }
        // The upload helper signals failure with null rather than an exception.
        if (audioUrl == null) {
            logger.error("未能获取音频文件的URL，终止语音识别。");
            return;
        }

        logger.info("开始通过URL进行语音识别。");
        URLRecognition ur = new URLRecognition();
        String identificationResults = ur.getIdentificationResults(audioUrl);
        // The recognizer returns null when the remote call failed.
        if (identificationResults == null) {
            logger.error("语音识别失败，未获得识别结果。");
            return;
        }

        JSONObject jsonObject = JSONObject.parseObject(identificationResults);
        // Extract the recognized sentence; tolerate an empty document or a missing "Result" key.
        String word = jsonObject == null ? null : jsonObject.getString("Result");
        if (word != null && !word.isEmpty()) {
            System.out.println("识别结果为：" + word);
        } else {
            System.out.println("很抱歉，未能识别出您的话。");
        }
    }
}

COSUpLoadUtil代码

package demorecognition.util;

import com.qcloud.cos.COSClient;
import com.qcloud.cos.ClientConfig;
import com.qcloud.cos.auth.BasicCOSCredentials;
import com.qcloud.cos.auth.COSCredentials;
import com.qcloud.cos.model.ObjectMetadata;
import com.qcloud.cos.model.PutObjectRequest;
import com.qcloud.cos.model.PutObjectResult;
import com.qcloud.cos.region.Region;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.UUID;

/**
 * Helper for uploading a file to Tencent Cloud COS and obtaining a pre-signed URL for it.
 *
 * <p>All members are static. The implicit public constructor is kept so that existing code which
 * instantiates this class continues to compile.
 */
public class COSUpLoadUtil {
    private static final Logger logger = LoggerFactory.getLogger(COSUpLoadUtil.class);

    // Account credentials; must be filled in before use.
    private static final String secretId = "";
    private static final String secretKey = "";
    // Region of the bucket.
    private static final String bucketRegion = "ap-shanghai";
    // Bucket name; must be filled in before use.
    private static final String bucketName = "";
    // Key prefix inside the bucket; set as needed (see the official COS documentation).
    private static final String basicPath = "";

    // Shared client configuration; a COSClient is created per upload from these.
    private static final COSCredentials cred = new BasicCOSCredentials(secretId, secretKey);
    private static final Region region = new Region(bucketRegion);
    private static final ClientConfig clientConfig = new ClientConfig(region);

    /** Validity of the generated pre-signed URL: one week, in milliseconds. */
    private static final long URL_VALIDITY_MILLIS = 3600L * 1000L * 24L * 7L;

    /**
     * Uploads {@code file} to COS as a stream and returns a pre-signed URL for the stored object.
     *
     * <p>The object key is {@code basicPath/filepath/<random UUID><base file name>}; the UUID
     * prevents collisions between files with the same name. Only the last path segment of
     * {@code filename} is used, so a client-supplied name containing directories cannot alter
     * the key layout.
     *
     * <p>The input stream is closed and the COS client is shut down on every path, including
     * failures.
     *
     * @param fileSize size of the file in bytes, used as the content length of the upload;
     *                 when {@code null}, {@code file.getSize()} is used instead
     * @param filename original file name; may be {@code null}
     * @param file     the uploaded file
     * @param filepath folder inside the bucket in which to store the object
     * @return the pre-signed URL (valid for one week) as a string, or {@code null} if the upload
     *         result carries no ETag
     * @throws IOException if the file's input stream cannot be opened or closed
     */
    public static String upLoadFile2COS(Long fileSize, String filename, MultipartFile file, String filepath) throws IOException {
        COSClient cosClient = new COSClient(cred, clientConfig);
        try (InputStream inputStream = new BufferedInputStream(file.getInputStream())) {
            // Stream upload was chosen over local-file upload. Per the original author's note
            // citing the official documentation, the content length must be declared in the
            // metadata for stream uploads to avoid OOM.
            ObjectMetadata objectMetadata = new ObjectMetadata();
            objectMetadata.setContentLength(fileSize != null ? fileSize : file.getSize());

            String key = basicPath + "/" + filepath + "/" + UUID.randomUUID().toString() + baseName(filename);
            PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, inputStream, objectMetadata);
            PutObjectResult putObjectResult = cosClient.putObject(putObjectRequest);

            // An ETag in the result indicates a successful upload.
            if (putObjectResult.getETag() == null) {
                logger.error("文件上传失败。");
                return null;
            }
            logger.info("文件上传成功。");

            logger.info("开始生成URL。");
            Date expiredTime = new Date(System.currentTimeMillis() + URL_VALIDITY_MILLIS);
            String url = cosClient.generatePresignedUrl(bucketName, key, expiredTime).toString();
            logger.info("url生成成功。");
            logger.info("生成的URL为：{}", url);
            return url;
        } finally {
            // Release the client's resources whether or not the upload succeeded.
            destory(cosClient);
        }
    }

    /**
     * Shuts down the given COS client.
     *
     * <p>The misspelled name is kept because the method is public.
     *
     * @param cosClient the client to shut down
     */
    public static void destory(COSClient cosClient) {
        cosClient.shutdown();
    }

    /** Returns the last path segment of {@code filename}, or an empty string when it is null. */
    private static String baseName(String filename) {
        if (filename == null) {
            return "";
        }
        int lastSeparator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
        return filename.substring(lastSeparator + 1);
    }
}

URLRecognition代码

package demorecognition.recognition;

import com.tencentcloudapi.asr.v20190614.AsrClient;
import com.tencentcloudapi.asr.v20190614.models.SentenceRecognitionRequest;
import com.tencentcloudapi.asr.v20190614.models.SentenceRecognitionResponse;
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;

import java.io.UnsupportedEncodingException;

/**
 * Calls Tencent Cloud one-sentence speech recognition (SentenceRecognition) for audio that is
 * reachable through a URL.
 */
public class URLRecognition {

    /**
     * Recognizes the speech in the audio file located at {@code url}.
     *
     * <p>The request uses fixed parameters: engine {@code 16k_zh}, voice format {@code wav} and
     * the session key {@code session-123}. The URL is set on the request object rather than
     * concatenated into the JSON parameter string, so characters such as quotes or backslashes
     * in the URL cannot corrupt the request.
     *
     * @param url publicly reachable URL of the audio file
     * @return the service response serialized as a JSON string, or {@code null} when
     *         {@code url} is null or blank or when the call fails (the error is printed to
     *         standard output)
     */
    public String getIdentificationResults(String url) {
        // Without a URL the service would be asked to fetch the literal text "null".
        if (url == null || url.trim().isEmpty()) {
            System.out.println("语音文件URL为空，无法进行语音识别。");
            return null;
        }

        // Account credentials; must be filled in before use.
        String secretId = "";
        String secretKey = "";
        String identificationResults = null;
        // One-sentence recognition using the audio-URL input mode.
        try {
            Credential cred = new Credential(secretId, secretKey);

            HttpProfile httpProfile = new HttpProfile();
            httpProfile.setEndpoint("asr.tencentcloudapi.com");

            ClientProfile clientProfile = new ClientProfile();
            clientProfile.setHttpProfile(httpProfile);

            AsrClient client = new AsrClient(cred, "ap-shanghai", clientProfile);
            // Static part of the request; the audio URL is supplied separately below.
            String params = "{\"ProjectId\":0,\"SubServiceType\":2,\"EngSerViceType\":\"16k_zh\",\"Source" +
                    "Type\":0,\"VoiceFormat\":\"wav\",\"UsrAudioKey\":\"session-123\"}";
            SentenceRecognitionRequest req = SentenceRecognitionRequest.fromJsonString(params, SentenceRecognitionRequest.class);
            req.setUrl(url);

            SentenceRecognitionResponse resp = client.SentenceRecognition(req);

            identificationResults = SentenceRecognitionRequest.toJsonString(resp);
        } catch (TencentCloudSDKException | UnsupportedEncodingException e) {
            // Best effort: report the failure and let the caller see null.
            System.out.println(e.toString());
        }
        return identificationResults;
    }
}