最近在弄语音识别这块,然后就看了下百度的语音识别:
这里介绍如何使用Java实现语音识别。首先你需要准备的东西:
1、一个pcm或者wav格式的语音文件(我使用的是官网提供的案例16k.pcm)点击下载。
2、准备sdk包(将这个包的jar包通过build path附上就可以)点击下载;
3、需要在百度云上申请一个百度语音的项目,然后将apikey和secretkey复制到项目中。
需要注意的是:如果语音文件的采样率是16k,rate就填16000;8k的就填8000(可以参考官方文档)。cuid填本机MAC地址就可以了,这个值没有什么要求,我用的就是secretKey。
下面代码:
package test;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import javax.xml.bind.DatatypeConverter;
import org.json.JSONObject;
/**
 * Minimal command-line demo of the Baidu speech-recognition REST endpoint.
 *
 * <p>{@link #main} first fetches an OAuth access token and then submits the same audio file
 * twice: once as a JSON document carrying Base64-encoded audio ({@code method1}) and once as a
 * raw audio body with the parameters in the query string ({@code method2}).
 */
public class demo2 {
    // NOTE(review): plain HTTP, so the token and cuid travel unencrypted; consider HTTPS if the
    // service offers it.
    private static final String serverURL = "http://vop.baidu.com/server_api";
    /** Access token filled in by {@link #getToken()} and read by both request methods. */
    private static String token = "";
    private static final String testFileName = "D:\\workspace\\test2\\src\\16k.pcm"; // technical support provided by Baidu Speech
    // put your own params here
    // Fill in the three values below with the ones issued for your own registered app.
    // NOTE(review): credentials are hard-coded here; real secrets should be loaded from
    // configuration rather than committed to source.
    private static final String apiKey = "jrDWBaBvmhPBYQRjlT7Fk1yk";
    private static final String secretKey = "wI2MZhBGKthjBFdyo1miAFsforCesYbX";
    // NOTE(review): this reuses the secret key as the client id, and method2 places cuid in the
    // request URL; a non-secret identifier would be safer.
    private static final String cuid = "wI2MZhBGKthjBFdyo1miAFsforCesYbX";

    /**
     * Runs the demo: obtains a token, then performs both recognition requests.
     *
     * @param args ignored
     * @throws Exception if any request or file read fails
     */
    public static void main(String[] args) throws Exception {
        getToken();
        method1();
        method2();
    }

    /**
     * Requests an access token with the client-credentials grant, stores it in {@link #token}
     * and prints it.
     *
     * @throws IOException if the token endpoint does not answer with HTTP 200
     * @throws Exception if the response cannot be parsed or lacks {@code access_token}
     */
    private static void getToken() throws Exception {
        String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials"
                + "&client_id=" + urlEncode(apiKey) + "&client_secret=" + urlEncode(secretKey);
        HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
        String body = printResponse(conn);
        // printResponse signals a non-200 status with an empty string; fail with a clear message
        // instead of letting the JSON parser choke on empty input.
        if (body.isEmpty()) {
            throw new IOException("Token request failed; no access token was returned");
        }
        token = new JSONObject(body).getString("access_token");
        System.out.println(token);
    }

    /**
     * Sends the audio file as a JSON document whose {@code speech} field holds the
     * Base64-encoded bytes, then prints the response.
     *
     * @throws Exception if the file cannot be read or the request fails
     */
    private static void method1() throws Exception {
        File pcmFile = new File(testFileName);
        System.out.println(pcmFile.exists());
        // Read the file once so that "len" and "speech" always describe the same bytes.
        byte[] audio = loadFile(pcmFile);

        // construct params
        JSONObject params = new JSONObject();
        params.put("format", "pcm");
        params.put("rate", 16000);
        params.put("channel", "1");
        params.put("token", token);
        params.put("lan", "zh");
        params.put("cuid", cuid);
        params.put("len", audio.length);
        params.put("speech", DatatypeConverter.printBase64Binary(audio));

        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection();
        // Encode explicitly as UTF-8 to match the declared charset; DataOutputStream.writeBytes
        // would silently drop the high byte of every char.
        post(conn, "application/json; charset=utf-8",
                params.toString().getBytes(StandardCharsets.UTF_8));
        printResponse(conn);
    }

    /**
     * Sends the raw audio bytes as the request body, passing {@code cuid} and {@code token} in
     * the query string, then prints the response and the returned string.
     *
     * @throws Exception if the file cannot be read or the request fails
     */
    private static void method2() throws Exception {
        File pcmFile = new File(testFileName);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL
                + "?cuid=" + urlEncode(cuid) + "&token=" + urlEncode(token)).openConnection();
        post(conn, "audio/pcm; rate=16000", loadFile(pcmFile));
        System.out.println(printResponse(conn));
    }

    /**
     * Configures {@code conn} as a POST with the given content type and writes {@code body}.
     *
     * @param conn        connection that has not been sent yet
     * @param contentType value for the {@code Content-Type} header
     * @param body        bytes to send as the request body
     * @throws IOException if the request cannot be configured or written
     */
    private static void post(HttpURLConnection conn, String contentType, byte[] body)
            throws IOException {
        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", contentType);
        conn.setDoInput(true);
        conn.setDoOutput(true);
        // send request; try-with-resources closes the stream even if the write fails
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(body);
            wr.flush();
        }
    }

    /**
     * Reads the response of {@code conn}, pretty-prints it as JSON and returns the raw text.
     *
     * @param conn connection whose response should be read
     * @return the response body with each line followed by {@code '\r'}, or an empty string
     *         when the status code is not 200 (the status is printed in that case)
     * @throws Exception if reading fails or the body is not valid JSON
     */
    private static String printResponse(HttpURLConnection conn) throws Exception {
        int status = conn.getResponseCode();
        if (status != 200) {
            // request error
            System.out.println("conn.getResponseCode() = " + status);
            return "";
        }
        StringBuilder response = new StringBuilder();
        // Decode as UTF-8 instead of the platform default so non-ASCII text in the response is
        // not garbled; assumes the service responds in UTF-8 (the JSON default).
        try (BufferedReader rd = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = rd.readLine()) != null) {
                response.append(line);
                response.append('\r');
            }
        }
        System.out.println(new JSONObject(response.toString()).toString(4));
        return response.toString();
    }

    /**
     * Reads the whole file into memory.
     *
     * @param file file to read
     * @return the file's bytes
     * @throws IOException if the file is missing, larger than an array can hold, or ends before
     *                     the expected number of bytes has been read
     */
    private static byte[] loadFile(File file) throws IOException {
        long length = file.length();
        // Guard the narrowing cast below: a length above Integer.MAX_VALUE would wrap around.
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File is too large to load into memory: " + file.getName());
        }
        byte[] bytes = new byte[(int) length];
        try (InputStream is = new FileInputStream(file)) {
            int offset = 0;
            int numRead;
            // read() may return fewer bytes than requested, so loop until the buffer is full
            // or end of stream is reached.
            while (offset < bytes.length
                    && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
        }
        return bytes;
    }

    /**
     * Percent-encodes a value for use in a URL query string.
     *
     * @param value raw parameter value
     * @return the value encoded as {@code application/x-www-form-urlencoded} in UTF-8
     * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
     */
    private static String urlEncode(String value) throws UnsupportedEncodingException {
        return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
    }
}
运行结果:
{
"scope": "public brain_all_scope audio_voice_assistant_get audio_tts_post wise_adapt lebo_resource_base lightservice_public hetu_basic lightcms_map_poi kaidian_kaidian ApsMisTest_Test权限 vis-classify_flower bnstest_fasf lpq_开放",
"session_secret": "a1872f3e791eeb5adf327086e6a0d769",
"expires_in": 2592000,
"refresh_token": "25.f74b044ed07383f7643b8259fb9af970.315360000.1827655515.282335-10467473",
"session_key": "9mzdWT+D4w5xoNHb1JejPBZ1qZKj90nUTAVDhweqqQJjH1o4PvJAYHgT8QynTP9BCVSjxA5tihkh2QCabkExrQFTpcK1VQ==",
"access_token": "24.be8efa5a534453370396a4accce50aa6.2592000.1514887515.282335-10467473"
}
24.be8efa5a534453370396a4accce50aa6.2592000.1514887515.282335-10467473
true
{
"result": ["北京科技馆,"],
"err_msg": "success.",
"sn": "592584910701512295515",
"err_no": 0,
"corpus_no": "6495259779570311967"
}
{
"result": ["北京科技馆,"],
"err_msg": "success.",
"sn": "587847696871512295516",
"err_no": 0,
"corpus_no": "6495259783832855718"
}
{"corpus_no":"6495259783832855718","err_msg":"success.","err_no":0,"result":["北京科技馆,"],"sn":"587847696871512295516"}