百度实时语音识别示例教程-基于libwebsockets库,linux c++版本
准备
1. 在百度控制台开通实时语音识别服务。开通成功后,会得到如下图所示的应用信息,本示例只用到其中的 appid 和 api key。

2. 在百度控制台领取免费额度(入口见下图右下角)。领取成功后,下图底部会显示总量为 10 小时的余额。

3. 准备待识别的 pcm 语音文件:可以到百度云 SDK 页面(例如 https://ai.baidu.com/sdk#asr)下载 SDK,SDK 压缩包里就带有 pcm 文件。本示例使用的文件名为“16k-0.pcm”。
4. 编译源代码,并执行。
编译命令: g++ main.cpp -lwebsockets -lpthread -o aa
执行: ./aa (需要在 16k-0.pcm 所在的目录下运行,因为程序按相对路径读取该文件)
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <libwebsockets.h>
// Handle of the single client connection. It is assigned in main() from the
// result of lws_client_connect_via_info() and read by send_websockets() as
// the target of lws_write().
struct lws *websocket;
// The rest of the file refers to std::string / std::thread unqualified.
using namespace std;
/**
 * Read an entire file into a freshly malloc'ed buffer.
 *
 * @param file       Path of the file to read (opened in binary mode).
 * @param read_size  Out: number of bytes stored in the returned buffer.
 *                   Set to 0 on every failure path.
 * @return Pointer to the file contents, or NULL when the file cannot be
 *         opened, its size cannot be determined, it is empty, memory cannot
 *         be allocated, or fewer bytes than expected were read.
 *         The caller owns the buffer and must release it with free().
 */
void *readFileToMem(std::string file, size_t *read_size)
{
    *read_size = 0;

    FILE *fp = fopen(file.c_str(), "rb");
    if (fp == NULL) {
        printf("failed to open input file %s\n", file.c_str());
        return NULL;
    }

    void *mem = NULL;

    /* ftell() returns a long and reports errors as -1; keep it signed so the
     * error value is not silently turned into a huge allocation size. */
    long file_size = -1;
    if (fseek(fp, 0L, SEEK_END) == 0) {
        file_size = ftell(fp);
    }

    if (file_size > 0 && fseek(fp, 0L, SEEK_SET) == 0) {
        const size_t wanted = (size_t)file_size;

        mem = malloc(wanted);
        if (mem != NULL) {
            if (fread(mem, 1, wanted, fp) == wanted) {
                *read_size = wanted;
            } else {
                /* A short read is treated as a failure: hand back nothing. */
                free(mem);
                mem = NULL;
            }
        }
    }

    fclose(fp);
    return mem;
}
/**
 * Send one websocket frame over the global `websocket` connection.
 *
 * lws_write() is handed a pointer that has LWS_PRE bytes of writable space in
 * front of it, so the payload is first copied into a temporary buffer with
 * that headroom.
 *
 * @param data      Payload bytes to send.
 * @param size      Number of payload bytes; must not be negative.
 * @param protocol  Frame type passed through to lws_write()
 *                  (e.g. LWS_WRITE_TEXT or LWS_WRITE_BINARY).
 * @return The value returned by lws_write(), or -1 when there is no
 *         connection, the arguments are invalid, or the temporary buffer
 *         cannot be allocated.
 *
 * NOTE(review): in this file the function is reached from a detached worker
 * thread (see run()), not from the libwebsockets service loop. The
 * libwebsockets documentation describes lws_write() as something to call
 * from the WRITEABLE callback; confirm this usage is acceptable for the
 * library version in use.
 */
int send_websockets(const void *data, int size, enum lws_write_protocol protocol)
{
    if (websocket == NULL || size < 0 || (data == NULL && size > 0)) {
        return -1;
    }

    const size_t payload_len = (size_t)size;
    unsigned char *buf = (unsigned char *)malloc(LWS_PRE + payload_len);
    if (buf == NULL) {
        return -1;
    }

    if (payload_len > 0) {
        memcpy(buf + LWS_PRE, data, payload_len);
    }

    int ret = lws_write(websocket, buf + LWS_PRE, payload_len, protocol);

    free(buf);
    return ret;
}
/**
 * Send the START text frame that opens a recognition session.
 *
 * The JSON payload carries the appid/appkey, dev_pid, cuid, sample rate and
 * audio format. All values are hard-coded in the literal below; the value
 * returned by send_websockets() is printed.
 */
void send_start_params(void)
{
    static const std::string start_frame = R"({"type": "START", "data": {"appid": 42720845, "appkey": "bM1hT2tQHsFKmMqhYHRInGYC", "dev_pid": 1537, "cuid": "ygt_x5_tuyou", "sample": 16000, "format": "pcm"}})";

    const int written = send_websockets(start_frame.data(), start_frame.size(), LWS_WRITE_TEXT);
    printf("[send_start_params]: size=%d\n", written);
}
/**
 * Stream the file "16k-0.pcm" to the server as a sequence of binary frames.
 *
 * The file is loaded fully into memory and sent in chunks of chunk_ms
 * milliseconds of audio, sleeping chunk_ms between chunks so the data goes
 * out at roughly real-time pace. The chunk size assumes 16000 samples per
 * second at 2 bytes per sample (presumably 16-bit mono PCM; confirm against
 * the input file). Streaming stops early if a write reports an error.
 * If the file cannot be read, nothing is sent.
 */
void send_audio(void)
{
    const int chunk_ms = 160;
    /* bytes per chunk: 16000 samples/s * 2 bytes / 1000 ms * chunk_ms */
    const size_t chunk_len = (size_t)(16000 * 2 / 1000 * chunk_ms);

    size_t size = 0;
    void *pcm = readFileToMem("16k-0.pcm", &size);
    printf("pcm size = %zu\n", size); /* %zu: size is a size_t */

    for (size_t offset = 0; offset < size; offset += chunk_len) {
        const size_t remaining = size - offset;
        const int send_size = (int)(remaining < chunk_len ? remaining : chunk_len);

        int ret = send_websockets((unsigned char *)pcm + offset, send_size, LWS_WRITE_BINARY);
        printf("[send_audio]: size=%d\n", ret);
        if (ret < 0) {
            /* The write failed; pushing further chunks is pointless. */
            break;
        }

        usleep(chunk_ms * 1000);
    }

    /* The buffer returned by readFileToMem() is owned by this function. */
    free(pcm);
}
/**
 * Send the FINISH text frame that follows the audio data and print the value
 * returned by send_websockets().
 */
void send_finish()
{
    static const std::string finish_frame = R"({"type": "FINISH"})";

    const int written = send_websockets(finish_frame.data(), finish_frame.size(), LWS_WRITE_TEXT);
    printf("[send_finish]: size=%d\n", written);
}
/**
 * Run one complete session in order: START frame, audio data, FINISH frame.
 * Used as the entry point of the worker thread started when the connection
 * is established.
 */
void run()
{
    send_start_params();  // open the session

    send_audio();         // stream the PCM file

    send_finish();        // signal the end of the audio
}
static int callback_websocket(struct lws *wsi, enum lws_callback_reasons reason, void *user, void *in, size_t len)
{
switch (reason) {
case LWS_CALLBACK_CLIENT_CONNECTION_ERROR:
fprintf(stderr, "Error connecting to server\n");
break;
case LWS_CALLBACK_CLIENT_ESTABLISHED:
{
fprintf(stderr, "Connected to server\n");
thread th1(run);
th1.detach();
}
break;
case LWS_CALLBACK_CLIENT_RECEIVE:
fprintf(stderr, "Received data: %s\n", (char *)in);
break;
case LWS_CALLBACK_CLOSED:
fprintf(stderr, "Disconnected from server\n");
break;
default:
break;
}
return 0;
}
// Protocol table handed to libwebsockets through info.protocols in main().
// It holds a single protocol named "default" whose events are delivered to
// callback_websocket, followed by an all-zero terminator entry.
// NOTE(review): the two 65536 values are presumably per_session_data_size
// and rx_buffer_size of struct lws_protocols — confirm against the installed
// libwebsockets header. callback_websocket never touches its `user` pointer.
struct lws_protocols protocols[] = {
{ "default", callback_websocket, 65536, 65536 },
{ NULL, NULL, 0, 0 }
};
/**
 * Program entry point: create a libwebsockets context, connect to
 * vop.baidu.com:80 at /realtime_asr and service the connection.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 after the service loop ends, 1 if the context cannot be created
 *         or the connection attempt cannot be started.
 *
 * NOTE(review): the loop ends only when lws_service() returns a negative
 * value. Nothing in callback_websocket requests a shutdown after the
 * connection closes, so the process may keep running once recognition has
 * finished.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    struct lws_context_creation_info info = {};
    info.port = CONTEXT_PORT_NO_LISTEN; /* client only: no listening socket */
    info.protocols = protocols;
    info.gid = -1;
    info.uid = -1;

    struct lws_context *context = lws_create_context(&info);
    if (context == NULL) {
        fprintf(stderr, "Creating libwebsocket context failed\n");
        return 1;
    }

    struct lws_client_connect_info i = {};
    i.context = context;
    i.address = "vop.baidu.com";
    i.port = 80;
    i.path = "/realtime_asr?sn=123456xx";
    i.host = i.address;
    i.origin = i.address;

    websocket = lws_client_connect_via_info(&i);
    if (websocket == NULL) {
        fprintf(stderr, "Error connecting to server\n");
        /* Release the context instead of leaking it on this failure path. */
        lws_context_destroy(context);
        return 1;
    }

    /* Pump libwebsockets events until the library reports an error. */
    while (lws_service(context, 10000) >= 0) {
    }

    lws_context_destroy(context);
    printf("out\n");
    return 0;
}