科大讯飞离线命令词识别

1 篇文章 0 订阅

一、进入科大讯飞官网,下载自己所需要的SDK----以离线命令词识别为例;

二、分析代码-----主要有以下四大函数

const char *get_audio_file(void); // Choose the audio file to run offline grammar recognition on
int build_grammar(UserData *udata); // Build the offline recognition grammar network
int update_lexicon(UserData *udata); // Update the offline recognition grammar lexicon
int run_asr(UserData *udata); // Run offline grammar recognition

2.1选择进行离线语法识别的语音文件

/*
 * Interactively ask the user which sample recording to recognise.
 *
 * Prints a two-entry menu and reads one key with _getch(). The menu is
 * repeated for any key other than '1', '2' or Esc.
 *
 * Returns a pointer to a string literal holding the relative path of the
 * chosen PCM file. Pressing Esc (key code 27) terminates the whole process
 * via exit(0), so the function never returns in that case.
 */
const char* get_audio_file(void)
{
	char choice;

	do {
		printf("请选择音频文件:\n");
		printf("1.开始\n");
		printf("2.测试\n");
		choice = _getch();

		if ('1' == choice) {
			printf("\n1.开始\n");
			return "wav/start.pcm";
		}
		if ('2' == choice) {
			printf("\n2.测试\n");
			return "wav/test.pcm";
		}
		/* Any other key: fall through and re-check for Esc below. */
	} while (choice != 27);

	exit(0);
	return NULL; /* not reached; keeps compilers that do not know exit() is noreturn quiet */
}

2.2构建离线识别语法网络

/*
 * Completion callback passed to QISRBuildGrammar() by build_grammar().
 *
 * It reports the outcome of the grammar build back to the caller through the
 * UserData object that was handed to QISRBuildGrammar(): the result code is
 * stored in errcode, the grammar ID (on success) in grammar_id, and finally
 * build_fini is set to 1. main() polls build_fini to learn that the build
 * has finished.
 *
 * ecode  - result code of the build; MSP_SUCCESS means success.
 * info   - on success, the grammar ID string; may be NULL.
 * udata  - the UserData pointer given to QISRBuildGrammar(); may be NULL.
 *
 * Always returns 0.
 */
int build_grm_cb(int ecode, const char *info, void *udata)
{
	UserData *grm_data = (UserData *)udata;

	if (MSP_SUCCESS == ecode && NULL != info) {
		printf("构建语法成功! 语法ID:%s\n", info);
		if (NULL != grm_data) {
			/* Never use externally supplied text as the format string. */
			_snprintf(grm_data->grammar_id, MAX_GRAMMARID_LEN - 1, "%s", info);
			/*
			 * _snprintf does not NUL-terminate on truncation.
			 * NOTE(review): assumes grammar_id holds at least
			 * MAX_GRAMMARID_LEN bytes - confirm against UserData.
			 */
			grm_data->grammar_id[MAX_GRAMMARID_LEN - 1] = '\0';
		}
	}
	else
		printf("构建语法失败!%d\n", ecode);

	if (NULL != grm_data) {
		/*
		 * Publish the completion flag last, so that a poller that sees
		 * build_fini == 1 finds errcode and grammar_id already written.
		 * NOTE(review): presumably called on an SDK thread; there is no
		 * memory barrier here, so this only fixes the program order.
		 */
		grm_data->errcode = ecode;
		grm_data->build_fini = 1;
	}

	return 0;
}

/*
 * Load the BNF grammar file GRM_FILE into memory and submit it to
 * QISRBuildGrammar() for building the offline recognition grammar network.
 *
 * udata - passed through unchanged to QISRBuildGrammar() together with the
 *         build_grm_cb callback, which records the build result in it.
 *
 * Returns -1 when the grammar file cannot be opened or read or when memory
 * cannot be allocated; otherwise returns the value of QISRBuildGrammar().
 * The grammar buffer is released before returning.
 */
int build_grammar(UserData *udata)
{
	FILE *grm_file                           = NULL;
	char *grm_content                        = NULL;
	long grm_file_len                        = 0;
	unsigned int grm_cnt_len                 = 0;
	char grm_build_params[MAX_PARAMS_LEN]    = {0};
	int ret                                  = 0;

	grm_file = fopen(GRM_FILE, "rb");
	if (NULL == grm_file) {
		printf("打开\"%s\"文件失败![%s]\n", GRM_FILE, strerror(errno));
		return -1;
	}

	/* Determine the file size; ftell() returns -1 on failure. */
	if (0 == fseek(grm_file, 0, SEEK_END))
		grm_file_len = ftell(grm_file);
	else
		grm_file_len = -1;
	if (grm_file_len < 0 || 0 != fseek(grm_file, 0, SEEK_SET)) {
		printf("读取\"%s\"文件失败!\n", GRM_FILE);
		fclose(grm_file);
		return -1;
	}
	grm_cnt_len = (unsigned int)grm_file_len;

	/* One extra byte so the content can be NUL-terminated. */
	grm_content = (char *)malloc((size_t)grm_cnt_len + 1);
	if (NULL == grm_content)
	{
		printf("内存分配失败!\n");
		fclose(grm_file);
		return -1;
	}

	/* A short read would hand a partly uninitialised buffer to the SDK. */
	if (grm_cnt_len != fread((void*)grm_content, 1, grm_cnt_len, grm_file)) {
		printf("读取\"%s\"文件失败!\n", GRM_FILE);
		free(grm_content);
		fclose(grm_file);
		return -1;
	}
	grm_content[grm_cnt_len] = '\0';
	fclose(grm_file);
	grm_file = NULL;

	/*
	 * The buffer is zero-initialised and at most MAX_PARAMS_LEN - 1 bytes
	 * are written, so the parameter string stays NUL-terminated.
	 */
	_snprintf(grm_build_params, MAX_PARAMS_LEN - 1, 
		"engine_type = local, \
		asr_res_path = %s, sample_rate = %d, \
		grm_build_path = %s, ",
		ASR_RES_PATH,
		SAMPLE_RATE_16K,
		GRM_BUILD_PATH
		);
	/* The build result is reported through the build_grm_cb callback. */
	ret = QISRBuildGrammar("bnf", grm_content, grm_cnt_len, grm_build_params, build_grm_cb, udata);

	free(grm_content);
	grm_content = NULL;

	return ret;
}

2.3更新离线识别语法词典

/*
 * Completion callback passed to QISRUpdateLexicon() by update_lexicon().
 *
 * It reports the outcome of the lexicon update back to the caller through
 * the UserData object that was handed to QISRUpdateLexicon(): the result
 * code is stored in errcode and update_fini is set to 1. main() polls
 * update_fini to learn that the update has finished.
 *
 * ecode  - result code of the update; MSP_SUCCESS means success.
 * info   - not used by this callback.
 * udata  - the UserData pointer given to QISRUpdateLexicon(); may be NULL.
 *
 * Always returns 0.
 */
int update_lex_cb(int ecode, const char *info, void *udata)
{
	UserData *lex_data = (UserData *)udata;

	(void)info;

	if (MSP_SUCCESS == ecode)
		printf("更新词典成功!\n");
	else
		printf("更新词典失败!%d\n", ecode);

	if (NULL != lex_data) {
		/*
		 * Store the result before raising the completion flag, so that
		 * a poller that sees update_fini == 1 does not read a stale
		 * errcode.
		 * NOTE(review): presumably called on an SDK thread; there is no
		 * memory barrier here, so this only fixes the program order.
		 */
		lex_data->errcode = ecode;
		lex_data->update_fini = 1;
	}

	return 0;
}

/*
 * Submit a replacement word list for the lexicon slot LEX_NAME of the
 * grammar identified by udata->grammar_id, via QISRUpdateLexicon().
 *
 * The word list is a hard-coded demo value; keep, change or drop this step
 * according to the needs of the application.
 *
 * udata - must be non-NULL; grammar_id is read from it, and the pointer is
 *         passed on to QISRUpdateLexicon() together with the update_lex_cb
 *         callback, which records the update result in it.
 *
 * Returns the value of QISRUpdateLexicon().
 */
int update_lexicon(UserData *udata)
{
	/*
	 * NOTE(review): the parameters below declare text_encoding = GB2312,
	 * so this literal is presumably expected to be GB2312-encoded in the
	 * compiled source - confirm the source file encoding.
	 */
	const char *lex_content                   = "更新前命令词\n更新后命令词";
	unsigned int lex_cnt_len                  = (unsigned int)strlen(lex_content);
	char update_lex_params[MAX_PARAMS_LEN]    = {0};

	/*
	 * The buffer is zero-initialised and at most MAX_PARAMS_LEN - 1 bytes
	 * are written, so the parameter string stays NUL-terminated.
	 */
	_snprintf(update_lex_params, MAX_PARAMS_LEN - 1, 
		"engine_type = local, text_encoding = GB2312, \
		asr_res_path = %s, sample_rate = %d, \
		grm_build_path = %s, grammar_list = %s, ",
		ASR_RES_PATH,
		SAMPLE_RATE_16K,
		GRM_BUILD_PATH,
		udata->grammar_id);
	return QISRUpdateLexicon(LEX_NAME, lex_content, lex_cnt_len, update_lex_params, update_lex_cb, udata);
}

2.4进行离线语法识别

/*
 * Run one offline grammar recognition pass over a PCM file chosen by the
 * user through get_audio_file().
 *
 * The whole file is read into memory, a recognition session is opened with
 * the grammar identified by udata->grammar_id, and the audio is written to
 * the session in chunks with a short pause between chunks. After the audio
 * has been sent, the result is polled with QISRGetResult() and printed.
 *
 * udata - must be non-NULL; only grammar_id is read from it.
 *
 * Returns the last error code: MSP_SUCCESS on success, -1 when the audio
 * file could not be opened or read or memory could not be allocated, or the
 * code reported by the failing QISR* call.
 */
int run_asr(UserData *udata)
{
	char asr_params[MAX_PARAMS_LEN]    = {0};
	const char *rec_rslt               = NULL;
	const char *session_id             = NULL;
	const char *asr_audiof             = NULL;
	FILE *f_pcm                        = NULL;
	char *pcm_data                     = NULL;
	long pcm_count                     = 0;
	long pcm_size                      = 0;
	int aud_stat                       = MSP_AUDIO_SAMPLE_CONTINUE;
	int ep_status                      = MSP_EP_LOOKING_FOR_SPEECH;
	int rec_status                     = MSP_REC_STATUS_INCOMPLETE;
	int rss_status                     = MSP_REC_STATUS_INCOMPLETE;
	int errcode                        = -1;

	asr_audiof = get_audio_file();
	f_pcm = fopen(asr_audiof, "rb");
	if (NULL == f_pcm) {
		/* Report the path; f_pcm is NULL here and is not a string. */
		printf("打开\"%s\"失败![%s]\n", asr_audiof, strerror(errno));
		goto run_error;
	}

	/* Determine the file size; ftell() returns -1 on failure. */
	if (0 == fseek(f_pcm, 0, SEEK_END))
		pcm_size = ftell(f_pcm);
	else
		pcm_size = -1;
	if (pcm_size < 0 || 0 != fseek(f_pcm, 0, SEEK_SET)) {
		printf("读取\"%s\"失败!\n", asr_audiof);
		goto run_error;
	}

	/*
	 * malloc(0) may return NULL; request at least one byte so that an
	 * empty file is not mistaken for an allocation failure.
	 */
	pcm_data = (char *)malloc(pcm_size > 0 ? (size_t)pcm_size : 1);
	if (NULL == pcm_data)
		goto run_error;
	/* A short read would send uninitialised memory to the recogniser. */
	if ((size_t)pcm_size != fread((void *)pcm_data, 1, (size_t)pcm_size, f_pcm)) {
		printf("读取\"%s\"失败!\n", asr_audiof);
		goto run_error;
	}
	fclose(f_pcm);
	f_pcm = NULL;

	/*
	 * Offline grammar recognition parameters. The buffer is
	 * zero-initialised and at most MAX_PARAMS_LEN - 1 bytes are written,
	 * so the string stays NUL-terminated.
	 */
	_snprintf(asr_params, MAX_PARAMS_LEN - 1, 
		"engine_type = local, \
		asr_res_path = %s, sample_rate = %d, \
		grm_build_path = %s, local_grammar = %s, \
		result_type = xml, result_encoding = GB2312, ",
		ASR_RES_PATH,
		SAMPLE_RATE_16K,
		GRM_BUILD_PATH,
		udata->grammar_id
		);
	session_id = QISRSessionBegin(NULL, asr_params, &errcode);
	if (NULL == session_id)
		goto run_error;
	printf("开始识别...\n");

	while (1) {
		unsigned int len = 6400;

		/* Once fewer than two chunks remain, send the rest in one go. */
		if (pcm_size < 12800)
			len = (unsigned int)pcm_size;

		aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;

		if (0 == pcm_count)
			aud_stat = MSP_AUDIO_SAMPLE_FIRST;

		if (0 == len)
			break;

		printf(">");
		errcode = QISRAudioWrite(session_id, (const void *)&pcm_data[pcm_count], len, aud_stat, &ep_status, &rec_status);
		if (MSP_SUCCESS != errcode)
			goto run_error;

		pcm_count += (long)len;
		pcm_size -= (long)len;

		/* The engine detected the end of speech. */
		if (MSP_EP_AFTER_SPEECH == ep_status)
			break;

		_sleep(150); /* simulate the pacing of live speech */
	}
	/*
	 * Explicitly mark the end of the audio. The return value is not
	 * checked, as in the original flow.
	 */
	QISRAudioWrite(session_id, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &ep_status, &rec_status);

	free(pcm_data);
	pcm_data = NULL;

	/* Poll for the recognition result until it is complete or an error occurs. */
	while (MSP_REC_STATUS_COMPLETE != rss_status && MSP_SUCCESS == errcode) {
		rec_rslt = QISRGetResult(session_id, &rss_status, 0, &errcode);
		_sleep(150);
	}
	printf("\n识别结束:\n");
	printf("=============================================================\n");
	if (NULL != rec_rslt)
		printf("%s\n", rec_rslt);
	else
		printf("没有识别结果!");
	printf("=============================================================\n");

	goto run_exit;

run_error:
	free(pcm_data); /* free(NULL) is a no-op */
	pcm_data = NULL;
	if (NULL != f_pcm) {
		fclose(f_pcm);
		f_pcm = NULL;
	}
run_exit:
	/* Only end a session that was actually started. */
	if (NULL != session_id)
		QISRSessionEnd(session_id, NULL);
	return errcode;
}

2.5程序主函数

/*
 * Demo entry point: log in, build the offline grammar, recognise one audio
 * file, update the lexicon, recognise again, then log out.
 *
 * Each asynchronous step (grammar build, lexicon update) is awaited by
 * polling the completion flag that the matching callback sets in UserData.
 * Any failure jumps straight to the common logout path.
 *
 * Always returns 0.
 */
int main(int argc, char* argv[])
{
	const char *login_params = "appid = 自己的appid"; /* login parameters */
	UserData asr_state;
	int rc = 0;

	/* User name and password are passed as NULL; only the parameter string is used. */
	rc = MSPLogin(NULL, NULL, login_params);
	if (rc != MSP_SUCCESS) {
		printf("登录失败:%d\n", rc);
		goto cleanup;
	}

	memset(&asr_state, 0, sizeof(asr_state));

	/*
	 * A grammar has to be built once before its first use in order to
	 * obtain the grammar ID; later recognitions reuse that ID.
	 */
	printf("构建离线识别语法网络...\n");
	rc = build_grammar(&asr_state);
	if (rc != MSP_SUCCESS) {
		printf("构建语法调用失败!\n");
		goto cleanup;
	}
	/* Wait for build_grm_cb to flag completion. */
	while (asr_state.build_fini != 1) {
		_sleep(300);
	}
	if (asr_state.errcode != MSP_SUCCESS) {
		goto cleanup;
	}

	printf("离线识别语法网络构建完成,开始识别...\n");	
	rc = run_asr(&asr_state);
	if (rc != MSP_SUCCESS) {
		printf("离线语法识别出错: %d \n", rc);
		goto cleanup;
	}

	printf("请按任意键继续\n");
	_getch();

	/* Replace the entries of the grammar's lexicon slot, then recognise again. */
	printf("更新离线语法词典...\n");
	rc = update_lexicon(&asr_state);
	if (rc != MSP_SUCCESS) {
		printf("更新词典调用失败!\n");
		goto cleanup;
	}
	/* Wait for update_lex_cb to flag completion. */
	while (asr_state.update_fini != 1) {
		_sleep(300);
	}
	if (asr_state.errcode != MSP_SUCCESS) {
		goto cleanup;
	}

	printf("更新离线语法词典完成,开始识别...\n");
	rc = run_asr(&asr_state);
	if (rc != MSP_SUCCESS) {
		printf("离线语法识别出错: %d \n", rc);
		goto cleanup;
	}

cleanup:
	MSPLogout();
	printf("请按任意键退出...\n");
	_getch();
	return 0;
}

至此,几大函数的分析基本完成,细节还需要继续深挖。本人也是刚接触语音识别,理解尚浅,还请各路大神不吝赐教。

三、将上述代码做成DLL被其他程序调用------改写上述函数进行调用时,需特别注意函数声明,切勿重复声明。
3.1创建dll项目
3.2更改我们的头文件和源文件
3.3生成DLL文件,找到我们的DLL文件。
参考:https://blog.csdn.net/qq_37924224/article/details/115524682
注:科大讯飞的官方demo在VS2015版本以上不支持64位,仅支持32位。

  • 2
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
好的,下面是一个简单的C#调用科大讯飞离线命令识别的示例: 1. 在官网下载并安装科大讯飞离线命令识别SDK。 2. 在C#项目中添加SDK的引用,并在代码中添加以下命名空间: ``` using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using System.Windows.Forms; using IFlyTek.Speech; ``` 3. 在代码中创建SpeechRecognizer对象,并设置相关参数,例如语音输入源、识别语言、识别模式等等。以下是一个示例: ``` SpeechRecognizer recognizer = new SpeechRecognizer(); recognizer.SetParameter(SpeechConstant.ENGINE_TYPE, SpeechConstant.TYPE_LOCAL); recognizer.SetParameter(SpeechConstant.RESULT_TYPE, SpeechConstant.RESULT_XML); recognizer.SetParameter(SpeechConstant.SUBJECT, "asr"); recognizer.SetParameter(SpeechConstant.LANGUAGE, "zh_cn"); recognizer.SetParameter(SpeechConstant.ACCENT, "mandarin"); recognizer.SetParameter(SpeechConstant.VAD_BOS, "10000"); recognizer.SetParameter(SpeechConstant.VAD_EOS, "10000"); recognizer.SetParameter(SpeechConstant.SAMPLE_RATE, "16000"); recognizer.SetParameter(SpeechConstant.ASR_DWA, "wpgs"); recognizer.SetParameter(SpeechConstant.ASR_OFFLINE_ENGINE_GRAMMER_FILE_PATH, "grammar.bnf"); recognizer.SetParameter(SpeechConstant.ASR_PTT, "1"); ``` 这里的语音输入源是本地,识别语言为中文,识别模式为命令识别,使用的是语音识别语法规则文件(grammar.bnf)。 4. 调用SpeechRecognizer对象的StartRecognize方法开始识别: ``` recognizer.StartRecognize(new RecognizerListener()); ``` 这里的RecognizerListener是一个自定义的识别监听器,用于处理识别结果。 5. 在RecognizerListener中实现OnResult方法,获取识别结果并进行处理: ``` class RecognizerListener : RecognizerListenerAdapter { public override void OnResult(RecognizerResult result, bool isLast) { string text = result.GetString(); MessageBox.Show(text); } } ``` 这里的OnResult方法会在识别结束时被调用,result参数包含了识别结果,isLast参数表示是否是最后一次识别。 需要注意的是,离线命令识别需要预先设置好命令列表,以便识别时能够快速匹配。另外,在使用科大讯飞离线命令识别时,建议使用专门的语音识别语法规则文件,以获得更好的识别效果。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值