Approach
Use Python's pyaudio library to record speech and save it to a local file, then send the file to the Baidu speech recognition API, and use the recognized text to decide where the next stone should be placed.
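The last step, mapping the recognized text to a board position, depends on the game logic and is not included in the snippets below. As a rough illustration only (the spoken command format and the helper below are assumptions, not the project's actual code), the idea looks something like this:

import re

# Hypothetical helper: assume the player says something like "三行五列"
# ("row three, column five") and Baidu ASR returns that text.
CN_DIGITS = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
             '六': 6, '七': 7, '八': 8, '九': 9, '十': 10}

def parse_move(text):
    # Return (row, col) as 1-based coordinates, or None if the text does not match
    m = re.search('([一二三四五六七八九十])行([一二三四五六七八九十])列', text)
    if m is None:
        return None
    return CN_DIGITS[m.group(1)], CN_DIGITS[m.group(2)]

print(parse_move('三行五列。'))  # -> (3, 5)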
Results
Login page
Game page
Steps
The complete code is too long to post in the article, so only the most important parts are shown here: the recording code and the speech recognition code. A link to the full repository is at the end.
Recording code
# Record speech and save it to a local wav file
import wave

import numpy as np
from pyaudio import PyAudio, paInt16


class GenAudio(object):
    def __init__(self):
        self.num_samples = 4000     # pyaudio buffer size (samples read per block)
        self.sampling_rate = 16000  # sampling rate
        self.level = 1500           # amplitude threshold for treating a sample as "sound"
        self.count_num = 20         # keep recording if more than count_num samples in a block exceed level
        self.save_length = 8        # minimum recording length: save_length * num_samples samples
        self.time_count = 16        # maximum number of blocks to read before giving up
        self.voice_string = []

    # Save the recorded blocks to a wav file
    def save_wav(self, filename):
        wf = wave.open(filename, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(self.sampling_rate)
        wf.writeframes(b''.join(self.voice_string))  # concatenate the raw int16 blocks
        wf.close()

    def read_audio(self):
        pa = PyAudio()
        stream = pa.open(format=paInt16, channels=1, rate=self.sampling_rate, input=True,
                         frames_per_buffer=self.num_samples)
        save_count = 0
        save_buffer = []
        time_count = self.time_count
        while True:
            time_count -= 1
            # Read num_samples samples
            string_audio_data = stream.read(self.num_samples)
            # Convert the raw bytes into an int16 array
            # audio_data = np.fromstring(string_audio_data, dtype=np.short)
            audio_data = np.frombuffer(string_audio_data, dtype='int16')
            # Count the samples that exceed level
            large_sample_count = np.sum(audio_data > self.level)
            print(np.max(audio_data), "large_sample_count=>", large_sample_count)
            # If more than count_num samples are loud, keep at least save_length more blocks
            if large_sample_count > self.count_num:
                save_count = self.save_length
            else:
                save_count -= 1
            if save_count < 0:
                save_count = 0
            if save_count > 0:
                save_buffer.append(string_audio_data)
            else:
                if len(save_buffer) > 0:
                    self.voice_string = save_buffer
                    save_buffer = []
                    print("Recorded a piece of voice successfully!")
                    return True
            if time_count == 0:
                if len(save_buffer) > 0:
                    self.voice_string = save_buffer
                    save_buffer = []
                    print("Recorded a piece of voice successfully!")
                    return True
                else:
                    return False


# Uncomment this block to test the recorder on its own
# if __name__ == "__main__":
#     r = GenAudio()
#     r.read_audio()
#     r.save_wav("./templates/test.wav")
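In the full project the recorder is not run as a script; the Flask app imports GenAudio and calls it. A minimal usage sketch (assuming the code above lives in a module called gen_audio, which is just an illustrative name):

from gen_audio import GenAudio

recorder = GenAudio()
if recorder.read_audio():                      # blocks until a piece of voice is captured or time runs out
    recorder.save_wav("./templates/test.wav")  # the recognition code below reads this file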
Speech recognition code
To use the Baidu speech recognition API, first register at 短语音识别标准版_短语音识别-百度AI开放平台 (baidu.com).
The exact registration steps are easy to find online. New users get tens of thousands of free calls, and even if you are not a new user the service is cheap: 100 calls cost about 0.3 RMB, which is more than enough for testing a demo.
After registering you will have an API key and a secret key; put those two values into the corresponding places in the code below.
Here is the code:
# coding=utf-8
# This file follows the official Baidu API demo
import sys
import json
import base64
import time

IS_PY3 = sys.version_info.major == 3

if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    timer = time.perf_counter
else:
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode
    if sys.platform == "win32":
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time

# Your API_KEY
API_KEY = 'fk6lgD'
# Your SECRET_KEY
SECRET_KEY = 'GELxN59GvdVBSpaErcG'

# File to recognize
AUDIO_FILE = './templates/test.wav'  # only pcm/wav/amr are supported; the fast edition also supports m4a
# File format
FORMAT = AUDIO_FILE[-3:]  # the extension must be pcm/wav/amr; the fast edition also supports m4a

CUID = '123456PYTHON'
# Sampling rate
RATE = 16000  # fixed value

# Standard edition
DEV_PID = 1537  # 1537 = Mandarin with the input-method model; see the docs for other PIDs, languages and models
ASR_URL = 'http://vop.baidu.com/server_api'
SCOPE = 'audio_voice_assistant_get'  # this scope grants ASR access; if it is missing, enable it in the web console (very old apps may not have it)

# For the self-trained model platform, enable the settings below. Once your model is online
# you will see "获取专属模型参数 pid:8001, modelid:1234"; fill in dev_pid and lm_id accordingly
# DEV_PID = 8001
# LM_ID = 1234

# Skip the scope check (very old apps may not have the scope)
# SCOPE = False


class DemoError(Exception):
    pass


"""  TOKEN start """

TOKEN_URL = 'http://aip.baidubce.com/oauth/2.0/token'


def fetch_token():
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if (IS_PY3):
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req)
        result_str = f.read()
    except URLError as err:
        result_str = err.read()
    if (IS_PY3):
        result_str = result_str.decode()

    result = json.loads(result_str)
    if ('access_token' in result.keys() and 'scope' in result.keys()):
        if SCOPE and (not SCOPE in result['scope'].split(' ')):  # SCOPE = False skips this check
            raise DemoError('scope is not correct')
        return result['access_token']
    else:
        raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')

"""  TOKEN end """
# Uncomment this block to test speech-to-text on its own
# if __name__ == '__main__':
#     token = fetch_token()
#
#     speech_data = []
#     with open(AUDIO_FILE, 'rb') as speech_file:
#         speech_data = speech_file.read()
#
#     length = len(speech_data)
#     if length == 0:
#         raise DemoError('file %s length read 0 bytes' % AUDIO_FILE)
#     speech = base64.b64encode(speech_data)
#     if (IS_PY3):
#         speech = str(speech, 'utf-8')
#     params = {'dev_pid': DEV_PID,
#               # "lm_id": LM_ID,  # enable when testing the self-trained model platform
#               'format': FORMAT,
#               'rate': RATE,
#               'token': token,
#               'cuid': CUID,
#               'channel': 1,
#               'speech': speech,
#               'len': length
#               }
#     post_data = json.dumps(params, sort_keys=False)
#     # print post_data
#     req = Request(ASR_URL, post_data.encode('utf-8'))
#     req.add_header('Content-Type', 'application/json')
#     try:
#         begin = timer()
#         f = urlopen(req)
#         result_str = f.read()
#         print("Request time cost %f" % (timer() - begin))
#     except URLError as err:
#         print('asr http response http code : ' + str(err.code))
#         result_str = err.read()
#
#     if (IS_PY3):
#         result_str = str(result_str, 'utf-8')
#     print(result_str)
#     with open("result.txt", "w") as of:
#         of.write(result_str)
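The request logic above only exists inside the commented-out main block. To call it from the Flask app it helps to wrap it in a function; the sketch below is one possible wrapper (it simply repackages the demo code above and reuses the constants and fetch_token defined in this file; the function name recognize is made up for illustration):

def recognize(wav_path=AUDIO_FILE):
    # Send a local wav file to Baidu ASR and return the recognized text ('' on failure)
    token = fetch_token()
    with open(wav_path, 'rb') as speech_file:
        speech_data = speech_file.read()
    if len(speech_data) == 0:
        raise DemoError('file %s length read 0 bytes' % wav_path)
    speech = base64.b64encode(speech_data)
    if IS_PY3:
        speech = str(speech, 'utf-8')
    params = {'dev_pid': DEV_PID, 'format': FORMAT, 'rate': RATE, 'token': token,
              'cuid': CUID, 'channel': 1, 'speech': speech, 'len': len(speech_data)}
    req = Request(ASR_URL, json.dumps(params).encode('utf-8'))
    req.add_header('Content-Type', 'application/json')
    try:
        result_str = urlopen(req).read()
    except URLError as err:
        result_str = err.read()
    if IS_PY3:
        result_str = str(result_str, 'utf-8')
    result = json.loads(result_str)
    # On success Baidu returns err_no == 0 and a 'result' list containing the transcript
    return result['result'][0] if result.get('err_no') == 0 else ''

A Flask view can then call GenAudio().read_audio(), save the recording with save_wav(AUDIO_FILE), and pass the same path to recognize() to get the text that drives the next move.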
If anything is unclear, leave me a comment and I will reply when I see it.
Git repository for the full project: 基于flask的语音控制五子棋: 基于flask的语音控制五子棋,使用百度语音识别api (gitee.com)