测试介绍
本脚本主要用于测试【语音助手或智能家居】的【识别和唤醒】功能。
语音数据存储形式:由多层嵌套文件夹组成,如【唤醒文件夹】——>【青年人文件夹】——>音频,因此需要额外提供一个递归搜索音频文件的方法。
代码流程:
- 文件夹下的音频文件搜索
- 播放音频文件
- 读取log日志
- 比对结果
- 输出保存
代码逻辑写得比较乱,水平有限,见谅。
通过serial库调用串口实现测试
唤醒测试
# coding=utf-8
import re
import os
import serial
import serial.tools.list_ports
import time
import pandas as pd
import pyaudio
import wave
# -------------播放-------------
def play_audio(audio_path):
    """Play a WAV file through a PyAudio output stream, blocking until it ends.

    The wave file, the output stream and the PyAudio instance are all released
    even if playback raises part-way through.

    Args:
        audio_path: Path of the WAV file to play.
    """
    chunk_frames = 1024  # frames handed to the output stream per write
    with wave.open(audio_path, mode='rb') as wf:
        p = pyaudio.PyAudio()
        try:
            # Open the stream with the sample format the file itself declares.
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(chunk_frames)
                while data != b'':
                    stream.write(data)
                    data = wf.readframes(chunk_frames)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
# ---------搜索文件夹-----------
def search_files(path, all_files=None):
    """Recursively collect every file under *path*, normalising spaces in names.

    Files whose name contains spaces are renamed on disk with the spaces
    replaced by underscores, and the path that is returned is the one that
    exists after the rename. Only the file name is altered, never the
    directory part of the path.

    Args:
        path: Directory to search.
        all_files: Optional list to append to (used by the recursion). A new
            list is created per top-level call when omitted.

    Returns:
        The list of file paths, in directory-listing order, depth first.
    """
    if all_files is None:
        all_files = []
    for filename in os.listdir(path):
        cur_path = os.path.join(path, filename)
        if os.path.isdir(cur_path):
            search_files(cur_path, all_files)
            continue
        clean_name = filename.replace(' ', '_')
        if clean_name != filename:
            renamed_path = os.path.join(path, clean_name)
            # Never clobber an existing file; keep the original name instead.
            if not os.path.exists(renamed_path):
                os.rename(cur_path, renamed_path)
                cur_path = renamed_path
        all_files.append(cur_path)
    return all_files
#-----------串口日志捕获,并打印---------------
def catch_output(com):
    """Read the lines currently returned by the serial port as stripped text.

    Bytes that are not valid UTF-8 are replaced rather than raising, so a
    single corrupted byte on the line cannot abort a whole test run.

    Args:
        com: Object exposing ``readlines()`` that returns a list of ``bytes``.

    Returns:
        A list of decoded lines with surrounding whitespace removed.
    """
    return [raw.decode('utf-8', errors='replace').strip() for raw in com.readlines()]
# 输入,打印输出并保存
def send_out_cmd(com, path, save_path, test_num):
    """Play each wake-up audio, compare the device log with the expectation, save results.

    The expected text of an audio file is the name of its parent folder. Every
    log line carrying a ``[TXT]: ..., [KID]`` field yields one result row; log
    lines without that field are ignored. If no line carries the field, one
    row is recorded with result ``'nan'``.

    Args:
        com: Serial port object passed to ``catch_output``.
        path: Root folder that is searched recursively for audio files.
        save_path: Excel file the results are written to after every audio.
        test_num: Stop after this many audio files have been played.
    """
    columns = ['audio', 'txt', 'result_txt']
    txt_pattern = re.compile(r'\[TXT\]: (.*?), \[KID\]')
    rows = []     # one [audio, expected, actual] entry per recorded outcome
    correct = []  # parallel to rows: 1 correct, 0 nothing recognised, -1 wrong text

    def build_frame():
        frame = pd.DataFrame(rows, columns=columns)
        frame['correct'] = correct
        return frame

    for count, audio_path in enumerate(search_files(path), start=1):
        # ---- play the audio, then collect whatever the device logged ----
        play_audio(audio_path)
        time.sleep(2)
        data = catch_output(com)

        audio = os.path.basename(audio_path)               # audio file name
        txt = os.path.basename(os.path.dirname(audio_path))  # expected text = parent folder

        matches = (txt_pattern.search(str(d)) for d in data)
        recognised = [m.group(1) for m in matches if m]

        if not recognised:
            # Nothing usable came back from the device.
            result_txt = 'nan'
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            correct.append(0)
            print("result: False\n")
            rows.append([audio, txt, result_txt])

        for result_txt in recognised:
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            if result_txt.strip() == txt:
                correct.append(1)
                print("result: True\n")
            else:
                correct.append(-1)
                print("result: error\n")
            rows.append([audio, txt, result_txt])

        # Checkpoint after every audio so an interrupted run keeps its results.
        build_frame().to_excel(save_path)

        if count == test_num:
            break

    df = build_frame()
    desc_result(df, correct)
    df.to_excel(save_path)
#--------------结果统计------------------
def desc_result(df, correct):
    """Print the wake-up summary: totals, accuracy, cross-talk rate and rejection rate.

    Args:
        df: Result table; only its length (number of rows) is used.
        correct: List of outcome codes, 1 (correct), 0 (no recognition) or
            -1 (wrong recognition).

    All rates are reported as 0.0 when there are no rows, instead of dividing
    by zero.
    """
    total = len(df)

    def percent(code):
        if total == 0:
            return 0.0
        return round(float(correct.count(code) / total) * 100, 2)

    print(f"唤醒总数:{total} 唤醒正确数:{correct.count(1)} 准确率:{percent(1)}%")
    print(f"串扰率:{percent(-1)}% 拒识率:{percent(0)}%")
# -------------检查可用串口-------------
def search_port():
    """Print the serial ports reported by pyserial and return them.

    Returns:
        The list produced by ``serial.tools.list_ports.comports()``; empty
        when no port is available (a notice is printed in that case).
    """
    port_list = list(serial.tools.list_ports.comports())
    if not port_list:
        print('无可用串口')
    for port in port_list:
        print(port)
    return port_list
if __name__ == '__main__':
    search_port()
    path = r'C:\Users\bwli14\Desktop\**'  # root folder of the wake-up audio
    save_path = r'C:\Users\bwli14\Desktop\result.xlsx'  # where results are saved
    test_num = 5  # number of audio files to test
    # Open the serial port in a context manager so it is closed even when the
    # test run raises.
    with serial.Serial(port='COM4', baudrate=115200, timeout=2) as com:
        send_out_cmd(com, path, save_path, test_num)
    print('结束...')
    input('Press Enter to exit...')
识别测试
# coding=utf-8
import re
import os
import serial
import time
import pandas as pd
import serial.tools.list_ports
import pyaudio
import wave
# -------------播放-------------
def play_audio(audio_path):
    """Play a WAV file through a PyAudio output stream, blocking until it ends.

    The wave file, the output stream and the PyAudio instance are all released
    even if playback raises part-way through.

    Args:
        audio_path: Path of the WAV file to play.
    """
    chunk_frames = 1024  # frames handed to the output stream per write
    with wave.open(audio_path, mode='rb') as wf:
        p = pyaudio.PyAudio()
        try:
            # Open the stream with the sample format the file itself declares.
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(chunk_frames)
                while data != b'':
                    stream.write(data)
                    data = wf.readframes(chunk_frames)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
# ---------搜索文件夹-----------
def search_files(path, all_files=None):
    """Recursively collect every file under *path*, normalising spaces in names.

    Files whose name contains spaces are renamed on disk with the spaces
    replaced by underscores, and the path that is returned is the one that
    exists after the rename. Only the file name is altered, never the
    directory part of the path.

    Args:
        path: Directory to search.
        all_files: Optional list to append to (used by the recursion). A new
            list is created per top-level call when omitted.

    Returns:
        The list of file paths, in directory-listing order, depth first.
    """
    if all_files is None:
        all_files = []
    for filename in os.listdir(path):
        cur_path = os.path.join(path, filename)
        if os.path.isdir(cur_path):
            search_files(cur_path, all_files)
            continue
        clean_name = filename.replace(' ', '_')
        if clean_name != filename:
            renamed_path = os.path.join(path, clean_name)
            # Never clobber an existing file; keep the original name instead.
            if not os.path.exists(renamed_path):
                os.rename(cur_path, renamed_path)
                cur_path = renamed_path
        all_files.append(cur_path)
    return all_files
#-----------串口日志捕获,并打印---------------
def catch_output(com):
    """Read the lines currently returned by the serial port as stripped text.

    Bytes that are not valid UTF-8 are replaced rather than raising, so a
    single corrupted byte on the line cannot abort a whole test run.

    Args:
        com: Object exposing ``readlines()`` that returns a list of ``bytes``.

    Returns:
        A list of decoded lines with surrounding whitespace removed.
    """
    return [raw.decode('utf-8', errors='replace').strip() for raw in com.readlines()]
# 输入,打印输出并保存
def send_out_cmd(com, dist_path, awa_path, save_path, test_num):
    """Wake the device, play each recognition audio, compare the log, save results.

    The expected text of an audio file is the name of its parent folder. Every
    log line carrying a ``[TXT]: ..., [KID]`` field yields one result row; log
    lines without that field are ignored. If no line carries the field, one
    row is recorded with result ``'nan'``.

    Args:
        com: Serial port object passed to ``catch_output``.
        dist_path: Root folder searched recursively for recognition audio.
        awa_path: Wake-up audio played before every recognition audio.
        save_path: Excel file the results are written to after every audio.
        test_num: Stop after this many audio files have been played.
    """
    columns = ['audio', 'txt', 'result_txt']
    txt_pattern = re.compile(r'\[TXT\]: (.*?), \[KID\]')
    rows = []     # one [audio, expected, actual] entry per recorded outcome
    correct = []  # parallel to rows: 1 correct, 0 nothing recognised, -1 wrong text

    def build_frame():
        frame = pd.DataFrame(rows, columns=columns)
        frame['correct'] = correct
        return frame

    for count, audio_path in enumerate(search_files(dist_path), start=1):
        # ---- wake the device first ----
        huanxing_start(com, awa_path)
        # ---- play the recognition audio, then collect the device log ----
        play_audio(audio_path)
        time.sleep(2)
        data = catch_output(com)

        audio = os.path.basename(audio_path)               # audio file name
        txt = os.path.basename(os.path.dirname(audio_path))  # expected text = parent folder

        matches = (txt_pattern.search(str(d)) for d in data)
        recognised = [m.group(1) for m in matches if m]

        if not recognised:
            # Nothing usable came back from the device.
            result_txt = 'nan'
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            correct.append(0)
            print("result: False\n")
            rows.append([audio, txt, result_txt])

        for result_txt in recognised:
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            if result_txt.strip() == txt:
                correct.append(1)
                print("result: True\n")
            else:
                correct.append(-1)
                print("result: error\n")
            rows.append([audio, txt, result_txt])

        # Checkpoint after every audio so an interrupted run keeps its results.
        build_frame().to_excel(save_path)

        if count == test_num:
            break

    df = build_frame()
    desc_result(df, correct)
    df.to_excel(save_path)
#--------------结果统计------------------
def desc_result(df, correct):
    """Print the recognition summary: totals, accuracy, cross-talk rate and rejection rate.

    Args:
        df: Result table; only its length (number of rows) is used.
        correct: List of outcome codes, 1 (correct), 0 (no recognition) or
            -1 (wrong recognition).

    All rates are reported as 0.0 when there are no rows, instead of dividing
    by zero.
    """
    total = len(df)

    def percent(code):
        if total == 0:
            return 0.0
        return round(float(correct.count(code) / total) * 100, 2)

    print(f"句总数:{total} 句正确数:{correct.count(1)} 准确率:{percent(1)}%")
    print(f"串扰率:{percent(-1)}% 拒识率:{percent(0)}%")
# ---------唤醒音频-----------
def huanxing_start(com, awa_path, wake_word='摩根摩根', max_attempts=None):
    """Play the wake-up audio until the device log reports the wake word.

    After each playback the serial log is read and every line carrying a
    ``[TXT]: ..., [KID]`` field is inspected; the device counts as awake when
    any of those fields equals *wake_word*.

    Args:
        com: Serial port object passed to ``catch_output``.
        awa_path: Path of the wake-up WAV file.
        wake_word: Text the device logs when it has been woken up.
        max_attempts: Maximum number of playbacks; ``None`` (the default)
            retries indefinitely.

    Returns:
        True once the wake word was seen, False when *max_attempts* playbacks
        passed without it.
    """
    txt_pattern = re.compile(r'\[TXT\]: (.*?), \[KID\]')
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        play_audio(awa_path)
        time.sleep(1)
        matches = (txt_pattern.search(str(line)) for line in catch_output(com))
        if any(m.group(1) == wake_word for m in matches if m):
            print('唤醒成功')
            return True
    return False
# -------------检查可用串口-------------
def search_port():
    """Print the serial ports reported by pyserial and return them.

    Returns:
        The list produced by ``serial.tools.list_ports.comports()``; empty
        when no port is available (a notice is printed in that case).
    """
    port_list = list(serial.tools.list_ports.comports())
    if not port_list:
        print('无可用串口')
    for port in port_list:
        print(port)
    return port_list
if __name__ == '__main__':
    search_port()
    dist_path = r'C:\Users\bwli14\Desktop\***'  # root folder of the recognition audio
    awa_path = r'C:\Users\bwli14\Desktop\**\**.wav'  # wake-up audio
    save_path = r'C:\Users\bwli14\Desktop\result.xlsx'  # where results are saved
    test_num = 5  # number of audio files to test
    # Open the serial port in a context manager so it is closed even when the
    # test run raises.
    with serial.Serial(port='COM4', baudrate=115200, timeout=2) as com:
        send_out_cmd(com, dist_path, awa_path, save_path, test_num)
    print('结束...')
    input('Press Enter to exit...')
通过adb命令将设备日志拉取到本地,再读取日志实现测试
唤醒测试
# coding=utf-8
import os
import re
import subprocess
import time
import wave
import pandas as pd
import pyaudio
from loguru import logger
#-------------播放-------------
def play_audio(audio_path):
    """Play a WAV file through a PyAudio output stream, blocking until it ends.

    The wave file, the output stream and the PyAudio instance are all released
    even if playback raises part-way through.

    Args:
        audio_path: Path of the WAV file to play.
    """
    chunk_frames = 1024  # frames handed to the output stream per write
    with wave.open(audio_path, mode='rb') as wf:
        p = pyaudio.PyAudio()
        try:
            # Open the stream with the sample format the file itself declares.
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(chunk_frames)
                while data != b'':
                    stream.write(data)
                    data = wf.readframes(chunk_frames)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
#---------搜索文件夹-----------
def search_files(path, all_files=None):
    """Recursively collect every file under *path*, normalising spaces in names.

    Files whose name contains spaces are renamed on disk with the spaces
    replaced by underscores, and the path that is returned is the one that
    exists after the rename. Only the file name is altered, never the
    directory part of the path.

    Args:
        path: Directory to search.
        all_files: Optional list to append to (used by the recursion). A new
            list is created per top-level call when omitted.

    Returns:
        The list of file paths, in directory-listing order, depth first.
    """
    if all_files is None:
        all_files = []
    for filename in os.listdir(path):
        cur_path = os.path.join(path, filename)
        if os.path.isdir(cur_path):
            search_files(cur_path, all_files)
            continue
        clean_name = filename.replace(' ', '_')
        if clean_name != filename:
            renamed_path = os.path.join(path, clean_name)
            # Never clobber an existing file; keep the original name instead.
            if not os.path.exists(renamed_path):
                os.rename(cur_path, renamed_path)
                cur_path = renamed_path
        all_files.append(cur_path)
    return all_files
#---------adb命令输入到cmd----------
def system_shell(shl):
    """Run a command line through ``os.system`` and return its exit status.

    Args:
        shl: Command string handed to the system shell (here an adb command).

    Returns:
        The value returned by ``os.system``; callers may ignore it, but it
        lets them notice a failed command.
    """
    return os.system(shl)
#----------读取日志信息并返回-------------
def read_logs(log_path=r"C:\Users\bwli14\Desktop\**\**.log"):
    """Return the log text appended since the previous call.

    The module-level ``start_point`` holds the byte offset already consumed;
    it must be defined (initially 0) before the first call and is advanced to
    the end of the file on every call. Lines that are not valid UTF-8 are
    skipped.

    Args:
        log_path: Local log file to read. Defaults to the location the adb
            pull command writes to.

    Returns:
        The newly appended lines joined into one string ('' when nothing new).
    """
    global start_point  # offset persists across calls
    decoded = []
    # Binary mode is required: seek()/tell() offsets are counted in bytes.
    with open(log_path, "rb") as fo:
        # If the file is now shorter than the stored offset (e.g. the log was
        # recreated), start over from the beginning instead of reading nothing.
        fo.seek(0, os.SEEK_END)
        if fo.tell() < start_point:
            start_point = 0
        fo.seek(start_point, os.SEEK_SET)
        for line in fo.readlines():
            try:
                decoded.append(line.decode('utf-8'))
            except UnicodeDecodeError:
                continue
        # Remember how far we have read for the next call.
        start_point = fo.tell()
    return ''.join(decoded)
#-----正则提取-------
def re_extract(log, pattern='ci_wakeup'):
    """Report whether the wake-up marker occurs in the log text.

    Args:
        log: Log text to search.
        pattern: Regular expression to look for; defaults to the wake-up
            marker ``'ci_wakeup'``.

    Returns:
        True when *pattern* matches anywhere in *log*, otherwise False.
    """
    return re.search(pattern, log) is not None
#-----结果描述统计-------
def desc_result(df):
    """Print the wake-up summary: totals, accuracy and rejection rate.

    Args:
        df: Result table with a boolean ``'result'`` column (True = woken).

    Both rates are reported as 0.0 for an empty table, instead of dividing by
    zero.
    """
    total = len(df)
    woken = int(df['result'].eq(True).sum())
    rejected = int(df['result'].eq(False).sum())

    def percent(part):
        if total == 0:
            return 0.0
        return round(float(part / total) * 100, 2)

    print(f"唤醒总数:{total} 唤醒正确数:{woken}")
    print(f"准确率:{percent(woken)}% 拒识率:{percent(rejected)}%")
if __name__ == '__main__':
    start_point = 0  # byte offset into the local log, maintained by read_logs(); do not edit
    shl = 'adb pull /sdcard/***/***.log C:\\Users\\bwli14\\Desktop\\**'  # adb pull command
    path = r'C:\Users\bwli14\Desktop\*******'  # root folder of the audio corpus
    save_path = r'C:\Users\bwli14\Desktop\***.xlsx'  # where results are saved
    expected = 'ci_wakeup'  # expected log marker
    test_num = 5  # number of audio files to test
    wait_time = 2  # seconds to wait for a response before giving up
    columns = ['audio', 'result', 'expected', 'actual', 'play_aftertime', 'response_time']

    all_files = search_files(path)  # collect the whole corpus
    # Pull the log once before initialising the offset. Otherwise the offset
    # is taken from a stale (or missing) local copy, and old wake-up entries
    # would be counted for the first audio.
    system_shell(shl)
    read_logs()  # skip everything already in the log

    rows = []
    df = pd.DataFrame(columns=columns)
    for count, audio_path in enumerate(all_files, start=1):
        play_audio(audio_path)
        time.sleep(1)  # give the device time to respond
        # Reference point for the timeout, plus wall-clock time for the report.
        start = time.perf_counter()
        play_aftertime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        # Poll the device log until a wake-up shows up or the wait expires.
        while True:
            system_shell(shl)
            log = read_logs()  # only the newly appended part
            end = time.perf_counter()
            response_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            result = re_extract(log)
            if result:
                break
            # One second of wait_time was already spent in the sleep above.
            if round(end - start, 2) >= wait_time - 1:
                response_time = 0
                break

        audio_name = os.path.basename(audio_path)
        actual = expected if result else ''

        # Save after every audio so an interrupted run keeps its results.
        rows.append([audio_name, result, expected, actual, play_aftertime, response_time])
        df = pd.DataFrame(rows, columns=columns)
        df.to_excel(save_path)

        print(f"NO.{count}:{audio_name}")
        print(f'expected:{expected} \nactual:{actual}')
        print(f"result: {result}\n")
        if count == test_num:
            break
    desc_result(df)
识别测试
识别测试可以参照上面的唤醒测试自行改写,这里留作练习。