语音测试,串口和adb


测试介绍

本脚本主要用于测试【语音助手或智能家居】的【识别和唤醒】功能。
语音数据的存储形式:由多层嵌套文件夹组成,如【唤醒文件夹】——>【青年人文件夹】——>音频,因此脚本中多了一个递归搜索音频文件的方法。

代码流程:

  1. 文件夹下的音频文件搜索
  2. 播放音频文件
  3. 读取log日志
  4. 比对结果
  5. 输出保存

写的逻辑比较乱,水平有限,见谅

通过serial库调用串口实现测试

唤醒测试

# coding=utf-8
import re
import os
import serial
import serial.tools.list_ports
import time
import pandas as pd
import pyaudio
import wave

# -------------播放-------------
def play_audio(audio_path):
    """Play a WAV file through a PyAudio output stream, blocking until done.

    Args:
        audio_path: Path of the WAV file to play.

    The wave file, the output stream and the PyAudio instance are released
    even when opening the stream or writing audio raises.
    """
    CHUNK = 1024  # number of frames read from the file per write

    with wave.open(audio_path, mode='rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(CHUNK)
                # readframes returns b'' once the file is exhausted
                while data != b'':
                    stream.write(data)
                    data = wf.readframes(CHUNK)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()

# ---------搜索文件夹-----------
def search_files(path, all_files=None):
    """Recursively collect every file below *path*.

    Spaces in a file's own name are replaced by underscores on disk, and the
    path that is returned is the one that exists after the rename.

    Args:
        path: Directory to search.
        all_files: Optional list to append to; a fresh list is created when
            omitted so that separate calls do not share results.

    Returns:
        The list of file paths found.
    """
    if all_files is None:
        all_files = []
    for filename in os.listdir(path):
        cur_path = os.path.join(path, filename)
        if os.path.isdir(cur_path):
            search_files(cur_path, all_files)
            continue
        # Only the file name is rewritten; touching directory components
        # would point the rename at a folder that does not exist.
        new_name = filename.replace(' ', '_')
        if new_name != filename:
            new_path = os.path.join(path, new_name)
            os.rename(cur_path, new_path)
            cur_path = new_path
        all_files.append(cur_path)
    return all_files

#-----------串口日志捕获,并打印---------------
def catch_output(com):
    """Read all pending log lines from the serial port.

    Args:
        com: Object exposing ``readlines()`` that yields ``bytes`` lines.

    Returns:
        A list of decoded lines with surrounding whitespace removed.  Bytes
        that are not valid UTF-8 are replaced with U+FFFD instead of raising,
        so line noise on the port does not abort the test run.
    """
    raw_lines = com.readlines()
    return [raw.decode('utf-8', errors='replace').strip() for raw in raw_lines]

# 输入,打印输出并保存
def send_out_cmd(com, path, save_path, test_num):
    """Play each audio file under *path*, read the serial log and score it.

    Args:
        com: Open serial port handed to ``catch_output``.
        path: Root folder searched recursively for audio files.
        save_path: Excel file the result table is written to.
        test_num: Stop after this many audio files have been played.

    Every recognised text is compared with the name of the audio file's
    parent folder.  Scores: 1 = match, 0 = nothing recognised, -1 = a
    different text was recognised.
    """
    columns = ['audio', 'txt', 'result_txt']
    pattern = re.compile(r'\[TXT\]: (.*?), \[KID\]')

    all_files = search_files(path)
    rows = []      # one [audio, expected, actual] entry per scored result
    correct = []   # score per row, kept parallel to ``rows``

    for count, audio_path in enumerate(all_files, start=1):
        play_audio(audio_path)
        time.sleep(2)  # wait before reading so the log lines can arrive
        data = catch_output(com)

        audio = os.path.basename(audio_path)
        txt = os.path.basename(os.path.dirname(audio_path))

        # Keep only lines that carry a recognition result; unrelated log
        # lines are skipped instead of raising IndexError.
        recognised = []
        for line in data:
            match = pattern.search(str(line))
            if match:
                recognised.append(match.group(1))

        if not recognised:
            result_txt = 'nan'
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            correct.append(0)
            print("result: False\n")
            rows.append([audio, txt, result_txt])

        for result_txt in recognised:
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            if result_txt.strip() == txt:
                correct.append(1)
                print("result: True\n")
            else:
                correct.append(-1)
                print("result: error\n")
            rows.append([audio, txt, result_txt])

        # Save after every audio so an aborted run keeps its partial results.
        df = pd.DataFrame(rows, columns=columns)
        df['correct'] = correct
        df.to_excel(save_path)

        if count == test_num:
            break

    df = pd.DataFrame(rows, columns=columns)
    df['correct'] = correct
    desc_result(df, correct)
    df.to_excel(save_path)

#--------------结果统计------------------
def desc_result(df, correct):
    """Print wake-up totals and the accuracy / crosstalk / rejection rates.

    Args:
        df: Result table; only ``len(df)`` is used.
        correct: List of scores (1 = correct, -1 = wrong text, 0 = no result).

    An empty table reports every rate as 0.0 instead of dividing by zero.
    """
    total = len(df)

    def rate(score):
        if not total:
            return 0.0
        return round(float(correct.count(score) / total) * 100, 2)

    print(f"唤醒总数:{total} 唤醒正确数:{correct.count(1)} 准确率:{rate(1)}%")
    print(f"串扰率:{rate(-1)}% 拒识率:{rate(0)}%")

# -------------检查可用串口-------------
def search_port():
    """Print every serial port reported by pyserial, or a notice if none."""
    ports = list(serial.tools.list_ports.comports())
    if not ports:
        print('无可用串口')
        return
    for port in ports:
        print(port)

if __name__ == '__main__':
    search_port()
    path = r'C:\Users\bwli14\Desktop\**'  # root folder of the wake-up audio
    save_path = r'C:\Users\bwli14\Desktop\result.xlsx'  # result spreadsheet
    test_num = 5    # number of audio files to test
    # Open the serial port; the context manager closes it again even when
    # the test run raises.
    with serial.Serial(port='COM4', baudrate=115200, timeout=2) as com:
        send_out_cmd(com,path,save_path,test_num)

    print('结束...')
    input('Press Enter to exit...')

识别测试

# coding=utf-8
import re
import os
import serial
import time
import pandas as pd
import serial.tools.list_ports
import pyaudio
import wave

# -------------播放-------------
def play_audio(audio_path):
    """Play a WAV file through a PyAudio output stream, blocking until done.

    Args:
        audio_path: Path of the WAV file to play.

    The wave file, the output stream and the PyAudio instance are released
    even when opening the stream or writing audio raises.
    """
    CHUNK = 1024  # number of frames read from the file per write

    with wave.open(audio_path, mode='rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(CHUNK)
                # readframes returns b'' once the file is exhausted
                while data != b'':
                    stream.write(data)
                    data = wf.readframes(CHUNK)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()

# ---------搜索文件夹-----------
def search_files(path, all_files=None):
    """Recursively collect every file below *path*.

    Spaces in a file's own name are replaced by underscores on disk, and the
    path that is returned is the one that exists after the rename.

    Args:
        path: Directory to search.
        all_files: Optional list to append to; a fresh list is created when
            omitted so that separate calls do not share results.

    Returns:
        The list of file paths found.
    """
    if all_files is None:
        all_files = []
    for filename in os.listdir(path):
        cur_path = os.path.join(path, filename)
        if os.path.isdir(cur_path):
            search_files(cur_path, all_files)
            continue
        # Only the file name is rewritten; touching directory components
        # would point the rename at a folder that does not exist.
        new_name = filename.replace(' ', '_')
        if new_name != filename:
            new_path = os.path.join(path, new_name)
            os.rename(cur_path, new_path)
            cur_path = new_path
        all_files.append(cur_path)
    return all_files

#-----------串口日志捕获,并打印---------------
def catch_output(com):
    """Read all pending log lines from the serial port.

    Args:
        com: Object exposing ``readlines()`` that yields ``bytes`` lines.

    Returns:
        A list of decoded lines with surrounding whitespace removed.  Bytes
        that are not valid UTF-8 are replaced with U+FFFD instead of raising,
        so line noise on the port does not abort the test run.
    """
    raw_lines = com.readlines()
    return [raw.decode('utf-8', errors='replace').strip() for raw in raw_lines]


# 输入,打印输出并保存
def send_out_cmd(com,dist_path,awa_path,save_path,test_num):
    """Wake the device, play each recognition audio and score the log output.

    Args:
        com: Open serial port handed to ``catch_output``.
        dist_path: Root folder searched recursively for recognition audio.
        awa_path: Wake-up audio file played before every recognition audio.
        save_path: Excel file the result table is written to.
        test_num: Stop after this many audio files have been played.

    Every recognised text is compared with the name of the audio file's
    parent folder.  Scores: 1 = match, 0 = nothing recognised, -1 = a
    different text was recognised.
    """
    columns = ['audio', 'txt', 'result_txt']
    pattern = re.compile(r'\[TXT\]: (.*?), \[KID\]')

    all_files = search_files(dist_path)
    rows = []      # one [audio, expected, actual] entry per scored result
    correct = []   # score per row, kept parallel to ``rows``

    for count, audio_path in enumerate(all_files, start=1):
        # The device must be awake before it accepts a command.
        huanxing_start(com,awa_path)

        play_audio(audio_path)
        time.sleep(2)  # wait before reading so the log lines can arrive
        data = catch_output(com)

        audio = os.path.basename(audio_path)
        txt = os.path.basename(os.path.dirname(audio_path))

        # Keep only lines that carry a recognition result; unrelated log
        # lines are skipped instead of raising IndexError.
        recognised = []
        for line in data:
            match = pattern.search(str(line))
            if match:
                recognised.append(match.group(1))

        if not recognised:
            result_txt = 'nan'
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            correct.append(0)
            print("result: False\n")
            rows.append([audio, txt, result_txt])

        for result_txt in recognised:
            print(f"NO.{count}: {audio}")
            print(f"expected: {txt}")
            print(f"actual: {result_txt}")
            if result_txt.strip() == txt:
                correct.append(1)
                print("result: True\n")
            else:
                correct.append(-1)
                print("result: error\n")
            rows.append([audio, txt, result_txt])

        # Save after every audio so an aborted run keeps its partial results.
        df = pd.DataFrame(rows, columns=columns)
        df['correct'] = correct
        df.to_excel(save_path)

        if count == test_num:
            break

    df = pd.DataFrame(rows, columns=columns)
    df['correct'] = correct
    desc_result(df,correct)
    df.to_excel(save_path)

#--------------结果统计------------------
def desc_result(df,correct):
    """Print sentence totals and the accuracy / crosstalk / rejection rates.

    Args:
        df: Result table; only ``len(df)`` is used.
        correct: List of scores (1 = correct, -1 = wrong text, 0 = no result).

    An empty table reports every rate as 0.0 instead of dividing by zero.
    """
    total = len(df)

    def rate(score):
        if not total:
            return 0.0
        return round(float(correct.count(score) / total) * 100, 2)

    print(f"句总数:{total} 句正确数:{correct.count(1)} 准确率:{rate(1)}%")
    print(f"串扰率:{rate(-1)}% 拒识率:{rate(0)}%")

# ---------唤醒音频-----------
def huanxing_start(com,awa_path,wake_word='摩根摩根',max_attempts=None):
    """Play the wake-up audio repeatedly until the device reports the wake word.

    Args:
        com: Open serial port handed to ``catch_output``.
        awa_path: Wake-up audio file to play.
        wake_word: Recognised text that counts as a successful wake-up.
        max_attempts: Maximum number of plays; ``None`` (the default) retries
            without limit.

    Returns:
        True once the wake word is seen, False if ``max_attempts`` plays
        passed without it.
    """
    pattern = re.compile(r'\[TXT\]: (.*?), \[KID\]')
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        play_audio(awa_path)
        time.sleep(1)
        lines = catch_output(com)
        # Check every captured line, not just the first recognised text, so
        # an earlier unrelated result in the same batch cannot hide a
        # successful wake-up.
        heard = []
        for line in lines:
            match = pattern.search(str(line))
            if match:
                heard.append(match.group(1).strip())
        if wake_word in heard:
            print('唤醒成功')
            return True
    return False

# -------------检查可用串口-------------
def search_port():
    """List the serial ports pyserial can see, one per line."""
    available = list(serial.tools.list_ports.comports())
    if available:
        for entry in available:
            print(entry)
    else:
        print('无可用串口')

if __name__ == '__main__':
    search_port()
    dist_path = r'C:\Users\bwli14\Desktop\***'  # recognition audio folder
    awa_path = r'C:\Users\bwli14\Desktop\**\**.wav'    # wake-up audio file
    save_path = r'C:\Users\bwli14\Desktop\result.xlsx'  # result spreadsheet
    test_num = 5    # number of audio files to test
    # The context manager closes the serial port even when the run raises.
    with serial.Serial(port='COM4', baudrate=115200, timeout=2) as com:
        send_out_cmd(com,dist_path,awa_path,save_path,test_num)
    print('结束...')
    input('Press Enter to exit...')

通过adb命令将设备日志拉取到本地,再读取日志实现测试

唤醒测试

# coding=utf-8
import os
import re
import subprocess
import time
import wave

import pandas as pd
import pyaudio
from loguru import logger

#-------------播放-------------
def play_audio(audio_path):
    """Play a WAV file through a PyAudio output stream, blocking until done.

    Args:
        audio_path: Path of the WAV file to play.

    The wave file, the output stream and the PyAudio instance are released
    even when opening the stream or writing audio raises.
    """
    CHUNK = 1024  # number of frames read from the file per write

    with wave.open(audio_path, mode='rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(CHUNK)
                # readframes returns b'' once the file is exhausted
                while data != b'':
                    stream.write(data)
                    data = wf.readframes(CHUNK)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()

#---------搜索文件夹-----------
def search_files(path, all_files=None):
    """Recursively collect every file below *path*.

    Spaces in a file's own name are replaced by underscores on disk, and the
    path that is returned is the one that exists after the rename.

    Args:
        path: Directory to search.
        all_files: Optional list to append to; a fresh list is created when
            omitted so that separate calls do not share results.

    Returns:
        The list of file paths found.
    """
    if all_files is None:
        all_files = []
    for filename in os.listdir(path):
        cur_path = os.path.join(path, filename)
        if os.path.isdir(cur_path):
            search_files(cur_path, all_files)
            continue
        # Only the file name is rewritten; touching directory components
        # would point the rename at a folder that does not exist.
        new_name = filename.replace(' ', '_')
        if new_name != filename:
            new_path = os.path.join(path, new_name)
            os.rename(cur_path, new_path)
            cur_path = new_path
        all_files.append(cur_path)
    return all_files

#---------adb命令输入到cmd----------
def system_shell(shl):
    """Run a command line (e.g. the ``adb pull`` command) through the shell.

    Args:
        shl: Command string to execute.

    Returns:
        The command's exit code, so callers can tell when the command failed
        instead of silently carrying on.
    """
    return subprocess.run(shl, shell=True).returncode

#----------读取日志信息并返回-------------
def read_logs(log_path=r"C:\Users\bwli14\Desktop\**\**.log"):
    """Return the log text appended since the previous call.

    The read offset is kept in the module-level ``start_point`` so that each
    call only sees new content.

    Args:
        log_path: Local log file to read; defaults to the path the adb pull
            command writes to.

    Returns:
        The newly appended text.  Lines that are not valid UTF-8 are skipped.
    """
    global start_point
    # Tolerate the global not being initialised yet (e.g. when imported).
    offset = globals().get('start_point', 0)
    # If the file is now shorter than the saved offset it was replaced or
    # truncated; start again from the beginning rather than reading nothing.
    if os.path.getsize(log_path) < offset:
        offset = 0

    decoded = []
    # Binary mode is required because the offset is counted in bytes.
    with open(log_path, "rb") as fo:
        fo.seek(offset, 0)  # absolute position from the start of the file
        for line in fo.readlines():
            try:
                decoded.append(line.decode('utf-8'))
            except UnicodeDecodeError:
                continue
        # Remember where this read ended for the next call.
        start_point = fo.tell()
    return ''.join(decoded)

#-----正则提取-------
def re_extract(log):
    """Return True when the log text contains ``ci_wakeup``, else False."""
    return re.search('ci_wakeup', log) is not None

#-----结果描述统计-------
def desc_result(df):
    """Print wake-up totals plus the accuracy and rejection rates.

    Args:
        df: Result table with a boolean ``result`` column (True = woke up).

    An empty table reports both rates as 0.0 instead of dividing by zero.
    """
    total = len(df)
    woke = len(df[df['result']==True])
    missed = len(df[df['result']==False])

    def rate(part):
        if not total:
            return 0.0
        return round(float(part / total) * 100, 2)

    print(f"唤醒总数:{total} 唤醒正确数:{woke}")
    print(f"准确率:{rate(woke)}% 拒识率:{rate(missed)}%")


if __name__ == '__main__':
    # Wake-up test driver: play each audio file, pull the device log with adb,
    # look for the wake-up marker in the new log text and record the outcome.
    start_point=0   # byte offset into the log file; updated by read_logs(), do not modify
    shl = 'adb pull /sdcard/***/***.log C:\\Users\\bwli14\\Desktop\\**'   # adb pull command
    path = r'C:\Users\bwli14\Desktop\*******' # corpus (audio) folder
    save_path = r'C:\Users\bwli14\Desktop\***.xlsx'   # result spreadsheet path
    expected = 'ci_wakeup' # expected value
    test_num = 5    # number of audio files to test
    wait_time = 2   # how long to wait when the device does not respond
    all_files = search_files(path)  # collect every corpus file path
    read_logs() # prime the offset so later reads start at the newest log content
    df = pd.DataFrame(columns=['audio','result','expected','actual','play_aftertime','response_time'])
    count = 0
    for audio_path in all_files:
        count+=1
        play_audio(audio_path)  # play the audio
        time.sleep(1)   # wait for a response
        # record a timer reading and the local wall-clock time after playback
        start = time.perf_counter()
        play_aftertime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # poll until the device responds or the wait runs out
        while True:
            system_shell(shl)   # run the adb command
            log = read_logs()   # read the newest log content
            # record a timer reading and the local wall-clock time of this poll
            end = time.perf_counter()
            response_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            # stop polling once the wake-up marker is found
            if re_extract(log)==True:
                break
            # stop polling on timeout; presumably the "-1" accounts for the
            # 1 s sleep above, which happens before `start` is taken
            if round(end-start,2)>=wait_time-1:
                response_time = 0
                break
        # system_shell(shl)
        # log = read_logs()
        result = re_extract(log)    # wake-up result for the last log chunk read
        audio_name = audio_path.split('\\')[-1] # audio file name (backslash-separated path)
        # actual value: the expected marker when woken, otherwise empty
        if result:
            actual = expected
        else:
            actual = ''
        # append this row and save the table after every audio
        new_df = pd.DataFrame([[audio_name,result,expected,actual,play_aftertime,response_time]],
                              columns=['audio','result','expected','actual','play_aftertime','response_time'])
        df = pd.concat([df,new_df],ignore_index=True)
        df.to_excel(save_path)

        print(f"NO.{count}:{audio_name}")
        print(f'expected:{expected} \nactual:{actual}')
        print(f"result: {result}\n")

        # stop once the requested number of files has been tested
        if count == test_num:
            break
    desc_result(df)

识别测试

识别测试可参照上面的唤醒测试自行实现,这里留作练习。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值