pAura,python音频录制分析和训练分类

pAura

python音频录制和分析
https://github.com/tyiannak/paura

安装依赖

系统:ubuntu-18.04.5-desktop-amd64.iso

# 安装pythonlib
pip install pyAudioAnalysis
pip install numpy
# 如果速度较慢,可以加镜像 -i 参数,如
pip install scipy -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install opencv-python matplotlib tqdm eyed3 pydub imblearn plotly pyaudio -i https://pypi.tuna.tsinghua.edu.cn/simple   
# Install portaudio
sudo apt-get install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0
# install opencv for python
sudo apt-get install python-opencv
# required only for paura_lite.py
sudo apt-get install gnuplot

录制音频并分类

python paura.py --blocksize 1.0 --spectrogram --chromagram --record_segments --record_all

deep_audio_features

训练卷积神经网络作为音频分类器
https://github.com/tyiannak/deep_audio_features

采集数据

可以用拾音器和pAura采集数据

训练

# Train a CNN audio classifier from the "computer" and "keyboard" data
# folders; the resulting model is saved under the task name "energy".
from deep_audio_features.bin import basic_training as bt

class_folders = ["computer", "keyboard"]
bt.train_model(class_folders, "energy")

测试

# Run the trained model on a single WAV file and print the predicted class.
from deep_audio_features.bin import basic_test as btest

model_file = "pkl/energy.pt"
d, p = btest.test_model(model_file, '1.wav', layers_dropped=0, test_segmentation=False)
# d: predicted class index/indices, p: class probabilities
print("==> d", d)
print("==> p", p)
class_names = ["computer", "keyboard"]
print("==> class", class_names[d[0]])

pAura 和 deep_audio_features 组合使用

主要修改 record_audio 函数:保留 pAura 的录音功能,并把 pAura 原有的声音分类算法替换为 deep_audio_features 的分类算法。

import sys
import time
import numpy
import scipy
import cv2
import argparse
import scipy.io.wavfile as wavfile
from pyAudioAnalysis import ShortTermFeatures as sF
from pyAudioAnalysis import MidTermFeatures as mF
from pyAudioAnalysis import audioTrainTest as aT
import scipy.signal
import itertools
import operator
import datetime
import signal
import pyaudio
import os
import struct
from deep_audio_features.bin import basic_test as btest

# Module-level state shared between record_audio() and signal_handler().
# NOTE(review): `global` statements at module scope are no-ops; these three
# lines are kept verbatim from the original listing.
global fs
global all_data
global outstr
fs = 8000                   # default sampling rate in Hz
FORMAT = pyaudio.paInt16    # 16-bit signed samples for the PyAudio stream
all_data = []               # accumulated samples for the final "record_all" WAV
plot_h = 150                # plot height (pixels) — presumably for spectrogram display; unused here
plot_w = 720                # plot width (pixels) — unused in this modified version
status_h = 150              # status-bar height (pixels) — unused in this modified version


def signal_handler(signal, frame):
    """
    SIGINT (Ctrl+C) handler: flush whatever audio has been accumulated in
    the global buffer to a WAV file named after the session timestamp,
    then terminate the process.
    """
    samples = numpy.int16(all_data)
    # Only write a file when more than one sample was actually buffered.
    if samples.size > 1:
        wavfile.write(outstr + ".wav", fs, samples)
    sys.exit(0)


# Install the SIGINT handler so Ctrl+C flushes the audio buffer before exit.
signal.signal(signal.SIGINT, signal_handler)



"""
Utility functions
"""


def most_common(L):
    """
    Return the most frequent element of L.

    Ties are broken in favour of the element whose first occurrence in L
    comes earliest.
    """
    # Pair each value with its position, then sort so equal values are adjacent.
    indexed = sorted((value, position) for position, value in enumerate(L))
    grouped = itertools.groupby(indexed, key=operator.itemgetter(0))

    def _score(group):
        _, members = group
        positions = [where for _, where in members]
        # Higher count wins; among equal counts the smaller first index wins,
        # hence the negated minimum position.
        return len(positions), -min(positions)

    return max(grouped, key=_score)[0]


def plotCV(function, width, height, max_val):
    """
    Render a 1-D signal as vertical bars on a (height x width) OpenCV image,
    scaling sample values by max_val. Returns the image array.
    """
    if len(function) > width:
        # Keep only the most recent `width` samples of the signal.
        scaled = height * (function[len(function) - width - 1:-1] / max_val)
    else:
        scaled = height * (function / max_val)

    canvas = numpy.zeros((height, width, 3))
    bars = numpy.int32(numpy.around(scaled))

    for col, bar in enumerate(bars):
        # One magenta line per sample, anchored at the vertical mid-line.
        cv2.line(canvas, (col, int(height / 2)), (col, height - bar),
                 (255, 0, 255))

    return canvas


"""
Core functionality:
"""


def record_audio(block_size, fs=8000, show_spec=False, show_chroma=False,
                 log_sounds=False, logs_all=False):
    """
    Record audio from the default input device in blocks of `block_size`
    seconds and classify each block with the deep_audio_features model.

    :param block_size: duration of each analysis block, in seconds
    :param fs: sampling rate in Hz (the bundled model expects 8 kHz)
    :param show_spec: accepted for CLI compatibility; spectrogram display is
                      not implemented in this modified version
    :param show_chroma: accepted for CLI compatibility; chromagram display is
                        not implemented in this modified version
    :param log_sounds: if True, save every block as a WAV segment in a
                       timestamped folder
    :param logs_all: if True, accumulate all samples so the SIGINT handler
                     can write the whole session to a single WAV file
    """
    # initialize the recording stream (mono, 16-bit, one block per buffer)
    mid_buf_size = int(fs * block_size)
    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT, channels=1, rate=fs,
                     input=True, frames_per_buffer=mid_buf_size)
    mid_buf = []
    count = 0
    global all_data
    global outstr
    all_data = []
    # initialize counters etc
    time_start = time.time()
    outstr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    out_folder = outstr + "_segments"
    if log_sounds and not os.path.exists(out_folder):
        os.makedirs(out_folder)
    # classes the deep_audio_features model was trained on
    class_names = ["computer", "keyboard"]

    while True:
        try:
            block = stream.read(mid_buf_size)
            n_samples = len(block) // 2       # 16-bit samples: 2 bytes each
            fmt = "%dh" % n_samples           # renamed: don't shadow builtin `format`
            cur_win = list(struct.unpack(fmt, block))
            mid_buf = mid_buf + cur_win
            if logs_all:
                # BUGFIX: the original ignored `logs_all`, so all_data stayed
                # empty and the SIGINT handler / --record_all wrote nothing.
                all_data += cur_win
            del cur_win

            # time since recording started:
            e_time = (time.time() - time_start)
            x = numpy.int16(mid_buf)

            if log_sounds:
                out_file = os.path.join(out_folder,
                                        "{0:.2f}_".format(e_time).zfill(8) +
                                        ".wav")
            else:
                # BUGFIX: out_file was only defined when log_sounds was True,
                # but the classifier below always needs a file on disk —
                # fall back to a scratch file.
                out_file = "temp.wav"
            wavfile.write(out_file, fs, x)

            print("==> out_file", out_file)
            d, p = btest.test_model("pkl/energy.pt", out_file,
                                    layers_dropped=0, test_segmentation=False)
            print("==> d", d)
            print("==> p", p)
            print("==> class", class_names[d[0]])
            print("{0:.2f}\t{1:s}".format(e_time,
                                        class_names[d[0]]))

            mid_buf = []
            cv2.waitKey(10)   # pump the OpenCV event loop
            count += 1
        except IOError:
            print("Error recording")


def parse_arguments():
    """
    Build and parse the command-line arguments for real-time audio analysis.

    :return: argparse.Namespace with blocksize, samplingrate, chromagram,
             spectrogram, record_segments and record_all attributes
    """
    parser = argparse.ArgumentParser(description="Real time audio analysis")
    parser.add_argument("-bs", "--blocksize", type=float,
                        choices=[0.25, 0.5, 0.75, 1, 2], default=1,
                        help="Recording block size")
    # BUGFIX: help text was a copy-paste of the blocksize help
    # ("Recording block size"); it now describes the sampling rate.
    parser.add_argument("-fs", "--samplingrate", type=int,
                        choices=[4000, 8000, 16000, 32000, 44100],
                        default=8000, help="Recording sampling rate")
    parser.add_argument("--chromagram", action="store_true",
                        help="Show chromagram")
    parser.add_argument("--spectrogram", action="store_true",
                        help="Show spectrogram")
    parser.add_argument("--record_segments", action="store_true",
                        help="Record detected sounds to wavs")
    parser.add_argument("--record_all", action="store_true",
                        help="Record the whole recording to a single"
                             " audio file")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_arguments()
    fs = args.samplingrate
    if fs != 8000:
        print("Warning! Segment classifiers have been trained on 8KHz samples. "
              "Therefore results will be not optimal. ")
    record_audio(args.blocksize, fs, args.spectrogram,
                 args.chromagram, args.record_segments, args.record_all)
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

绯虹剑心

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值