DTMF双音多频信号拨号音识别Python实现

明中月0921

已于 2023-12-11 00:59:16 修改

阅读量229

点赞数 1

文章标签： python numpy 开发语言

于 2023-12-11 00:54:06 首次发布

本文链接：https://blog.csdn.net/qq_62682582/article/details/134917453

版权

本文仅用于记录笔者的一次大作业。代码改写自文末参考链接中的实现，笔者在其基础上做了修改，并增加了带通滤波处理，最终得到了理想的识别结果。

import wave
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal


def s_add_wn(x: np.ndarray, snr: float) -> np.ndarray:
    """Return *x* with additive white Gaussian noise at the requested SNR.

    Helper for generating noisy test signals; it is not called anywhere else
    in this script.

    Args:
        x: 1-D signal samples. Any array-like (list, tuple, ndarray) is
            accepted and converted with ``np.asarray``.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        A new array ``x + noise``, where the noise power equals the mean
        power of *x* divided by ``10 ** (snr / 10)``. An empty input yields
        an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # The mean power of an empty signal is 0/0; there is nothing to perturb.
        return x.copy()
    signal_power = np.sum(np.abs(x) ** 2) / len(x)
    noise_power = signal_power / (10 ** (snr / 10))
    # randn() draws unit-variance samples, so scale by the noise standard deviation.
    noise = np.random.randn(len(x)) * np.sqrt(noise_power)
    return x + noise


def filter_bp(x, fs, wl, wh):
    """Band-pass filter *x* with a 3rd-order Butterworth design.

    Args:
        x: Samples to filter.
        fs: Sampling rate of *x* in Hz.
        wl: Lower corner frequency in Hz.
        wh: Upper corner frequency in Hz.

    Returns:
        The filtered samples, same length as *x*. The filter is applied with
        ``filtfilt`` (forward and backward), so no phase delay is introduced.
    """
    order = 3
    nyquist = fs / 2
    # butter() takes the corner frequencies as fractions of the Nyquist frequency.
    passband = [edge / nyquist for edge in (wl, wh)]
    numerator, denominator = signal.butter(order, passband, btype='bandpass')
    return signal.filtfilt(numerator, denominator, x)


def dtmf_encoder(number, fs=None):
    """Synthesize the DTMF waveform for a sequence of keypad symbols.

    Every symbol becomes a 0.5 s tone (the sum of its row and column
    sinusoids, each with unit amplitude) followed by 0.5 s of silence; the
    waveform also starts with 0.5 s of silence. When *number* holds exactly
    one symbol, all silence is dropped and only the tone is returned.

    Args:
        number: Iterable of symbols drawn from ``0-9``, ``A-D``, ``*``, ``#``.
        fs: Sampling rate in Hz. When omitted, the module-level ``framerate``
            (set when the input WAV file is read) is used, which is the
            original behaviour.

    Returns:
        1-D float array with the synthesized samples.

    Raises:
        KeyError: If *number* contains a symbol that is not a DTMF key.
    """
    if fs is None:
        fs = framerate
    tone_seconds = 0.5  # length of each tone
    gap_seconds = 0.5  # silence between tones
    dtmf_mapping = {
        '1': (697, 1209),
        '2': (697, 1336),
        '3': (697, 1477),
        'A': (697, 1633),
        '4': (770, 1209),
        '5': (770, 1336),
        '6': (770, 1477),
        'B': (770, 1633),
        '7': (852, 1209),
        '8': (852, 1336),
        '9': (852, 1477),
        'C': (852, 1633),
        '*': (941, 1209),
        '0': (941, 1336),
        '#': (941, 1477),
        'D': (941, 1633),
    }
    if len(number) == 1:
        # A single key is emitted as a bare tone without surrounding silence.
        gap_seconds = 0
    gap = np.zeros(int(gap_seconds * fs))
    n = np.arange(0, int(tone_seconds * fs))
    # Collect the pieces and concatenate once instead of re-copying the
    # growing waveform on every symbol.
    pieces = [gap]
    for num in number:
        low, high = dtmf_mapping[num]
        tone = np.sin(2 * np.pi * (low / fs) * n) + np.sin(
            2 * np.pi * (high / fs) * n)
        pieces.extend((tone, gap))
    return np.concatenate(pieces)


def wave_write(wave_file, wave_data, framerate, sampwidth=2, channels=1):
    """Write *wave_data* to a WAV file, rescaled so its peak is 20000.

    Args:
        wave_file: Path (or binary file object) passed to ``wave.open``.
        wave_data: Samples to store; any array-like of numbers.
        framerate: Sampling rate in Hz written to the header.
        sampwidth: Bytes per sample written to the header.
            NOTE: the samples are always encoded as 16-bit integers, so any
            value other than 2 produces a header that does not match the data.
        channels: Channel count written to the header.

    The scale factor is kept as a float. Truncating it to an integer made it
    zero for any input whose peak exceeded 20000, which wrote pure silence.
    An empty or all-zero input is written unchanged rather than dividing by
    zero.
    """
    samples = np.asarray(wave_data, dtype=float)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        # Multiply before dividing so the peak lands exactly on 20000.
        samples = samples * 20000.0 / peak
    # WAV stores 16-bit PCM little-endian regardless of the host byte order.
    pcm = samples.astype('<i2')

    # The context manager closes the file even if a write fails.
    with wave.open(wave_file, "wb") as f:
        # Configure channel count, sample width and sampling rate.
        f.setnchannels(channels)
        f.setsampwidth(sampwidth)
        f.setframerate(framerate)
        f.writeframes(pcm.tobytes())


def signal_energy(signal, win=80, framerate=48000):
    """Plot the short-time energy of *signal* on a new matplotlib figure.

    The signal is cut into consecutive, non-overlapping frames of *win*
    samples (a trailing partial frame is dropped) and the sum of squares of
    each frame is plotted against the frame's start time. The figure is
    created but not shown; the caller decides when to call ``plt.show()``.

    Args:
        signal: 1-D sequence of samples. Note that inside this function the
            name shadows the module-level ``scipy.signal`` import.
        win: Frame length in samples.
        framerate: Sampling rate in Hz, used only to label the time axis.
            One frame spans ``win / framerate`` seconds.
    """
    samples = np.asarray(signal)
    print("signal length:", len(samples))
    n_frames = len(samples) // win
    frames = np.reshape(samples[:n_frames * win], (-1, win))
    print(frames.shape)
    # Energy of each frame.
    frame_energy = np.sum(frames * frames, axis=1)

    # Frame k starts at k * win / framerate seconds. Deriving the step from
    # len(signal) / n_frames skewed the axis whenever a partial frame had been
    # dropped, and divided by zero for signals shorter than one frame.
    frame_start = np.arange(n_frames) * (win / framerate)
    plt.figure(figsize=(20, 4))
    plt.plot(frame_start, frame_energy)


def split_dtmf(x, th=5, win=80, min_len=2500):
    """Locate the tone bursts in *x* by thresholding its short-time energy.

    *x* is cut into consecutive frames of *win* samples (a trailing partial
    frame is ignored). A frame whose energy (sum of squares) is below *th*
    counts as silence; every other frame counts as tone. Each maximal run of
    tone frames longer than *min_len* samples is reported. Pick *th* from the
    energy plot so that it separates sound from silence.

    Args:
        x: 1-D sequence of samples.
        th: Energy threshold separating silence from tone.
        win: Frame length in samples.
        min_len: Runs of at most this many samples are discarded as glitches.

    Returns:
        List of ``(start, stop)`` sample indices, one pair per detected tone,
        with ``stop`` exclusive. Both are multiples of *win*.
    """
    edges = []
    n_frames = len(x) // win
    # Shape (n_frames, win): one row per frame.
    w = np.reshape(x[:n_frames * win], (-1, win))
    print(w)
    # Energy of each frame: the sum of squares along every row.
    w_e = np.sum(w * w, axis=1)
    print("w_e", w_e)
    idx = 0
    while idx < n_frames:
        # Skip leading silence.
        while idx < n_frames and w_e[idx] < th:
            idx += 1
        if idx >= n_frames:
            break
        # Extend over the tone. The test must be the exact complement of the
        # silence test above: with a strict ">" a frame whose energy equals
        # th was neither skipped nor consumed, and the loop never finished.
        end = idx
        while end < n_frames and w_e[end] >= th:
            end += 1
        if (end - idx) * win > min_len:
            edges.append((idx * win, end * win))
        print(idx, end)
        idx = end
    return edges


def decode_dtmf(x, th=5, edges=None, fs=None):
    """Decode the DTMF symbols contained in *x*.

    For every tone segment the magnitude spectrum is computed, the strongest
    bin inside the low (row) band and inside the high (column) band is
    located, and each peak is snapped to the nearest nominal DTMF frequency
    to look up the key.

    Args:
        x: 1-D array of samples.
        th: Energy threshold forwarded to ``split_dtmf``; only used when
            *edges* is None.
        edges: Optional list of ``(start, stop)`` sample indices of the tones.
            When None, the segments are found with ``split_dtmf(x, th)``.
        fs: Sampling rate in Hz. When omitted, the module-level ``framerate``
            (set when the input WAV file is read) is used, which is the
            original behaviour.

    Returns:
        List of decoded key symbols, one per segment, in order.
    """
    L_Freqs = np.array([697.0, 770.0, 852.0, 941.0])
    H_Freqs = np.array([1209.0, 1336.0, 1477.0, 1633.0])
    # Full 4x4 keypad. The fourth column (A-D) must exist in every row,
    # otherwise any tone containing 1633 Hz in rows 1-3 raises IndexError.
    KEYS = [['1', '2', '3', 'A'],
            ['4', '5', '6', 'B'],
            ['7', '8', '9', 'C'],
            ['*', '0', '#', 'D']]
    L_RANGE = (680.0, 960.0)
    H_RANGE = (1180.0, 1700.0)
    number = []
    if edges is None:
        edges = split_dtmf(x, th)
    for edge in edges:
        # Magnitude spectrum of the tone segment.
        X = np.abs(np.fft.fft(x[edge[0]:edge[1]]))
        print("edge", edge)
        N = len(X)
        print("N", N)
        # Frequency resolution of one DFT bin, in Hz.
        res = float(framerate if fs is None else fs) / N
        print("res", res)
        # Strongest bin in the low-frequency (row) band.
        a = int(L_RANGE[0] / res)
        b = int(L_RANGE[1] / res)
        lo = a + np.argmax(X[a:b])
        # Strongest bin in the high-frequency (column) band.
        a = int(H_RANGE[0] / res)
        b = int(H_RANGE[1] / res)
        hi = a + np.argmax(X[a:b])
        # Snap each peak to the nearest nominal DTMF frequency.
        row = np.argmin(abs(L_Freqs - lo * res))
        col = np.argmin(abs(H_Freqs - hi * res))
        number.append(KEYS[row][col])
        print(number)
    return number


file_name = '附件1.3.wav'
# %%
# Read the header and the raw PCM bytes. The context manager closes the file
# even if reading fails.
with wave.open(file_name, 'rb') as f:
    params = f.getparams()
    # Channel count, sample width in bytes, sampling rate, frame count.
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = f.readframes(nframes)  # raw PCM bytes

print('nchannels:', nchannels)
print('sampwidth:', sampwidth)
print('framerate:', framerate)
print('nframes:', nframes)

# Decode the little-endian PCM bytes into floating-point samples.
if sampwidth == 1:
    # 8-bit WAV samples are unsigned, with the zero level at 128.
    waveData = np.frombuffer(strData, dtype=np.uint8).astype(np.float64) - 128.0
elif sampwidth == 2:
    waveData = np.frombuffer(strData, dtype='<i2').astype(np.float64)
elif sampwidth == 3:
    # NumPy has no 24-bit integer type: assemble each 3-byte sample by hand,
    # then sign-extend values that have the top bit set.
    raw = np.frombuffer(strData, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
    waveData = raw[:, 0] | (raw[:, 1] << 8) | (raw[:, 2] << 16)
    waveData = np.where(waveData >= (1 << 23), waveData - (1 << 24), waveData)
    waveData = waveData.astype(np.float64)
elif sampwidth == 4:
    waveData = np.frombuffer(strData, dtype='<i4').astype(np.float64)
else:
    raise ValueError('unsupported sample width: %d bytes' % sampwidth)

# Normalize the amplitude to [-1, 1]. A silent or empty file is left as is
# rather than divided by zero.
peak = np.max(np.abs(waveData)) if waveData.size else 0.0
if peak > 0:
    waveData = waveData / peak
waveData = np.reshape(waveData, (-1, nchannels))
waveData = waveData[:, 0]  # analyse the first channel only

time = np.arange(0, len(waveData)) * (1.0 / framerate)
plt.figure()
plt.plot(time, waveData)
plt.xlabel("Time(s)")
plt.ylabel("Amplitude")
plt.title("Wavedata")
plt.grid(True)  # draw grid lines
plt.show()
print(waveData)
# Keep only the band that contains the DTMF row and column frequencies.
waveData = filter_bp(waveData, framerate, 680, 1700)
print(waveData)
# Left disabled by the author; presumably blanks out a noisy stretch of this
# particular recording -- confirm before re-enabling.
# waveData[160240:167280] = 0

# Pass the real sampling rate so the time axis of the energy plot is correct.
signal_energy(waveData, framerate=framerate)
plt.show()
result = decode_dtmf(waveData)
print(result)

参考链接

基于python的DTMF编解码_基于python的dtmf分析-CSDN博客

侵删