# 本例仅用于记录笔者的一次大作业。代码源自网络上一位大神的实现,笔者在其基础上做了修改并加入了滤波处理,最终得到了理想的结果。
import wave
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
def s_add_wn(x, snr: float):
    """Return *x* with additive white Gaussian noise at a given SNR.

    Helper for experiments; the decoding script in this file does not call it.

    Args:
        x: Signal samples (array-like, real or complex).
        snr: Desired signal-to-noise ratio in dB.

    Returns:
        NumPy array with the same shape as *x* containing signal plus noise.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to measure power on; avoid a mean-of-empty warning.
        return x + 0.0
    signal_power = np.mean(np.abs(x) ** 2)
    # SNR[dB] = 10 * log10(Ps / Pn)  =>  Pn = Ps / 10^(SNR / 10)
    noise_power = signal_power / (10 ** (snr / 10))
    noise = np.random.randn(*x.shape) * np.sqrt(noise_power)
    return x + noise
def filter_bp(x, fs, wl, wh, order=3):
    """Apply a zero-phase Butterworth band-pass filter to *x*.

    The filter is run forwards and backwards (``filtfilt``), so the output
    has no phase delay relative to the input.

    Args:
        x: Input samples (1-D array-like).
        fs: Sampling rate in Hz.
        wl: Lower cut-off frequency in Hz.
        wh: Upper cut-off frequency in Hz.
        order: Order of the Butterworth prototype (default 3, the value the
            original code hard-coded).

    Returns:
        Filtered signal, same length as *x*.

    Raises:
        ValueError: If the band edges do not satisfy ``0 < wl < wh < fs / 2``.
    """
    nyquist = fs / 2
    if not 0 < wl < wh < nyquist:
        raise ValueError(
            "band edges must satisfy 0 < wl < wh < fs/2, got wl=%r, wh=%r, fs=%r"
            % (wl, wh, fs)
        )
    # scipy expects cut-offs normalised to the Nyquist frequency.
    b, a = signal.butter(order, [wl / nyquist, wh / nyquist], btype='bandpass')
    return signal.filtfilt(b, a, x)
def dtmf_encoder(number, fs=None, tone_len=0.5, gap_len=0.5):
    """Synthesise the DTMF waveform for a sequence of keypad symbols.

    The output starts with one gap of silence, followed by
    ``tone, gap`` for every symbol. When *number* has exactly one symbol the
    gaps are dropped entirely and only the tone is returned.

    Args:
        number: Iterable of keypad symbols (``0-9``, ``A-D``, ``*``, ``#``).
        fs: Sampling rate in Hz. When ``None`` the module-level ``framerate``
            is used, which is what the original implementation always did.
        tone_len: Duration of each tone in seconds.
        gap_len: Duration of the silence between tones in seconds.

    Returns:
        1-D float array with the synthesised samples (each tone is the sum of
        two unit-amplitude sines, so peak amplitude is up to 2).

    Raises:
        KeyError: If *number* contains a symbol that is not on the keypad.
    """
    # (row frequency, column frequency) in Hz for every key.
    dtmf_mapping = {
        '1': (697, 1209),
        '2': (697, 1336),
        '3': (697, 1477),
        'A': (697, 1633),
        '4': (770, 1209),
        '5': (770, 1336),
        '6': (770, 1477),
        'B': (770, 1633),
        '7': (852, 1209),
        '8': (852, 1336),
        '9': (852, 1477),
        'C': (852, 1633),
        '*': (941, 1209),
        '0': (941, 1336),
        '#': (941, 1477),
        'D': (941, 1633),
    }
    rate = framerate if fs is None else fs
    if len(number) == 1:
        # A single key needs no separating silence.
        gap_len = 0
    gap = np.zeros(int(gap_len * rate))
    n = np.arange(int(tone_len * rate))

    # Collect the pieces and concatenate once instead of growing the array
    # inside the loop.
    pieces = [gap]
    for key in number:
        f_low, f_high = dtmf_mapping[key]
        tone = (np.sin(2 * np.pi * (f_low / rate) * n)
                + np.sin(2 * np.pi * (f_high / rate) * n))
        pieces.extend((tone, gap))
    return np.concatenate(pieces)
def wave_write(wave_file, wave_data, framerate, sampwidth=2, channels=1):
    """Write *wave_data* to a WAV file as 16-bit PCM, peak-normalised to 20000.

    Args:
        wave_file: Path (or writable binary file object) of the output file.
        wave_data: Samples to write (array-like of numbers).
        framerate: Sampling rate in Hz stored in the header.
        sampwidth: Sample width in bytes stored in the header.
        channels: Number of channels stored in the header.

    NOTE(review): samples are always encoded as 16-bit integers, exactly as
    in the original code, so a ``sampwidth`` other than 2 yields a header
    that does not match the payload.
    """
    samples = np.asarray(wave_data, dtype=float)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        # Keep the scale factor as a float: truncating it to an int turned
        # every signal with a peak above 20000 into pure silence.
        samples = samples * (20000 / peak)
    # WAV PCM is little-endian; be explicit instead of relying on the host.
    pcm = np.round(samples).astype('<i2')

    # Encode first, open second, so bad input cannot leave a half-written
    # file behind; the context manager closes the file on errors too.
    with wave.open(wave_file, "wb") as f:
        f.setnchannels(channels)
        f.setsampwidth(sampwidth)
        f.setframerate(framerate)
        f.writeframes(pcm.tobytes())
def signal_energy(signal, win=80, framerate=48000):
    """Plot the short-time energy of *signal* on a new matplotlib figure.

    The signal is cut into consecutive, non-overlapping frames of *win*
    samples (a trailing partial frame is dropped) and the sum of squares of
    each frame is plotted against the frame's start time. The figure is
    created but not shown; the caller decides when to call ``plt.show()``.

    Args:
        signal: 1-D array-like of samples.
        win: Frame length in samples.
        framerate: Sampling rate in Hz, used only to scale the time axis.
    """
    samples = np.asarray(signal)
    print("signal length:", len(samples))

    n_frames = len(samples) // win
    frames = samples[:n_frames * win].reshape(n_frames, win)
    print(frames.shape)

    # Energy of each frame.
    w_energy = np.sum(frames * frames, axis=1)

    # Frame k starts at sample k * win. Deriving the step from win (not from
    # len(signal) / n_frames) keeps the axis exact and avoids a division by
    # zero when the signal is shorter than one frame.
    time = np.arange(n_frames) * (win / framerate)

    plt.figure(figsize=(20, 4))
    plt.plot(time, w_energy)
# from the plot, a threshold of 200 is enough to separate sound and silence
# based on that we can define a function that returns the start and stop indices of the tones
def split_dtmf(x, th=5, win=80, min_len=2500):
    """Locate tone segments in *x* by thresholding short-time energy.

    The signal is cut into consecutive frames of *win* samples (a trailing
    partial frame is dropped). A frame whose energy (sum of squares) is at
    least *th* is "active"; each run of active frames becomes one segment,
    provided it spans more than *min_len* samples.

    Args:
        x: 1-D array-like of samples.
        th: Energy threshold separating silence from tone.
        win: Frame length in samples.
        min_len: Runs of this many samples or fewer are discarded as glitches.

    Returns:
        List of ``(start, stop)`` sample indices, ``stop`` exclusive, in
        order of appearance.
    """
    samples = np.asarray(x, dtype=float)
    n_frames = len(samples) // win
    frames = samples[:n_frames * win].reshape(n_frames, win)
    energy = np.sum(frames * frames, axis=1)

    # Classify every frame exactly once. The original used "< th" for silence
    # and "> th" for tone, so a frame with energy == th (or NaN) matched
    # neither test and the scan never advanced.
    active = energy >= th

    edges = []
    idx = 0
    while idx < n_frames:
        if not active[idx]:
            idx += 1
            continue
        start = idx
        while idx < n_frames and active[idx]:
            idx += 1
        if (idx - start) * win > min_len:
            edges.append((start * win, idx * win))
    return edges
def _band_peak_hz(spectrum, res, band):
    """Return the frequency in Hz of the largest bin of *spectrum* in *band*."""
    first = int(band[0] / res)
    last = int(band[1] / res)
    return (first + np.argmax(spectrum[first:last])) * res


def decode_dtmf(x, th=5, edges=None, fs=None):
    """Decode the DTMF keys contained in *x*.

    For every tone segment the magnitude spectrum is searched for its
    strongest component in the low (row) band and in the high (column) band.
    Each peak is snapped to the nearest nominal DTMF frequency, and the pair
    selects a key from the 4x4 keypad.

    Args:
        x: 1-D sequence of samples.
        th: Energy threshold passed to ``split_dtmf`` when *edges* is None.
        edges: Optional list of ``(start, stop)`` sample indices of the tone
            segments. When ``None`` they are found with ``split_dtmf``.
        fs: Sampling rate in Hz. When ``None`` the module-level ``framerate``
            is used, which is what the original implementation always did.

    Returns:
        List of decoded key symbols, one per segment.
    """
    low_freqs = np.array([697.0, 770.0, 852.0, 941.0])
    high_freqs = np.array([1209.0, 1336.0, 1477.0, 1633.0])
    # Full 4x4 keypad. The original table lacked 'A', 'B' and 'C', so any
    # 1633 Hz tone in the first three rows raised IndexError.
    keys = [
        ['1', '2', '3', 'A'],
        ['4', '5', '6', 'B'],
        ['7', '8', '9', 'C'],
        ['*', '0', '#', 'D'],
    ]
    low_band = (680.0, 960.0)
    high_band = (1180.0, 1700.0)

    rate = float(framerate if fs is None else fs)
    if edges is None:
        edges = split_dtmf(x, th)

    number = []
    for start, stop in edges:
        segment = x[start:stop]
        # Real input: the one-sided spectrum is enough for peak picking.
        spectrum = np.abs(np.fft.rfft(segment))
        res = rate / len(segment)  # width of one DFT bin in Hz

        row = np.argmin(np.abs(low_freqs - _band_peak_hz(spectrum, res, low_band)))
        col = np.argmin(np.abs(high_freqs - _band_peak_hz(spectrum, res, high_band)))
        number.append(keys[row][col])
    return number
file_name = '附件1.3.wav'
# %%
# Read the header and the raw PCM payload; the context manager closes the
# file even if reading fails.
with wave.open(file_name, 'rb') as f:
    params = f.getparams()
    # channel count, sample width in bytes, sampling rate, frame count
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = f.readframes(nframes)  # raw audio bytes
print('nchannels:', nchannels)
print('sampwidth:', sampwidth)
print('framerate:', framerate)
print('nframes:', nframes)

# Decode the PCM bytes into floats. Converting to float before taking abs()
# avoids the integer overflow of abs(-32768) on int16 data.
if sampwidth == 1:
    # 8-bit WAV PCM is unsigned with a 128 offset.
    waveData = np.frombuffer(strData, dtype=np.uint8).astype(np.float64) - 128.0
elif sampwidth == 2:
    waveData = np.frombuffer(strData, dtype='<i2').astype(np.float64)
elif sampwidth == 3:
    # NumPy has no 24-bit integer type: assemble little-endian triplets by
    # hand, then sign-extend.
    raw = np.frombuffer(strData, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
    waveData = raw[:, 0] | (raw[:, 1] << 8) | (raw[:, 2] << 16)
    waveData = np.where(waveData >= (1 << 23), waveData - (1 << 24), waveData)
    waveData = waveData.astype(np.float64)
elif sampwidth == 4:
    waveData = np.frombuffer(strData, dtype='<i4').astype(np.float64)
else:
    raise ValueError("unsupported sample width: %d bytes" % sampwidth)

# Normalise the amplitude to [-1, 1]; leave an all-zero recording untouched.
peak = np.max(np.abs(waveData)) if waveData.size else 0.0
if peak > 0:
    waveData = waveData / peak

# De-interleave the channels and keep only the first one.
waveData = np.reshape(waveData, [-1, nchannels])
waveData = waveData[:, 0]

time = np.arange(0, len(waveData)) * (1.0 / framerate)
plt.figure()
plt.plot(time, waveData)
plt.xlabel("Time(s)")
plt.ylabel("Amplitude")
plt.title("Wavedata")
plt.grid(True)
plt.show()

print(waveData)
# Keep only the band that contains the DTMF row and column frequencies.
waveData = filter_bp(waveData, framerate, 680, 1700)
print(waveData)
# Disabled by the author (presumably masks an unwanted stretch of this
# particular recording - confirm before re-enabling):
# waveData[160240:167280] = 0

# Use the file's real sampling rate for the time axis rather than the
# function's 48 kHz default, and actually display the energy figure.
signal_energy(waveData, framerate=framerate)
plt.show()

result = decode_dtmf(waveData)
print(result)
# 参考链接:
# 基于python的DTMF编解码_基于python的dtmf分析-CSDN博客
# 如有侵权,请联系删除。