官方网站
Praat: doing phonetics by computer
Python Praat库
安装测试
激活虚拟环境python 3.6.12,直接使用pip指令进行安装
pip install praat-parselmouth
测试
import parselmouth
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
绘制声波波幅图像
sns.set()  # use seaborn's default theme for all following figures
# The original statement only read this setting, which has no effect in a
# script; assign it so figures are rendered at an explicit resolution.
plt.rcParams['figure.dpi'] = 100
def plot_sound(path='audio/w_1.wav'):
    """Plot the waveform (amplitude over time) of a sound file.

    Args:
        path: Path of the audio file to load. Defaults to the sample
            recording ``audio/w_1.wav``.
    """
    # The package is imported as ``parselmouth``; no ``pm`` alias is defined,
    # so the full module name has to be used here.
    snd = parselmouth.Sound(path)
    plt.figure()
    plt.plot(snd.xs(), snd.values.T)
    # Clip the x-axis to the time span of the sound.
    plt.xlim([snd.xmin, snd.xmax])
    plt.xlabel('time [s]')
    plt.ylabel('amplitude')
    plt.show()


plot_sound()
采集得到的图像如下
片段声音采集
It is also possible to extract part of the speech fragment and plot it separately.
# 提取声音片段
def sound_fragement(snd=None):
    """Plot the part of a sound that starts at 0.9 s.

    Args:
        snd: The ``parselmouth.Sound`` to cut the fragment from. When
            omitted, the sample file ``audio/w_1.wav`` is loaded, because
            this function cannot rely on a module-level ``snd`` existing.
    """
    if snd is None:
        snd = parselmouth.Sound('audio/w_1.wav')
    # preserve_times=True is passed so the fragment keeps the time stamps of
    # the source sound (the x-axis then starts at 0.9 rather than 0).
    snd_part = snd.extract_part(from_time=0.9, preserve_times=True)
    plt.figure()
    plt.plot(snd_part.xs(), snd_part.values.T)
    plt.xlim([snd_part.xmin, snd_part.xmax])
    plt.xlabel('time [s]')
    plt.ylabel('amplitude')
    plt.show()


sound_fragement()
声音信号频谱和强度
# spectrogram
def draw_spectrogram(spectrogram, dynamic_range=70):
    """Draw a spectrogram as a colour mesh on the current matplotlib axes.

    Args:
        spectrogram: Object providing ``x_grid()``, ``y_grid()``, ``values``,
            ``ymin`` and ``ymax`` (e.g. the result of ``Sound.to_spectrogram``).
        dynamic_range: Width of the displayed colour range, measured
            downwards from the largest log-scaled value.
    """
    time_edges = spectrogram.x_grid()
    freq_edges = spectrogram.y_grid()
    # Log-scale the values (10 * log10) before plotting.
    level_db = 10 * np.log10(spectrogram.values)
    # Everything more than `dynamic_range` below the maximum shares the
    # lowest colour.
    lowest_shown = level_db.max() - dynamic_range
    plt.pcolormesh(time_edges, freq_edges, level_db,
                   vmin=lowest_shown, cmap='spring_r')
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel('time [s]')
    plt.ylabel('freq [Hz]')
# intensity
def draw_intensity(intensity):
    """Draw an intensity contour as a blue dash-dot line on the current axes.

    Args:
        intensity: Object providing ``xs()`` and ``values`` (e.g. the result
            of ``Sound.to_intensity``).
    """
    times = intensity.xs()
    levels = intensity.values.T
    plt.plot(times, levels, color='blue', linestyle='-.', linewidth=1.5)
    # Turn the grid off for this axis and start the y-axis at 0.
    plt.grid(False)
    plt.ylim(0)
    plt.ylabel('intensity [dB]')
def curve_plot(snd=None):
    """Overlay a sound's intensity contour on its spectrogram.

    Args:
        snd: The ``parselmouth.Sound`` to analyse. When omitted, the sample
            file ``audio/w_1.wav`` is loaded, because this function cannot
            rely on a module-level ``snd`` existing.
    """
    if snd is None:
        snd = parselmouth.Sound('audio/w_1.wav')
    intensity = snd.to_intensity()      # intensity contour
    spectrogram = snd.to_spectrogram()  # time-frequency representation
    plt.figure()
    draw_spectrogram(spectrogram)
    # Add a second y-axis on the same time axis for the intensity curve.
    plt.twinx()
    draw_intensity(intensity)
    plt.xlim([snd.xmin, snd.xmax])
    plt.show()


curve_plot()
滑动窗口采样
The Parselmouth functions and methods have the same arguments as the Praat commands, so we can for example also change the window size of the spectrogram analysis to get a narrow-band spectrogram.
def draw_pitch(pitch):
    """Draw a pitch track as blue dots on the current matplotlib axes.

    Args:
        pitch: Object providing ``selected_array``, ``xs()`` and ``ceiling``
            (e.g. the result of ``Sound.to_pitch``).
    """
    freqs = pitch.selected_array['frequency']
    # Frames with frequency 0 are replaced by NaN so that they are not drawn
    # (presumably these are the unvoiced frames -- confirm against Parselmouth).
    is_zero = freqs == 0
    freqs[is_zero] = np.nan
    plt.plot(pitch.xs(), freqs, 'o', color='blue', markersize=3)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel('fundamental freq. [Hz]')
def pitch_plot(snd=None):
    """Overlay a sound's pitch track on a spectrogram of the sound.

    Args:
        snd: The ``parselmouth.Sound`` to analyse. When omitted, the sample
            file ``audio/w_1.wav`` is loaded, because this function cannot
            rely on a module-level ``snd`` existing.
    """
    if snd is None:
        snd = parselmouth.Sound('audio/w_1.wav')
    pitch = snd.to_pitch()
    # Pre-emphasise a copy so the sound used for pitch tracking and for the
    # x-axis limits stays unmodified.
    pre_emphasized_snd = snd.copy()
    pre_emphasized_snd.pre_emphasize()
    spectrogram = pre_emphasized_snd.to_spectrogram(
        window_length=0.03, maximum_frequency=8000)
    plt.figure()
    draw_spectrogram(spectrogram)
    # Add a second y-axis on the same time axis for the pitch values.
    plt.twinx()
    draw_pitch(pitch)
    plt.xlim([snd.xmin, snd.xmax])
    plt.show()


pitch_plot()
使用seaborn中的FacetGrid函数可以同时展示多个音频文件的谱信息
'''
多文件显示
'''
import pandas as pd
def facet_util(data, **kwargs):
    """Draw the spectrogram and pitch track for one facet of a FacetGrid.

    Args:
        data: DataFrame subset for the facet; the ``digit`` and
            ``speaker_id`` values of its first row select the audio file
            ``digit/<digit>_<speaker_id>.wav``.
        **kwargs: Extra keyword arguments passed by the caller; unused.
    """
    digit, speaker_id = data[['digit', 'speaker_id']].iloc[0]
    # The package is imported as ``parselmouth``; no ``pm`` alias is defined,
    # so the full module name has to be used here.
    sound = parselmouth.Sound('digit/{}_{}.wav'.format(digit, speaker_id))
    draw_spectrogram(sound.to_spectrogram())
    plt.twinx()
    draw_pitch(sound.to_pitch())
    # Keep the secondary (pitch) axis label and ticks only on facets whose
    # digit is 5 (presumably the right-most column -- confirm with the CSV).
    if digit != 5:
        plt.ylabel('')
        plt.yticks([])
def sns_face_grid():
    """Show a grid of spectrogram/pitch plots, one per row of the digit list.

    Reads ``digit/digit_list.csv`` and lays the plots out with one row per
    ``speaker_id`` and one column per ``digit``; each cell is drawn by
    ``facet_util``.
    """
    digit_table = pd.read_csv('digit/digit_list.csv')
    facets = sns.FacetGrid(digit_table, row='speaker_id', col='digit')
    facets.map_dataframe(facet_util)
    # Title each column/row with the bare value instead of "name = value".
    facets.set_titles(col_template="{col_name}", row_template="{row_name}")
    facets.set_axis_labels('time [s]', 'frequency [Hz]')
    facets.set(facecolor='white', xlim=(0, None))
    plt.show()


sns_face_grid()
MP3文件转为WAV文件
发现Praat库无法直接处理MP3文件,需要进行转换,安装pydub库
pip install pydub
需要下载ffmpeg软件,解压后添加到系统环境变量中
发现直接调用pydub进行转换会遇到权限问题,目前尚未解决
'''
mp3文件转为wav文件
'''
from pydub import AudioSegment
import pydub
# pydub launches the program named by ``AudioSegment.converter``, so it must
# be the ffmpeg executable itself. Pointing it at the containing ``bin``
# directory makes the launch fail (on Windows typically as a permission error).
# NOTE(review): assumes the binary is ``ffmpeg.exe`` inside this folder -- confirm.
pydub.AudioSegment.converter = "E:\\ffmpeg_2020\\bin\\ffmpeg.exe"


def mp3_trans_wav(src='audio/adu_2.mp3', dst='test_.wav'):
    """Convert an MP3 file to WAV using pydub.

    Args:
        src: Path of the MP3 file to read.
        dst: Path of the WAV file to write.
    """
    snd = AudioSegment.from_mp3(src)
    snd.export(dst, format='wav')
直接在CMD中调用ffmpeg软件对mp3文件进行解析
考虑通过python调用cmd命令转换mp3文件
'''
在cmd中调用ffmpeg指令转换文件格式
ffmpeg -i {mp3 filepath} {wav filepath}
'''
import os
def cmd_func(src='adu_2.mp3', dst='wav_2.wav'):
    """Convert ``audio/<src>`` to ``wav/<dst>`` by running the ffmpeg CLI.

    Runs ``ffmpeg -i audio/<src> wav/<dst>`` and waits for it to finish.

    Args:
        src: File name of the MP3 inside the ``audio`` directory.
        dst: File name of the WAV to create inside the ``wav`` directory.

    Returns:
        The exit code of ffmpeg, or ``None`` if ffmpeg could not be started.
    """
    # Local import keeps this block self-contained.
    import subprocess

    # An argument list (no shell) avoids quoting problems with file names.
    ff_cmd = ['ffmpeg', '-i', 'audio/{}'.format(src), 'wav/{}'.format(dst)]
    try:
        # run() blocks until ffmpeg exits; the previous os.popen() pipe was
        # never read or closed, and printing it only showed the pipe object.
        result = subprocess.run(ff_cmd)
    except FileNotFoundError:
        print('ffmpeg executable not found on PATH')
        return None
    print(result.returncode)
    return result.returncode


cmd_func()
结果可行