# Music genre classifier: converts continuous time-domain audio into discrete frequency-domain (FFT) features
import numpy as np
from scipy import fft
from scipy.io import wavfile
from tqdm import tqdm
# Prepare the music data
def create_fit(g, n):
    """Compute FFT magnitude features for one track and cache them on disk.

    Reads ``./genres/<g>/converted/<g>.<n zero-padded to 5 digits>.au.wav``,
    takes the magnitude of its discrete Fourier transform, keeps the first
    1000 frequency bins and saves them with ``np.save`` to
    ``./trainset/<g>.<nnnnn>.fft`` (NumPy appends the ``.npy`` suffix).

    Note that the features are the first 1000 FFT *bins*, not "everything
    below 1000 Hz": bin ``k`` corresponds to ``k * sample_rate / len(signal)``
    Hz. Files shorter than 1000 samples yield fewer than 1000 features.

    Args:
        g: Genre name; used both as the directory and as the file-name prefix.
        n: Track number within the genre.

    Raises:
        FileNotFoundError: If the wav file or the output directory is missing.
    """
    # zfill right-aligns the number in a 5-character string padded with zeros.
    track_id = str(n).zfill(5)
    wav_path = f"./genres/{g}/converted/{g}.{track_id}.au.wav"

    # wavfile.read returns (sample_rate, samples); only the samples are used.
    _sample_rate, samples = wavfile.read(wav_path)

    # Multi-channel audio comes back as (frames, channels). Downmix to mono so
    # the FFT runs along the time axis rather than across the channels.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Use numpy's FFT explicitly: in current SciPy, ``from scipy import fft``
    # binds the ``scipy.fft`` *module*, which is not callable.
    fft_features = np.abs(np.fft.fft(samples))[:1000]

    # Persist the features so training does not have to redo the transform.
    feature_path = f"./trainset/{g}.{track_id}.fft"
    np.save(feature_path, fft_features)
if __name__ == '__main__':
    # Script entry point: load cached FFT features, train a logistic
    # regression genre classifier, then classify one wav file.

    # One-off preprocessing step: convert every wav file to FFT features.
    # Uncomment and run once to populate ./trainset before training.
    # genre_list = ["classical", "jazz", "country", "pop", "rock", "metal"]
    # for g in genre_list:
    #     for n in tqdm(range(100)):
    #         create_fit(g, n)

    # Build the training set: one feature vector per track, labelled with the
    # index of its genre in genre_list.
    genre_list = ["classical", "jazz", "country", "pop", "rock", "metal"]
    X = []
    Y = []
    for label, g in enumerate(genre_list):
        for n in range(100):
            rad = "./trainset/" + g + '.' + str(n).zfill(5) + ".fft.npy"
            # First 1000 FFT magnitude bins of one audio file.
            X.append(np.load(rad))
            Y.append(label)
    X = np.array(X)
    Y = np.array(Y)
    print(X)
    print(Y)

    # Train the model.
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression()
    model.fit(X, Y)

    # Predict the genre of a single file.
    # NOTE(review): metal.00080 is also one of the 100 metal tracks loaded for
    # training above (assuming ./trainset was built from the same files), so
    # this is a smoke test rather than a held-out evaluation.
    print('Starting read wavfile...')
    sample_rate, test = wavfile.read("./genres/metal/converted/metal.00080.au.wav")
    # sample_rate, test = wavfile.read("./genres/heibao-wudizirong-remix.wav")

    # Multi-channel audio comes back as (frames, channels). Downmix to mono so
    # the feature vector stays one-dimensional like the training features.
    if test.ndim > 1:
        test = test.mean(axis=1)

    # Use numpy's FFT explicitly: in current SciPy, ``from scipy import fft``
    # binds the ``scipy.fft`` *module*, which is not callable.
    testdata_fft_features = np.abs(np.fft.fft(test))[:1000]
    type_index = model.predict([testdata_fft_features])
    print(type_index)
    print(genre_list[type_index[0]])