# NOTE: removed stray blog-paste placeholder text here — it was not valid Python.
# Basic libraries
from audioop import mul
from functools import lru_cache
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import minmax_scale
from keras.models import Sequential, Model
from keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout, LSTM, BatchNormalization, Input, Conv1D,\
BatchNormalization, GlobalAveragePooling1D, concatenate, Permute, Dropout, MaxPool1D, Flatten, \
Reshape, Lambda, RepeatVector, Multiply
from keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import os
import librosa
import librosa.display
import glob
from tqdm import tqdm
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, CSVLogger, TensorBoard
from keras import optimizers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.ensemble import VotingClassifier
from keras import regularizers
from sklearn.utils import class_weight
# Integer id for each of the 20 food-sound classes, plus the inverse mapping.
_CLASS_NAMES = (
    'aloe', 'burger', 'cabbage', 'candied_fruits', 'carrots', 'chips',
    'chocolate', 'drinks', 'fries', 'grapes', 'gummies', 'ice-cream',
    'jelly', 'noodles', 'pickles', 'pizza', 'ribs', 'salmon',
    'soup', 'wings',
)
label_dict = {name: idx for idx, name in enumerate(_CLASS_NAMES)}
label_dict_inv = {idx: name for name, idx in label_dict.items()}
def extract_features(parent_dir, sub_dirs, max_file=10, file_ext="*.wav", flag="mix"):
    """Extract one feature vector per training file, with its class label.

    Parameters
    ----------
    parent_dir : str
        Root directory holding one sub-directory per class.
    sub_dirs : iterable of str
        Class sub-directory names; each must be a key of ``label_dict``.
    max_file : int
        Maximum number of files read per class.
    file_ext : str
        Glob pattern selecting the audio files.
    flag : str
        "mfcc"  -> 128-dim mean MFCC;
        "mix"   -> mean MFCC + zero-crossing flags + mean chroma;
        other   -> mean mel spectrogram.

    Returns
    -------
    [feature, label] : pair of lists, feature vectors and integer labels.
    """
    label, feature = [], []
    for sub_dir in sub_dirs:
        for fn in tqdm(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))[:max_file]):
            # Bug fix: the class is simply the sub-directory being scanned.
            # The old path-string parsing (split('/')[-1].split('\\')[0]) only
            # worked for Windows-style joined paths and raised KeyError on POSIX.
            label.append(label_dict[sub_dir])
            X, sample_rate = librosa.load(fn, res_type='kaiser_fast')
            if flag == "mfcc":
                mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T,
                               axis=0)
                feature.append(mfcc)
            elif flag == "mix":
                # Zero-crossing flags over a fixed 100-sample window of the signal.
                n0, n1 = 9000, 9100
                mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T,
                               axis=0)
                zero_crossings = librosa.zero_crossings(X[n0:n1], pad=False)
                temp = np.hstack((mfcc, zero_crossings))
                # Pass the signal by keyword: recent librosa makes `y` keyword-only.
                chromagram = np.mean(
                    librosa.feature.chroma_stft(y=X, sr=sample_rate, hop_length=512).T,
                    axis=0)
                feature.append(np.hstack((temp, chromagram)))
            else:
                mels = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,
                               axis=0)
                feature.append(mels)
    return [feature, label]
# Root of the training data: one sub-directory per class.
parent_dir = './train/'
save_dir = "./"
# Class folder names, in label-id order (must agree with label_dict).
folds = sub_dirs = np.array([
    'aloe', 'burger', 'cabbage', 'candied_fruits', 'carrots', 'chips',
    'chocolate', 'drinks', 'fries', 'grapes', 'gummies', 'ice-cream',
    'jelly', 'noodles', 'pickles', 'pizza', 'ribs', 'salmon',
    'soup', 'wings',
])
def dnn(cnn_shape=(16, 8, 1), lstm_shape=(128, 1)):
    """Build and compile the two-input (CNN + attention-gated Conv1D) classifier.

    Branches:
      * LSTM branch: summarises the 128x1 feature sequence and emits a
        softmax-normalised weight vector over 64 channels.
      * Conv1D branch: temporal convolution features, element-wise gated by
        the LSTM weights.
      * Conv2D branch: treats the same 128-dim feature as a 16x8x1 "image".
    The Conv2D map is reshaped to (time, 64), concatenated with the gated
    Conv1D output along the time axis, globally average-pooled and classified
    into 20 classes.

    Returns the compiled Keras Model (Adam lr=0.001, categorical crossentropy).
    """
    cnn_input = Input(shape=cnn_shape, name='cnn_input')
    lstm_input = Input(shape=lstm_shape, name='lstm_input')
    # Attention weights from the sequence summary.
    x = LSTM(64, return_sequences=False)(lstm_input)
    x = Dense(64, activation='softmax')(x)
    # Conv1D temporal branch.
    y1 = Conv1D(64, 5, padding='same', activation='relu')(lstm_input)
    y1 = MaxPool1D(pool_size=3)(y1)
    # Broadcast the attention vector over the time steps and gate the features.
    dim_num = y1.shape[1]
    x = RepeatVector(dim_num)(x)
    y1 = Multiply()([y1, x])
    # Conv2D branch. (Removed the dead `input_dim` local and the ignored
    # `input_shape=` kwarg — in the functional API the shape comes from
    # `cnn_input`, not from layer kwargs.)
    y = Conv2D(64, (3, 3), padding="same", activation="relu")(cnn_input)
    y = BatchNormalization()(y)
    y = MaxPool2D(pool_size=(2, 2))(y)
    y = Dropout(0.15)(y)
    y = Conv2D(128, (3, 3), padding="same", activation="relu")(y)
    y = BatchNormalization()(y)
    y = MaxPool2D(pool_size=(2, 2))(y)
    y = Dropout(0.3)(y)
    y = Conv2D(128, (3, 3), padding="same", activation="relu")(y)
    y = BatchNormalization()(y)
    y = Dropout(0.2)(y)
    # Flatten the 2-D feature map into (time, 64) so it can be concatenated
    # with the gated Conv1D output along the time axis.
    y = Reshape((-1, 64))(y)
    output = concatenate([y, y1], axis=1)
    output = GlobalAveragePooling1D()(output)
    output = Dense(20, activation="softmax")(output)
    model = Model(inputs=[cnn_input, lstm_input], outputs=output)
    model.summary()
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def cnn(input_shape=(16, 8, 1)):
    """Build and compile a plain 2-D CNN over 16x8x1 feature "images".

    Two conv/pool stages followed by dense layers and a 20-way softmax.
    Returns the compiled Sequential model (Adam lr=0.001, categorical
    crossentropy, accuracy metric); prints a summary as a side effect.
    """
    model_conv = Sequential()
    for layer in (
        Conv2D(64, (5, 5), padding="same", activation="relu", input_shape=input_shape),
        MaxPool2D(pool_size=(2, 2)),
        Conv2D(128, (3, 3), padding="same", activation="relu"),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.1),
        Flatten(),
        Dense(1024, activation="relu"),
        Dense(100, activation='relu'),
        Dense(20, activation="softmax"),
    ):
        model_conv.add(layer)
    model_conv.compile(optimizer=optimizers.Adam(lr=0.001),
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
    model_conv.summary()
    return model_conv
def _make_callbacks():
    """Build a fresh set of Keras training callbacks.

    Created per training run so that ModelCheckpoint's best-so-far tracking
    and EarlyStopping's patience counter do not leak between folds.
    NOTE(review): the 'val_acc' monitor/format names assume an older Keras;
    newer releases report 'val_accuracy' — confirm against the installed
    version.
    """
    checkpoint = ModelCheckpoint(
        "./record/weight/dnn_mfcc-ep{epoch:03d}-loss{loss:.3f}-val_acc{val_acc:.3f}.h5",
        monitor="val_acc", verbose=1, save_best_only=True, mode='max')
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', patience=100, mode='auto',
                                  factor=0.1, cooldown=0, min_lr=1e-5, verbose=1)
    csvlogger = CSVLogger(filename='./record/log/train.csv', append=True)
    earlystopping = EarlyStopping(monitor='val_acc', min_delta=0, patience=100,
                                  verbose=1, mode='max')
    tensorboard = TensorBoard(log_dir="./record/log/")
    return [checkpoint, reduce_lr, csvlogger, earlystopping, tensorboard]


def _fit_once(model, train_x, train_y, val_x, val_y, epochs):
    """Fit `model` on one train/validation split, reshaping inputs per model kind."""
    callbacks = _make_callbacks()
    cnn_train = train_x.reshape(-1, 16, 8, 1)
    cnn_val = val_x.reshape(-1, 16, 8, 1)
    # `model == cnn` compares against the module-level factory function; this
    # dispatch is kept exactly as the original code wrote it.
    if model == cnn:
        model.fit(cnn_train, train_y,
                  epochs=epochs,
                  batch_size=128,
                  validation_data=(cnn_val, val_y),
                  callbacks=callbacks)
    else:
        model.fit({'cnn_input': cnn_train,
                   'lstm_input': train_x.reshape(-1, 128, 1)},
                  train_y,
                  epochs=epochs,
                  batch_size=128,
                  validation_data=({'cnn_input': cnn_val,
                                    'lstm_input': val_x.reshape(-1, 128, 1)}, val_y),
                  callbacks=callbacks)


def train(model=cnn, nf=True, featrue_path="features_mfcc.npy", label_path="label_mfcc.npy"):
    """Train `model` on features/labels saved as .npy files.

    Parameters
    ----------
    model : compiled Keras model (or the `cnn` factory function itself,
        which selects the single-input reshape path — see `_fit_once`).
    nf : bool
        True -> 5-fold cross-validation; False -> one stratified hold-out split.
    featrue_path : str
        Path to the saved feature array (parameter name kept, typo and all,
        for backward compatibility with existing callers).
    label_path : str
        Path to the saved label array.

    Bug fix: the hold-out branch (nf=False) previously referenced `checkpoint`,
    `reduce_lr`, etc., which were only created inside the k-fold loop, so it
    raised NameError. Callback construction now happens in `_fit_once` for
    both paths.
    """
    X = np.load(featrue_path)
    Y = np.load(label_path)
    if nf:
        kf = KFold(n_splits=5, shuffle=True, random_state=2020)
        for train_index, valid_index in kf.split(X, Y):
            _fit_once(model, X[train_index], Y[train_index],
                      X[valid_index], Y[valid_index], epochs=500)
    else:
        train_x, val_x, train_y, val_y = train_test_split(X, Y, random_state=1,
                                                          stratify=Y)
        # The original code trained the two-input model longer (1000 epochs)
        # on the hold-out split; that asymmetry is preserved.
        _fit_once(model, train_x, train_y, val_x, val_y,
                  epochs=500 if model == cnn else 1000)
def extract_features(test_dir, file_ext="*.wav", flag="mix"):
    """Extract one feature vector per (unlabeled) test file.

    NOTE(review): this redefines — and therefore shadows — the training-set
    `extract_features(parent_dir, sub_dirs, ...)` declared earlier in the
    file; after this point only this signature is callable. Consider renaming
    one of them.

    Parameters
    ----------
    test_dir : str   directory containing the test audio files
    file_ext : str   glob pattern selecting the audio files
    flag : str
        "mfcc"  -> 128-dim mean MFCC;
        "mix"   -> mean MFCC + zero-crossing flags + mean chroma;
        other   -> mean mel spectrogram.

    Returns
    -------
    list of per-file feature vectors, in glob order.
    """
    feature = []
    for fn in tqdm(glob.glob(os.path.join(test_dir, file_ext))):
        X, sample_rate = librosa.load(fn, res_type='kaiser_fast')
        if flag == "mfcc":
            mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T,
                           axis=0)
            feature.append(mfcc)
        elif flag == "mix":
            # Zero-crossing flags over a fixed 100-sample window of the signal.
            n0, n1 = 9000, 9100
            mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T,
                           axis=0)
            zero_crossings = librosa.zero_crossings(X[n0:n1], pad=False)
            temp = np.hstack((mfcc, zero_crossings))
            # Pass the signal by keyword: recent librosa makes `y` keyword-only.
            chromagram = np.mean(
                librosa.feature.chroma_stft(y=X, sr=sample_rate, hop_length=512).T,
                axis=0)
            feature.append(np.hstack((temp, chromagram)))
        else:
            mels = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,
                           axis=0)
            feature.append(mels)
    return feature
def voting(preds_conv, preds_dense, preds_lstm):
    """Hard-vote three class-probability matrices (rows = samples).

    Each matrix is converted to a 0/1 indicator of its per-row maximum
    (ties keep every tied class), the three indicators are summed, and
    the class with the most votes wins per row.

    Returns a 1-D array of winning class indices.
    """
    def _one_hot_max(preds):
        # Floor-dividing each row by its maximum leaves 1.0 exactly at the
        # max entries and 0.0 everywhere else.
        row_max = np.tile(np.max(preds, axis=1).reshape(-1, 1), preds.shape[1])
        return preds // row_max

    votes = _one_hot_max(preds_conv) + _one_hot_max(preds_dense) + _one_hot_max(preds_lstm)
    return np.argmax(votes, axis=1)
def mul_test(cnn, dnn, cnn_weight, dnn_weight, test_path='./test_a/'):
    """Ensemble-predict test-set labels and write submit4.csv.

    Parameters
    ----------
    cnn, dnn : the single-input CNN model and the two-input ensemble model
    cnn_weight, dnn_weight : str  paths to the saved .h5 weights
    test_path : str  directory containing the test .wav files
    """
    X_test = np.vstack(extract_features(test_path, flag="mfcc"))
    cnn.load_weights(cnn_weight)
    preds_cnn = cnn.predict(X_test.reshape(-1, 16, 8, 1))
    dnn.load_weights(dnn_weight)
    preds_dnn = dnn.predict({'cnn_input': X_test.reshape(-1, 16, 8, 1),
                             'lstm_input': X_test.reshape(-1, 128, 1)})
    # NOTE(review): the dnn predictions are passed twice, giving that model
    # double voting weight — confirm this is intended and not a typo.
    preds = voting(preds_cnn, preds_dnn, preds_dnn)
    labels = [label_dict_inv[x] for x in preds]
    # Bug fix: the file list previously hard-coded './test_a/*.wav' and
    # ignored `test_path`; os.path.basename also replaces the Windows-only
    # split('\\') name extraction.
    paths = glob.glob(os.path.join(test_path, '*.wav'))
    result = pd.DataFrame({'name': [os.path.basename(p) for p in paths],
                           'label': labels})
    result.to_csv('submit4.csv', index=False)
def single_test(model, weight, test_path='./test_a/'):
    """Predict test-set labels with one model and write submit3.csv.

    NOTE(review): the prediction feeds the two-input
    {'cnn_input', 'lstm_input'} dict, so `model` must be the dnn-style
    ensemble model, not the plain CNN, despite the generic name.

    Parameters
    ----------
    model : the two-input Keras model
    weight : str  path to the saved .h5 weights
    test_path : str  directory containing the test .wav files
    """
    X_test = np.vstack(extract_features(test_path, flag="mfcc"))
    model.load_weights(weight)
    predictions = model.predict({'cnn_input': X_test.reshape(-1, 16, 8, 1),
                                 'lstm_input': X_test.reshape(-1, 128, 1)})
    preds = np.argmax(predictions, axis=1)
    labels = [label_dict_inv[x] for x in preds]
    # Bug fix: list files from `test_path` instead of the hard-coded
    # './test_a/', and take the basename portably (the old split('\\')
    # left the full path on POSIX).
    paths = glob.glob(os.path.join(test_path, '*.wav'))
    result = pd.DataFrame({'name': [os.path.basename(p) for p in paths],
                           'label': labels})
    result.to_csv('submit3.csv', index=False)
if __name__ == "__main__":
    # Bug fix: the original rebound `dnn` and `cnn` to model instances,
    # shadowing the factory functions (and breaking any later `model == cnn`
    # dispatch). Use distinct names for the instances.
    dnn_model = dnn(cnn_shape=(16, 8, 1), lstm_shape=(128, 1))
    cnn_model = cnn(input_shape=(16, 8, 1))
    cnn_weight = "./record/weight/cnn_mfcc-ep001-loss0.000-val_acc1.000.h5"
    dnn_weight = "./record/weight/dnn_mfcc-ep001-loss0.003-val_acc1.000.h5"
    mul_test(cnn_model, dnn_model, cnn_weight, dnn_weight)