Task1 食物声音识别-Baseline
!wget http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531887/train_sample.zip
--2021-04-12 23:39:55-- http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531887/train_sample.zip
Resolving tianchi-competition.oss-cn-hangzhou.aliyuncs.com (tianchi-competition.oss-cn-hangzhou.aliyuncs.com)... 118.31.232.194
Connecting to tianchi-competition.oss-cn-hangzhou.aliyuncs.com (tianchi-competition.oss-cn-hangzhou.aliyuncs.com)|118.31.232.194|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 540689175 (516M) [application/zip]
Saving to: ‘train_sample.zip’
100%[======================================>] 540,689,175 12.7MB/s in 41s
2021-04-12 23:40:36 (12.5 MB/s) - ‘train_sample.zip’ saved [540689175/540689175]
!unzip -qq train_sample.zip
!\rm train_sample.zip
!wget http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531887/test_a.zip
--2021-04-12 23:41:46-- http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531887/test_a.zip
Resolving tianchi-competition.oss-cn-hangzhou.aliyuncs.com (tianchi-competition.oss-cn-hangzhou.aliyuncs.com)... 118.31.232.194
Connecting to tianchi-competition.oss-cn-hangzhou.aliyuncs.com (tianchi-competition.oss-cn-hangzhou.aliyuncs.com)|118.31.232.194|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1092637852 (1.0G) [application/zip]
Saving to: ‘test_a.zip’
100%[====================================>] 1,092,637,852 13.9MB/s in 85s
2021-04-12 23:43:12 (12.2 MB/s) - ‘test_a.zip’ saved [1092637852/1092637852]
!unzip -qq test_a.zip
!\rm test_a.zip
import pandas as pd
import numpy as np
from sklearn. model_selection import train_test_split
from sklearn. metrics import classification_report
from sklearn. model_selection import GridSearchCV
from sklearn. preprocessing import MinMaxScaler
from tensorflow. keras. models import Sequential
from tensorflow. keras. layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow. keras. utils import to_categorical
from sklearn. ensemble import RandomForestClassifier
from sklearn. svm import SVC
!pip install librosa --user
import os
import librosa
import librosa. display
import glob
!pip install tqdm
from tqdm import tqdm
def extract_features(parent_dir, sub_dirs, max_file=10, file_text="*.wav"):
    """Build one mean mel-spectrogram vector and one label per training clip.

    Args:
        parent_dir: Directory that contains one sub-directory per class.
        sub_dirs: Iterable of class sub-directory names to scan.
        max_file: Maximum number of files taken from each sub-directory.
        file_text: Glob pattern selecting the audio files inside a class
            directory.

    Returns:
        A two-element list ``[feature, label]``. ``feature`` is a list of
        1-D arrays (the mel spectrogram averaged over its frames) and
        ``label`` is the parallel list of ``label_dict`` values, one per clip.

    Raises:
        KeyError: If a class directory name is not a key of ``label_dict``.

    Note:
        ``label_dict`` is read from module scope; it must map each class
        directory name to its label before this function is called.
    """
    label, feature = [], []
    for sub_dir in sub_dirs:
        pattern = os.path.join(parent_dir, sub_dir, file_text)
        for fn in tqdm(glob.glob(pattern)[:max_file]):
            # The class name is the directory that holds the clip. Using
            # os.path instead of splitting on '/' keeps this working when
            # glob returns paths with a different separator.
            label_name = os.path.basename(os.path.dirname(fn))
            # One label per clip: append, not extend (extend would try to
            # iterate over the label value itself).
            # NOTE(review): assumes label_dict values are scalars — confirm.
            label.append(label_dict[label_name])
            X, sample_rate = librosa.load(fn, res_type='kaiser_fast')
            # Collapse the spectrogram's frame axis so every clip yields a
            # fixed-length vector regardless of its duration.
            mels = np.mean(
                librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0
            )
            feature.append(mels)
    return [feature, label]
# Location of the unzipped training clips (one sub-directory per class).
parent_dir = './train_sample'
save_dir = './'
folds = sub_dirs = np.array(['aloe', 'burger', 'cabbage', 'candied_fruits',
                             'carrots', 'chips', 'chocolate', 'drinks', 'fries',
                             'grapes', 'gummies', 'ice-cream', 'jelly', 'noodles', 'pickles',
                             'pizza', 'ribs', 'salmon', 'soup', 'wings'])

# extract_features() looks class names up in label_dict and the prediction
# step maps indices back through label_dict_inv, but neither is defined in
# the visible part of this notebook. Build both from the class list so the
# label of a class is its position in sub_dirs.
label_dict = {name: idx for idx, name in enumerate(sub_dirs.tolist())}
label_dict_inv = {idx: name for name, idx in label_dict.items()}

# temp is [feature_list, label_list]. The two lists are unpacked directly
# instead of going through np.array(temp).transpose(): the list is ragged
# (arrays next to scalars), which recent NumPy versions refuse to convert.
temp = extract_features(parent_dir, sub_dirs, max_file=100)
feature_list, label_list = temp

X = np.vstack(feature_list)
labels = np.asarray(label_list)
# The network is compiled with categorical_crossentropy and ends in a
# 20-unit softmax, so the targets have to be one-hot encoded.
Y = to_categorical(labels, num_classes=len(sub_dirs))
print(X.shape)
print(Y.shape)

# Stratify on the integer labels so every class keeps the same proportion
# in both splits.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=1, stratify=labels
)
print('训练集的大小', len(X_train))
print('测试集的大小', len(X_test))

# View each feature vector as a 16x8 single-channel image for the Conv2D
# layers; this requires exactly 128 values per clip.
X_train = X_train.reshape(-1, 16, 8, 1)
X_test = X_test.reshape(-1, 16, 8, 1)
# Each sample is fed to the network as a 16x8 image with a single channel.
input_dim = (16, 8, 1)

# Two conv/pool stages followed by a dense classifier over 20 classes.
model = Sequential([
    Conv2D(64, (3, 3), padding="same", activation="tanh", input_shape=input_dim),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding="same", activation="tanh"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.1),
    Flatten(),
    Dense(1024, activation="tanh"),
    Dense(20, activation="softmax"),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# The held-out split is passed as validation data so its loss and accuracy
# are reported alongside the training metrics.
model.fit(
    X_train,
    Y_train,
    epochs=20,
    batch_size=15,
    validation_data=(X_test, Y_test),
)
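'''validation_data is evaluated after every epoch (or every few epochs) so that problems such as overfitting or badly chosen hyperparameters are spotted early.
This makes it easy to adjust the settings in time.
1) Hyperparameters are chosen according to the results on the validation set, so the validation set can be regarded as taking part in the "manual tuning" side of training;
2) The training, validation and test sets should follow the same data distribution, otherwise tuning against the validation set is not meaningful;
3) A test set is optional but a validation set is required: if the validation set is representative enough, a separate test set can be omitted. The test set exists only to check that the model, whose parameters and hyperparameters were fitted on the training and validation sets, actually generalizes.'''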
def extract_features(test_dir, file_ext="*.wav"):
    """Compute a mean mel-spectrogram vector for every clip in a directory.

    This definition shares its name with the training-time extractor
    defined earlier in the file and replaces it once this code runs.

    Args:
        test_dir: Directory containing the audio files to process.
        file_ext: Glob pattern selecting the files inside ``test_dir``.

    Returns:
        A list with one 1-D array per matched file, in the order returned
        by ``glob.glob``.
    """
    wav_files = glob.glob(os.path.join(test_dir, file_ext))
    feature = []
    for wav_path in tqdm(wav_files):
        signal, rate = librosa.load(wav_path, res_type='kaiser_fast')
        spectrogram = librosa.feature.melspectrogram(y=signal, sr=rate)
        # Average along the first axis of the transposed spectrogram so
        # each clip yields a single fixed-length vector.
        feature.append(np.mean(spectrogram.T, axis=0))
    return feature
# Extract features for every test clip and shape them like the training
# input (16x8, one channel).
X_test = extract_features('./test_a/')
X_test = np.vstack(X_test)
predictions = model.predict(X_test.reshape(-1, 16, 8, 1))

# Pick the most probable class per clip and map the index back to a name.
# label_dict_inv is read from module scope; it is not defined in this section.
preds = np.argmax(predictions, axis=1)
preds = [label_dict_inv[x] for x in preds]

# NOTE(review): this relies on glob returning the files in the same order
# as inside extract_features(); the directory must not change in between.
path = glob.glob('./test_a/*.wav')
result = pd.DataFrame({'name': path, 'label': preds})
# Keep only the file name. os.path.basename handles whichever path
# separator glob produced, unlike splitting on '/'.
result['name'] = result['name'].apply(os.path.basename)
result.to_csv('submit.csv', index=False)
!ls ./test_a/*.wav | wc -l
!wc -l submit.csv