1,数据预处理
1.1 文件路径
import os
from imutils import paths
# List every file name directly under d_path (d_path is defined elsewhere — not shown here).
filenames = os.listdir(d_path)
# Join a directory and a file name into one path (illustrative fragment; `i` is a single file name).
os.path.join(d_path, i)
# Recursively collect all image file paths under the dataset directory (args presumably from argparse — confirm).
imagePaths = list(paths.list_images(args["dataset"]))
1.2 加载数据
数据格式一般是(样本个数,样本长度)
from scipy.io import loadmat
# Load a MATLAB .mat file; returns a dict mapping variable names to arrays.
loadmat(file_path)
1.3 添加标签和one-hot编码
标签由原来的1,2,3,4,5...转成one-hot编码
# 第一种
from sklearn.preprocessing import OneHotEncoder
# one-hot编码
def one_hot(Train_Y, Test_Y):
    """One-hot encode integer class labels.

    The class vocabulary is the sorted unique values of ``Train_Y`` only,
    mirroring ``OneHotEncoder().fit(train)``; a test label never seen in
    training raises ``ValueError`` (sklearn's default behaviour).

    Note: the original body called ``preprocessing.OneHotEncoder()`` while
    only ``OneHotEncoder`` had been imported — a NameError.  This version
    is pure NumPy, which the block already depended on.

    Parameters
    ----------
    Train_Y, Test_Y : array-like of class labels (e.g. 1, 2, 3, ...).

    Returns
    -------
    (Train_Y, Test_Y) as int32 arrays of shape (n_samples, n_classes).
    """
    Train_Y = np.asarray(Train_Y).reshape(-1)
    Test_Y = np.asarray(Test_Y).reshape(-1)
    # Sorted unique training labels — same ordering as OneHotEncoder.categories_.
    classes = np.unique(Train_Y)
    if not np.isin(Test_Y, classes).all():
        raise ValueError("Test_Y contains labels never seen in Train_Y")
    # Row i of `identity` is the one-hot vector for classes[i].
    identity = np.eye(len(classes), dtype=np.int32)
    Train_Y = identity[np.searchsorted(classes, Train_Y)]
    Test_Y = identity[np.searchsorted(classes, Test_Y)]
    return Train_Y, Test_Y
# Option 2: Keras' built-in one-hot helper.
# NOTE(review): the original imported from tensorflow.python.keras.utils,
# a private, version-unstable module path; tensorflow.keras.utils is the
# supported public API for the same function.
from tensorflow.keras.utils import to_categorical
# labels: integer class ids; labels_nums: total number of classes (defined elsewhere).
labels = to_categorical(labels, labels_nums)
1.4 数据标准化
from sklearn import preprocessing
def scalar_stand(Train_X, Test_X):
    """Standardize features to zero mean and unit variance.

    The scaler is fit on the training set ONLY and then applied to both
    sets, so no test-set statistics leak into training.

    Parameters
    ----------
    Train_X, Test_X : array-like of shape (n_samples, n_features).

    Returns
    -------
    The transformed (Train_X, Test_X) pair.
    """
    # Fit on train; reuse the same mean/std for the test set.
    scaler = preprocessing.StandardScaler().fit(Train_X)
    Train_X = scaler.transform(Train_X)
    Test_X = scaler.transform(Test_X)
    return Train_X, Test_X
1.5 数据切分
最后数据变成:X_valid, Y_valid, X_test, Y_test
def valid_test_slice(Test_X, Test_Y, rate=(0.5, 0.25, 0.25)):
    """Split held-out data into a validation set and a test set.

    The split is stratified, so class proportions are preserved in both
    halves.

    Parameters
    ----------
    Test_X, Test_Y : array-like
        Samples and labels held out from training.
    rate : sequence of 3 floats, optional
        (train, valid, test) proportions; only the valid:test ratio,
        ``rate[2] / (rate[1] + rate[2])``, is used here.
        NOTE(review): the original read a module-level ``rate`` that is
        never defined in this file (NameError at call time); it is now an
        explicit parameter defaulting to an even valid/test split.

    Returns
    -------
    X_valid, Y_valid, X_test, Y_test
    """
    # Fraction of the held-out data that becomes the final test set.
    test_size = rate[2] / (rate[1] + rate[2])
    ss = StratifiedShuffleSplit(n_splits=1, test_size=test_size)
    # n_splits=1, so this loop body executes exactly once.
    for valid_index, test_index in ss.split(Test_X, Test_Y):
        X_valid, X_test = Test_X[valid_index], Test_X[test_index]
        Y_valid, Y_test = Test_Y[valid_index], Test_Y[test_index]
    return X_valid, Y_valid, X_test, Y_test
2,创建模型
# Instantiate a Sequential model (layers are stacked in the order added).
model = Sequential()
# First convolution block: Conv1D -> BatchNorm -> ReLU -> MaxPool.
# NOTE(review): input_shape and num_classes are defined elsewhere — not shown here.
model.add(Conv1D(filters=32, kernel_size=20, strides=8, padding='same',kernel_regularizer=l2(1e-4), input_shape=input_shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=4, strides=4, padding='valid'))
# Flatten the convolutional feature maps before the dense layers.
model.add(Flatten())
# Fully-connected hidden layer with L2 weight regularization.
model.add(Dense(units=100, activation='relu', kernel_regularizer=l2(1e-4)))
# Output layer: one softmax unit per class (num_classes total).
model.add(Dense(units=num_classes, activation='softmax', kernel_regularizer=l2(1e-4)))
keras学习资料: https://blog.csdn.net/sinat_26917383/article/details/72857454
3,训练
# Train the model; validation_data is scored at the end of every epoch.
# tb_cb is a callback defined elsewhere (presumably TensorBoard — confirm).
model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=epochs,
          verbose=1, validation_data=(x_valid, y_valid), shuffle=True,
          callbacks=[tb_cb])
4,测试
# Score the trained model on the held-out test set (verbose=0: no progress bar).
# `score` holds the loss plus any metrics set in model.compile — presumably
# [loss, accuracy]; confirm against the compile call (not shown here).
score = model.evaluate(x=x_test, y=y_test, verbose=0)
model.evaluate 和 model.predict 的区别:evaluate 需要标签,在给定数据上计算损失值和评估指标;predict 只需要输入,返回模型的原始预测输出。
5,常用包
包名 | 描述 | 参考链接 |
scipy | SciPy是一款方便、易于使用、专为科学和工程设计的python工具包,它包括了统计、优化、整合以及线性代数模块、傅里叶变换、信号和图像处理、常微分方程的求解等 | https://www.jianshu.com/p/6c742912047f |
imutils | 图像处理的工具包 | https://www.jianshu.com/p/bb34ddf2a947 |
matplotlib | 画图包 | https://www.jianshu.com/p/da385a35f68d |
sklearn | Scikit-learn(sklearn)是机器学习中常用的第三方模块,对常用的机器学习方法进行了封装,包括回归(Regression)、降维(Dimensionality Reduction)、分类(Classfication)、聚类(Clustering)等方法 | https://www.jianshu.com/p/6ada34655862 |
keras | 基于 Python 的深度学习库 | https://blog.csdn.net/sinat_26917383/article/details/72857454 |