可以做类似波士顿房价预测的任务,即用多个输入变量预测单个输出变量
按列读取影响因素
import numpy as np
import pandas as pd
# Machine-specific location of the Excel workbook that holds the samples.
file = r'C:\Users\xy\Desktop\temp.xlsx'

# Load the sheet and rename its columns: the target 'y' first, then the
# ten predictors 'x1' .. 'x10'.
data = pd.read_excel(file)
data.columns = ['y'] + ['x%d' % idx for idx in range(1, 11)]
数据预处理
我这里的数据没做归一化,可以使用sklearn.preprocessing中的缩放器(例如MinMaxScaler)先做归一化,效果会更好
import statsmodels.api as sm
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split
# Predictors are every column after the first; statsmodels' add_constant is
# applied to them before use. The target is the 'y' column.
dataX = sm.add_constant(data.iloc[:, 1:])
dataY = data['y']
print(dataX.shape)
print(dataY.shape)

# Convert to NumPy and append a trailing axis of length 1 so that each sample
# becomes a 2-D array, giving the 3-D tensor Conv1D consumes.
x = np.array(dataX)[:, :, np.newaxis]
y = np.array(dataY)

# Hold out 10% of the samples; no random_state is set, so the split differs
# from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
)
print(x.shape)
1DCNN的输入也需要三维Tensor,上面通过expand_dims给二维特征矩阵增加了最后一维
模型搭建
模型只是简单的搭了个CNN,存在很大优化空间
from tensorflow.keras.utils import plot_model
from tensorflow.keras import backend as K
def ss_dect(y_true, y_pred):
    """Return the coefficient of determination (R^2) as a Keras metric.

    The value is ``1 - SS_res / (SS_tot + epsilon)``, computed with Keras
    backend ops over all elements of the tensors it receives.

    Args:
        y_true: Tensor of ground-truth targets.
        y_pred: Tensor of model predictions.

    Returns:
        A scalar tensor; 1 means the predictions match the targets exactly.
    """
    # Residual sum of squares: the squared error the model leaves unexplained.
    ss_res = K.sum(K.square(y_true - y_pred))
    # Total sum of squares: spread of the targets around their own mean.
    ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
    # epsilon keeps the ratio finite when all targets are identical.
    return 1 - ss_res / (ss_tot + K.epsilon())
# These names were used below but never imported anywhere in the script.
from tensorflow.keras.layers import Conv1D, Dense, Flatten
from tensorflow.keras.models import Sequential

# Small 1D-CNN regressor. Each sample is 11 steps long with 1 channel.
# With kernel size 3 and Keras' default 'valid' padding every Conv1D shortens
# the sequence by 2 (11 -> 9 -> 7 -> 5 -> 3 -> 1), so five conv layers is the
# most this input length allows.
model = Sequential()
model.add(Conv1D(16, 3, input_shape=(11, 1), activation='relu'))
model.add(Conv1D(32, 3, activation='relu'))
# Pooling stays disabled: it would shrink the sequence too far for the
# remaining convolutions.
# model.add(MaxPooling1D(3))
model.add(Conv1D(32, 3, activation='relu'))
model.add(Conv1D(64, 3, activation='relu'))
model.add(Conv1D(64, 3, activation='relu'))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
# Single linear unit: the network outputs one continuous value per sample.
model.add(Dense(1, activation='linear'))

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None").
model.summary()
plot_model(model, to_file='./cnn_model.png', show_shapes=True)

# MSE is the training loss; ss_dect (R^2) is tracked as an extra metric.
model.compile(optimizer='adam', loss='mse', metrics=[ss_dect])
# NOTE(review): the held-out test split doubles as validation data here, so
# the validation numbers are not an independent estimate.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=1000,
    batch_size=8,
)
实验结果评价
# evaluate() returns the loss followed by the compiled metrics, so scores[1]
# is the ss_dect metric.
scores = model.evaluate(x_test, y_test, verbose=0)
# NOTE(review): the number printed here is the ss_dect (R^2) metric scaled to
# a percentage, not a classification accuracy; the label is left unchanged.
print('accuracy:%.2f%%'%(scores[1]*100))

predicted = model.predict(x_test)
# The model ends in Dense(1), so predictions come back as a column (n, 1),
# while y_test is expected to be 1-D (n,). Flatten both so the subtraction is
# element-wise instead of broadcasting to an (n, n) matrix.
residuals = np.ravel(predicted) - np.ravel(y_test)

# Magnitude of the mean signed error (bias); same value the script printed
# before, now without the quadratic temporary.
result = abs(np.mean(residuals))
print(result)

# Mean absolute error: unlike the bias above, over- and under-predictions
# cannot cancel each other out.
mae = np.mean(np.abs(residuals))
print('MAE:', mae)