问题描述:
DNN(深度神经网络,也称 MLP 多层感知器)的实现:从 Excel 中加载已划分好的训练集和测试集(加载与划分的方法见我的另一篇博客),训练 DNN 完成回归预测,并将预测结果保存到指定的 Excel 文件中。使用时只需修改文件路径即可。
程序是在 Jupyter Notebook 下完成的;如想在 PyCharm 下运行,需做少量修改(例如删除 get_ipython() 魔法命令所在的一行)。
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import tensorflow as tf
import xlrd
import os
from openpyxl import load_workbook
from xlutils.copy import copy
# In[2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
# In[106]:
# Load the pre-split training and test sets (the first row of each sheet is the header).
# CSV alternative: pd.read_csv('data/train_csv.csv', header=0) and
# pd.read_csv('data/test_csv.csv', header=0).
dtrain = pd.read_excel('data_4000/train_split.xls', header=0)
dtest = pd.read_excel('data_4000/test_split.xls', header=0)

# Columns 0-5 are the six model inputs, columns 8-9 the two regression targets.
# .copy() detaches the feature frames from dtrain/dtest so the in-place
# standardisation below cannot trigger pandas' SettingWithCopyWarning.
x_train = dtrain.iloc[:, 0:6].copy()
y_train = dtrain.iloc[:, 8:10]
x_test = dtest.iloc[:, 0:6].copy()
y_test = dtest.iloc[:, 8:10]
# In[107]:
# Standardise every non-object feature column to zero mean / unit variance.
x_train_index = x_train.dtypes[x_train.dtypes != 'object'].index
train_mean = x_train[x_train_index].mean()
train_std = x_train[x_train_index].std()
# A constant column has std == 0; dividing by 1 instead leaves it at 0 rather
# than turning the whole column into NaN.
train_std[train_std == 0] = 1
x_train[x_train_index] = (x_train[x_train_index] - train_mean) / train_std
# In[108]:
# The test set is scaled with the TRAINING statistics: using its own mean/std
# would put train and test features on different scales and leak test-set
# information into preprocessing. Indexing the training statistics by the test
# columns raises KeyError if the two files do not share the same numeric columns.
x_test_index = x_test.dtypes[x_test.dtypes != 'object'].index
x_test[x_test_index] = (
    (x_test[x_test_index] - train_mean[x_test_index]) / train_std[x_test_index]
)
# In[109]:
# Fully connected regression network: 6 input features -> 2 outputs.
# Hidden stack: two 100-unit ReLU layers, then five 500-unit ReLU layers with
# Dropout(0.2) after the 2nd, 3rd and 4th of the 500-unit layers.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(100, input_shape=(6,), activation='relu'))
model.add(tf.keras.layers.Dense(100, activation='relu'))
model.add(tf.keras.layers.Dense(500, activation='relu'))
model.add(tf.keras.layers.Dense(500, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(500, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(500, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(500, activation='relu'))
# Linear output layer: one unit per regression target.
model.add(tf.keras.layers.Dense(2))
# In[110]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()
# In[ ]:
# Train with the Adam optimizer against a mean-squared-error loss.
model.compile(loss='mse', optimizer='adam')
def scheduler(epoch):
    """Return the learning rate to use for ``epoch``.

    Piecewise schedule:
      * epoch < 10:        0.001
      * 10 <= epoch < 50:  0.0001
      * epoch >= 50:       0.0001 * exp(0.01 * (50 - epoch)), i.e. exponential
        decay that starts from 0.0001 at epoch 50.

    Args:
        epoch: Epoch index passed in by the LearningRateScheduler callback.

    Returns:
        The learning rate as a plain Python float.
    """
    if epoch < 10:
        return 0.001
    if epoch < 50:
        return 0.0001
    # NumPy is enough for a scalar exponential; no need to build a TensorFlow
    # tensor and convert it back with .numpy().
    return float(0.0001 * np.exp(0.01 * (50 - epoch)))
# Assemble the training callbacks.
# An EarlyStopping(patience=10, min_delta=1e-4) callback is intentionally left out.
callbacks = []
# Write the model to disk whenever the monitored training loss improves.
callbacks.append(
    tf.keras.callbacks.ModelCheckpoint(
        './models/weights.hdf5',
        monitor='loss',
        save_best_only=True,
    )
)
# Apply the per-epoch learning rate returned by scheduler().
callbacks.append(tf.keras.callbacks.LearningRateScheduler(scheduler))

# Train for 150 epochs; `history` keeps the per-epoch metrics.
history = model.fit(
    x_train,
    y_train,
    epochs=150,
    verbose=1,
    callbacks=callbacks,
)
# In[105]:
# Loss on the held-out test set (a single scalar, since no extra metrics were
# passed to compile()).
result = model.evaluate(x_test, y_test)
# In[65]:
# Persist the full model only when the test loss beats the 7.7 threshold.
# The path is relative ('./models/') to match the checkpoint location used
# during training; the former '/models/' pointed at the filesystem root.
if result < 7.7:
    model.save('./models/')
# In[67]:
# Single forward pass over the test set (the earlier duplicate call whose
# result was discarded has been removed).
predict = model.predict(x_test)
print(predict.shape)
def write_excel_xls(path, value):
    """Overwrite the first sheet of an existing .xls workbook with ``value``.

    Every cell inside the sheet's current extent (nrows x ncols) is first reset
    to 0 -- presumably so leftovers from an earlier, larger write do not
    survive -- and then ``value[i][j]`` is written to row ``i``, column ``j``,
    starting at the top-left cell. Data is NOT appended below existing rows.
    The modified workbook is saved back to ``path``.

    Args:
        path: Path of an existing workbook that xlrd can open.
        value: 2-D array-like of numbers (e.g. the model predictions); each
            element is converted with ``float()``, so NumPy scalars and plain
            Python numbers are both accepted.
    """
    workbook = xlrd.open_workbook(path)
    first_sheet_name = workbook.sheet_names()[0]
    worksheet = workbook.sheet_by_name(first_sheet_name)

    # xlrd workbooks are read-only; xlutils.copy turns one into a writable
    # xlwt workbook.
    new_workbook = copy(workbook)
    new_worksheet = new_workbook.get_sheet(0)

    # Reset the previously used area to 0.
    for i in range(worksheet.nrows):
        for j in range(worksheet.ncols):
            new_worksheet.write(i, j, 0)

    # Write the new values from the top-left corner.
    for i, row in enumerate(value):
        for j, cell in enumerate(row):
            new_worksheet.write(i, j, float(cell))

    new_workbook.save(path)
# Workbook that receives the predictions. write_excel_xls() opens this file
# before writing, so it has to exist already.
aim_pred_file = r'C:\Users\gj7520\Desktop\pythob_files\dnn2\data_4000\pre_data_77.xls'
write_excel_xls(path=aim_pred_file, value=predict)
# In[68]:
def wirte_gt_test(aim_path, gt_test_path):
    """Write the ground-truth targets into ``aim_path`` starting at column 2.

    Reads columns 8-9 of the sheet at ``gt_test_path`` (first row treated as
    the header) and writes them into the first sheet of the workbook at
    ``aim_path``, beginning at row 0 and shifted two columns to the right.
    Before writing, every existing cell from column 2 rightwards is reset to 0;
    columns 0-1 are left untouched. Data is NOT appended below existing rows.
    The modified workbook is saved back to ``aim_path``.

    Args:
        aim_path: Path of an existing workbook that xlrd can open.
        gt_test_path: Path of the Excel file holding the test split.
    """
    gt_test = pd.read_excel(gt_test_path, header=0)
    value = np.array(gt_test.iloc[:, 8:10])
    print(value.shape)

    workbook = xlrd.open_workbook(aim_path)
    first_sheet_name = workbook.sheet_names()[0]
    worksheet = workbook.sheet_by_name(first_sheet_name)

    # xlrd workbooks are read-only; xlutils.copy turns one into a writable
    # xlwt workbook.
    new_workbook = copy(workbook)
    new_worksheet = new_workbook.get_sheet(0)

    # Reset the previously used area to the right of the first two columns.
    for i in range(worksheet.nrows):
        for j in range(2, worksheet.ncols):
            new_worksheet.write(i, j, 0)

    # float() accepts both NumPy scalars and the plain Python numbers found in
    # object-dtype arrays, where .astype() is unavailable.
    for i, row in enumerate(value):
        for j, cell in enumerate(row, start=2):
            new_worksheet.write(i, j, float(cell))

    new_workbook.save(aim_path)
# Source of the ground-truth targets: the same test split used for evaluation.
gt_test_file = 'data_4000/test_split.xls'
wirte_gt_test(aim_path=aim_pred_file, gt_test_path=gt_test_file)
# In[60]:
# Plot the per-epoch training loss recorded by model.fit().
plt.plot(history.history['loss'])
# In[ ]:
# In[ ]: