**1D neural network for regression prediction (3 independent variables and 5 different dependent variables)**
一 Loading and normalizing the dataset

Key points:
- reading .las files (a minimal lasio example follows this list)
- stacking matrices
- naming the matrix columns (so it is easy to see which columns of the output matrix are the independent and dependent variables)
- normalization
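A minimal sketch of reading and inspecting a LAS file with lasio; the file name is one of the wells used below, and the printed curve names depend on the actual file:

```python
import lasio

las = lasio.read("井/WY23.las")
print(las.keys())        # curve mnemonics in column order, e.g. ['DEPT', 'DTS', 'DT', ...]
print(las.data.shape)    # 2D array of curve values: (n_depth_samples, n_curves)
print(las.index[:5])     # depth index of the first five samples
# las.df() gives the same data as a pandas DataFrame indexed by depth,
# which is convenient for checking which column index belongs to which curve.
print(las.df().head())
```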
```python
import lasio
import numpy as np
import pandas as pd
from sklearn import preprocessing

fileName_23 = "井/WY23.las"
fileName_1 = '井/weiye1jgw.las'    # 31904~35880
fileName_11 = '井/weiye11jgw.las'  # 32800~37640
fileName_29 = '井/wy29jgw.las'     # 32800~36980
fileName_35 = "井/WY35.las"
# fileName_9 = '井/wy9jgw'  # unusable
las_23 = lasio.read(fileName_23)
las_1 = lasio.read(fileName_1)
las_11 = lasio.read(fileName_11)
las_29 = lasio.read(fileName_29)
las_35 = lasio.read(fileName_35)

a = [3, 7, 11]         # DTS, DT, RHOB
b = [9, 6, 10, 12, 1]  # GASV, PHI, GR, BI, TOC

# independent variables
Para_VVR_23 = las_23.data[34576:38576, a]
Para_VVR_1 = las_1.data[31904:35880, a]
Para_VVR_11 = las_11.data[32800:37640, a]
Para_VVR_29 = las_29.data[32800:36980, a]
Para_VVR_35 = las_35.data[26601:27849, a]
Para_VVR = np.vstack([Para_VVR_23, Para_VVR_1, Para_VVR_11, Para_VVR_29, Para_VVR_35])
df_Para = pd.DataFrame(Para_VVR, columns=['DTS', 'DT', 'RHOB'])

# dependent variables
Aim_23 = las_23.data[34576:38576, b]
Aim_1 = las_1.data[31904:35880, b]
Aim_11 = las_11.data[32800:37640, b]
Aim_29 = las_29.data[32800:36980, b]
Aim_35 = las_35.data[26601:27849, b]
Aim = np.vstack([Aim_23, Aim_1, Aim_11, Aim_29, Aim_35])
df_Aim = pd.DataFrame(Aim, columns=['GASV', 'PHI', 'GR', 'BI', 'TOC'])
selct_num = [0, 1, 2, 3, 4, 5, 6, 7]  # not used below

# One dependent variable is predicted at a time: everything from here to the end is run
# once per target, with i selecting one of the five columns of Aim (i = 0..4).
Target = Aim[:, [i]]
data = np.hstack([Para_VVR, Target])
data_column = data.shape[1]

# normalization
min_max_scaler = preprocessing.MinMaxScaler()
NL_data = min_max_scaler.fit_transform(data)
NL_input_data = NL_data[:, :data_column - 1]
NL_input_data = NL_input_data.reshape((-1, 1, data_column - 1))  # (samples, 1 channel, data_column-1 steps)
NL_output_data = NL_data[:, -1]

# split into training and test sets: the first four wells (16996 rows) are training, well 35 is testing
NL_train_X_data = NL_input_data[0:16996]
print("NL_train_X_data.shape: ", NL_train_X_data.shape)
NL_train_Y_data = NL_output_data[0:16996]
NL_test_data = NL_input_data[16996:-1]  # [16996:-1] also drops the very last row, matching the 26601:27848 range used later
print("NL_test_data.shape: ", NL_test_data.shape)
```
二 Building the model

```python
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense


def oned_cnn_model(sw_width, n_features, NL_train_X_data, NL_train_Y_data, NL_test_data, epoch_num, verbose_set):
    model = Sequential()
    # For Conv1D, data_format='channels_last' is the default:
    #   input shape (batch, steps, channels); output shape (batch, new_steps, filters),
    #   where padding and strides determine new_steps.
    # With data_format='channels_first' the expected input shape is (batch, channels, steps);
    # it is simply a question of whether the channel axis comes first or last.
    model.add(Conv1D(filters=3, kernel_size=3, activation='relu',
                     strides=1, padding='same', data_format='channels_first'))
    model.add(Conv1D(filters=3, kernel_size=3, activation='relu',
                     strides=1, padding='same', data_format='channels_first'))
    model.add(MaxPooling1D(pool_size=3, strides=None, padding='same',
                           data_format='channels_first'))
    model.add(Conv1D(filters=6, kernel_size=3, activation='relu',
                     strides=1, padding='same', data_format='channels_first'))
    model.add(Conv1D(filters=6, kernel_size=3, activation='relu',
                     strides=1, padding='same', data_format='channels_first'))
    model.add(MaxPooling1D(pool_size=3, strides=None, padding='same',
                           data_format='channels_first'))
    model.add(Conv1D(filters=9, kernel_size=3, activation='relu',
                     strides=1, padding='same', data_format='channels_first'))
    model.add(Conv1D(filters=9, kernel_size=3, activation='relu',
                     strides=1, padding='same', data_format='channels_first'))
    # For MaxPooling1D, data_format='channels_last' is also the default:
    #   3D input (batch_size, steps, features); 3D output (batch_size, downsampled_steps, features).
    # With data_format='channels_first' the expected input shape is (batch_size, features, steps).
    model.add(MaxPooling1D(pool_size=3, strides=None, padding='same',
                           data_format='channels_first'))
    # Flatten's data_format argument only preserves weight ordering when switching a model
    # between data formats; the default is channels_last.
    # With channels_last the input shape is (batch, ..., channels); with channels_first it is
    # (batch, channels, ...). The output is (batch, product of the remaining dimensions).
    model.add(Flatten())
    # Dense computes output = activation(dot(input, kernel) + bias), where activation is the
    # activation function, kernel is the weight matrix created by the layer and bias is the
    # bias vector created by the layer (only when use_bias is True).
    # A 2D input (batch_size, input_dim) gives a 2D output (batch_size, units).
    model.add(Dense(units=30, activation='relu',
                    use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros'))
    # A single value is predicted per sample, so units is set to 1.
    model.add(Dense(units=1))
    # configure the model
    model.compile(optimizer='adam', loss='mse',
                  metrics=['accuracy'], loss_weights=None, sample_weight_mode=None,
                  weighted_metrics=None, target_tensors=None)
    # X is the input data, y the labels; batch_size is the number of samples per gradient
    # update (default 32); verbose: 0 = silent, 1 = progress bar, 2 = one line per epoch.
    history = model.fit(NL_train_X_data, NL_train_Y_data, batch_size=20, epochs=epoch_num, verbose=verbose_set)
    print('\n', model.summary())
    yhat = model.predict(np.array(NL_test_data), verbose=0)
    yhat = yhat.reshape(yhat.shape[0], 1)
    print("yhat.shape:", yhat.shape)
    # the test features and yhat are stacked together later so that the scaler can be inverted
    return yhat, model, history
```
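The function can be smoke-tested on random data before the real run, just to confirm that the layer shapes line up. This assumes the installed Keras/TensorFlow build supports data_format='channels_first' for Conv1D/MaxPooling1D (some CPU-only builds do not):

```python
import numpy as np

dummy_X = np.random.rand(8, 1, 3)   # (batch, channels=1, steps=3), same layout as NL_train_X_data
dummy_y = np.random.rand(8)
_, check_model, _ = oned_cnn_model(None, None, dummy_X, dummy_y, dummy_X,
                                   epoch_num=1, verbose_set=0)
check_model.summary()               # Flatten should output 9 features (9 filters x 1 downsampled step)
```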
三 Training

```python
# Compared with the reference code this was adapted from, the positions of n_features and
# sw_width are swapped to match the data format; neither is actually used inside the model,
# and epoch_num and verbose_set must be set before this call.
yhat, model, history = oned_cnn_model(sw_width, n_features, NL_train_X_data, NL_train_Y_data, NL_test_data,
                                      epoch_num, verbose_set)
NL_test_data = NL_test_data.reshape(-1, data_column - 1)
NL_test_yhat = np.hstack((NL_test_data, yhat))
test_yhat = min_max_scaler.inverse_transform(NL_test_yhat)
yhat = test_yhat[:, -1]
```
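The stacking step above exists because the scaler was fitted on all data_column columns, so inverse_transform expects that same width. A toy illustration with made-up numbers:

```python
from sklearn import preprocessing
import numpy as np

toy = np.array([[1., 10.], [2., 20.], [3., 30.]])
toy_scaler = preprocessing.MinMaxScaler().fit(toy)   # fitted on two columns
toy_scaled = toy_scaler.transform(toy)
toy_pred = toy_scaled[:, -1:]                        # pretend this is the model output
restored = toy_scaler.inverse_transform(np.hstack([toy_scaled[:, :-1], toy_pred]))
print(restored[:, -1])                               # back on the original scale: [10. 20. 30.]
```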
四 Writing the results to a txt file and saving the result plots to a folder by dependent-variable name

Key points
- the variable name of each matrix column can be written out together with its values
- the R2 score of each run is written out (a small r2_score example follows this list)
- the plots first show the dependent and independent variables, then overlay the predicted values on the dependent-variable panel to judge the fit
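For reference, a minimal example of sklearn's r2_score, which is used below to score each dependent variable (the numbers are made up):

```python
from sklearn.metrics import r2_score

y_true = [3.0, 2.5, 4.0, 7.1]
y_pred = [2.8, 2.9, 3.6, 7.4]
print(r2_score(y_true, y_pred))   # 1.0 is a perfect fit; the score can be negative for poor fits
```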
```python
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# i indexes the dependent variable currently being processed (see the note on Target above);
# result is assumed to be the denormalized test matrix from the previous section.
result = test_yhat
with open(r'D:\software\pycharm\PyCharm 2019.3.3\projects\predict_GASV\结果\R2\四口井3to5.txt', 'a') as f:
    # for k in range(8):
    #     f.writelines([df_Para.keys()[k], ' '])  # writelines accepts a list of strings, i.e. put the strings in a list and write them out
    f.writelines([df_Aim.keys()[i], ': '])
    # f.write('predict')
    # f.write("\n")
    # np.savetxt(f, np.around(result, 3), delimiter=" ")
    f.writelines(str(r2_score(las_35.data[26601:27848, b[i]], yhat)))
    f.write("\n")
    # f.write("\n")
    # f.write("\n")

# plotting
name = las_35.keys()[b[i]]
fig = plt.figure(figsize=(10, 9))  # canvas size
for k in range(4):
    if k < 3:
        ax = fig.add_subplot(1, 4, k + 1)  # one row, four columns; draw in panel k+1
        ax.plot(result[:, k], las_35.index[26601:27848])
        ax.set_xlabel(df_Para.keys()[k])
        ax.xaxis.tick_top()
        ax.invert_yaxis()
        plt.tick_params(labelsize=6)
    else:
        ax = fig.add_subplot(1, 4, k + 1)
        ax.plot(Target[16996:-1, ], las_35.index[26601:27848])
        ax.set_xlabel(las_35.keys()[b[i]])
        ax.xaxis.tick_top()
        plt.tick_params(labelsize=6)
# overlay the prediction on the dependent-variable panel
ax = fig.add_subplot(1, 4, 4)
ax.plot(yhat, las_35.index[26601:27848], color="red")
ax.set_xlabel(las_35.keys()[b[i]])
ax.xaxis.tick_top()
ax.invert_yaxis()
plt.tick_params(labelsize=6)
print("name: ", name)
plt.title(name)
plt.savefig(r"D:\software\pycharm\PyCharm 2019.3.3\projects\predict_GASV\结果\图片\四口井3to5\threecon/" + name + ".png")
plt.show()
```
The resulting plots:
五 Problems encountered
- Reading .las files: use the lasio.read() function.
- Building the training and test sets: in Python, [a:b, c:d] selects rows before the comma and columns after it; a:b takes the rows with index a through b-1 (indexing starts at 0), i.e. rows a+1 through b when counting from 1 (see the small slicing example below).
- Building the model: the model here uses three convolution stages; honestly, for regression with this few independent variables, my gut feeling is that three stages are not needed. channels_first was chosen, i.e. the number of channels is 1: by analogy with an image, where each position is described by 3 values (RGB), here each position is described by a single value, so there is one channel.
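A small slicing example on a toy matrix, matching the row/column selection described above (a list of column indices picks specific columns, as with the curve index lists a and b earlier):

```python
import numpy as np

m = np.arange(20).reshape(5, 4)   # 5 rows, 4 columns
print(m[1:3, 0:2])                # rows with index 1..2 and columns with index 0..1
# [[4 5]
#  [8 9]]
print(m[0:2, [0, 2]])             # rows 0..1, columns 0 and 2
# [[0 2]
#  [4 6]]
```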