导入需要的模块
import os
import zipfile
import paddle
import numpy as np
from matplotlib import pyplot as plt
设置dtype默认为float64 使用paddle调入uci_housing数据集
先设置数据类型的好处在于可以避免在散点图绘制时,因数据类型不匹配而报错
# Use float64 globally so the later scatter plot does not hit
# dtype-mismatch errors between predictions and ground truth.
paddle.set_default_dtype('float64')

print("数据开始加载")

BATCH_SIZE = 32  # mini-batch size shared by both loaders

# UCI (Boston) housing dataset: 13 features per sample, 1 target price.
train_dataset = paddle.text.datasets.UCIHousing(mode='train')
eval_dataset = paddle.text.datasets.UCIHousing(mode='test')

# Shuffle only the training data; keep the evaluation order deterministic.
train_loader = paddle.io.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
eval_loader = paddle.io.DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("数据加载完成")
书写训练模型
我们假定波士顿房价是符合线性的
#线性回归训练模型
# Linear regression model: we assume the Boston house price is a linear
# function of the 13 input features.
class Regressor(paddle.nn.Layer):
    def __init__(self):
        super(Regressor, self).__init__()
        # 13 input features -> 1 predicted price.
        # NOTE: paddle.nn.Linear has no activation parameter (the old
        # fluid Linear's `act` is gone); the original third positional
        # argument `None` was actually `weight_attr` and is dropped here.
        self.linear = paddle.nn.Linear(13, 1)

    def forward(self, inputs):
        # Forward pass: a single affine transform of the input batch.
        x = self.linear(inputs)
        return x
开始训练
#调用模型,并传入参数
# Instantiate the model and switch it to training mode.
mode = Regressor()
mode.train()

# Mean squared error loss: the smaller the value, the better the fit.
mse_loss = paddle.nn.MSELoss()
# SGD (stochastic gradient descent) nudges the parameters so that the
# predictions move closer to the true values each step.
opt = paddle.optimizer.SGD(learning_rate=0.005, parameters=mode.parameters())

epoch_nums = 200        # number of training epochs
all_train_accs = []     # reserved for accuracy curves (unused for regression)
all_train_loss = []     # sampled training losses, used for plotting later
Batch = 0               # running x-axis position for the loss curve
Batches = []

for epoch_num in range(epoch_nums):
    for batch_id, data in enumerate(train_loader()):
        feature = data[0]
        label = data[1]
        predict = mode(feature)
        loss = mse_loss(predict, label)
        loss.backward()      # back-propagate gradients
        opt.step()           # apply the parameter update
        opt.clear_grad()     # reset gradients so they do not accumulate
        # Record and print the loss every 10 batches for monitoring.
        if batch_id != 0 and batch_id % 10 == 0:
            Batch += 10
            Batches.append(Batch)
            all_train_loss.append(loss.numpy()[0])
            print('epoch:{}, step:{}, train_loss:{}'.format(epoch_num, batch_id, loss.numpy()[0]))

# Persist the trained parameters so evaluation can reload them later.
paddle.save(mode.state_dict(), 'Regressor')
绘制折线图观察损失收敛的情况
#损失制图,可视化操作,观察训练结果
# Loss curve visualization, to check how well training converged.
def draw_train_loss(Batches, train_loss):
    """Plot the sampled training-loss curve against batch count."""
    title = 'training loss'
    plt.title(title, fontsize=24)
    plt.xlabel('Batch', fontsize=16)
    plt.ylabel('Loss', fontsize=16)
    plt.plot(Batches, train_loss, color='red', label='training loss')
    plt.legend()
    plt.grid()
    plt.show()

draw_train_loss(Batches, all_train_loss)
输出结果:
训练结果还是可以接受的,和老师讲的也相近
模型评估 看看训练的好不好
#模型评估 输出在验证集上的损失值
# Model evaluation: reload the saved parameters and report the average
# MSE loss on the held-out test split.
para_state_dict = paddle.load('Regressor')
mode = Regressor()                    # fresh instance so this section is self-contained
mode.set_state_dict(para_state_dict)  # load the trained parameters
mode.eval()                           # inference mode

losses = []
infer_truths = []                     # model predictions, flattened across batches
ground_truths = []                    # true labels, flattened across batches

for batch_id, data in enumerate(eval_loader):
    feature = data[0]
    label = data[1]
    ground_truths.extend(label.numpy())
    predict = mode(feature)
    infer_truths.extend(predict.numpy())
    loss = mse_loss(predict, label)
    losses.append(loss.numpy()[0])

avg_loss = np.mean(losses)
print("当前训练模型在验证集上的损失值是:", avg_loss)
结果:
当前训练模型在验证集上的损失值是: 14.965278549837329
散点图对比
def draw_infer_result(ground_truths, infer_truths):
    """Scatter predicted prices against ground truth.

    Points lying on the y = x diagonal correspond to perfect
    predictions, so the closer the cloud hugs the line, the better.
    """
    title = 'BOSTON'  # fixed typo: was 'BPSTON'
    plt.title(title, fontsize=32)
    # Reference diagonal y = x over the typical price range.
    x = np.arange(1, 30)
    y = x
    plt.plot(x, y)
    plt.xlabel('Ground', fontsize=16)
    plt.ylabel('Infer', fontsize=16)
    # Label fixed: this is inference output, not training cost.
    plt.scatter(ground_truths, infer_truths, color='green', label='infer result')
    plt.legend()
    plt.grid()
    plt.show()
调用:
# Visualize predicted vs. actual prices collected during evaluation.
draw_infer_result(ground_truths, infer_truths)
输出结果:
提问:list.extend() 和 list.append() 有什么不同,代码中,为什么有的用了extend 有的用了append?
心得总结:
哎啊,初学搞懂这些确实不容易,现在可以自信地说这些代码懂了80%了吧,还蛮开心的。
因为一直有学校的专业课程,所以就是断断续续,到今天差不多有20多天了吧,总算可以独立把代码敲下来。或许三天打鱼两天晒网也是一个不错的学习方法,哈哈,因为顺应自然的节奏,全靠自然驱动,然后和顿悟撞个满怀。
飞桨的"问号"查询法帮助还挺大的,具体怎么操作,我放在下方的代码块中。看英文其实比理解中文更直接一点,一些语义翻译成中文总是有点绕口。
好了,这是今天的小确幸,做一个快乐学习的人儿。
"问号"查询方式
?paddle.nn.Layer
查询结果
Init signature: paddle.nn.Layer(name_scope=None, dtype='float32') Docstring: Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on. Parameters: name_scope (str, optional): prefix name used by the layer to name parameters. If prefix is "my_layer", parameter name in MyLayer can be "my_layer_0.w_n", where "w" is the parameter base name and "n" is an unique suffix auto-generated. If None, prefix name will be snake cased class name. Default: None. dtype(str, optional): data type of this parameter. If set str, it can be "bool", "float16", "float32", "float64", "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32" Returns: None Init docstring: __init__(self: paddle.fluid.core_avx.Layer) -> None File: /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py Type: type Subclasses: Sequential, ParameterList, LayerList, Conv2D, Conv3D, Conv3DTranspose, Pool2D, Linear, InstanceNorm, BatchNorm, ...