Output:
w_true.shape: (2,)
w_reshape.shape: (2, 1)
Y.shape : (3000, 1)
features[0]: tf.Tensor([-0.4468004 -1.99245 ], shape=(2,), dtype=float32)
lables[0]: tf.Tensor([-1.6724218], shape=(1,), dtype=float32)
tf.Tensor([0.20223746 0.37185124], shape=(2,), dtype=float32)
tf.Tensor([4.583185], shape=(1,), dtype=float32)
w_pred: <tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=
array([[0.898227 ],
[4.1819596]], dtype=float32)>
b_pred: <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([-1.8331218], dtype=float32)>
begin to train:
epoch: 0 loss: tf.Tensor(0.06641422, shape=(), dtype=float32)
epoch: 1 loss: tf.Tensor(0.00013125899, shape=(), dtype=float32)
epoch: 2 loss: tf.Tensor(7.7137787e-07, shape=(), dtype=float32)
epoch: 3 loss: tf.Tensor(3.143039e-07, shape=(), dtype=float32)
epoch: 4 loss: tf.Tensor(3.2408622e-07, shape=(), dtype=float32)
epoch: 5 loss: tf.Tensor(4.1735092e-07, shape=(), dtype=float32)
epoch: 6 loss: tf.Tensor(4.5431543e-07, shape=(), dtype=float32)
epoch: 7 loss: tf.Tensor(4.2399319e-07, shape=(), dtype=float32)
epoch: 8 loss: tf.Tensor(4.0079112e-07, shape=(), dtype=float32)
epoch: 9 loss: tf.Tensor(3.8878218e-07, shape=(), dtype=float32)
w_true: tf.Tensor([6. 1.], shape=(2,), dtype=float32)
w_pred: tf.Tensor([6.0000153 0.99999523], shape=(2,), dtype=float32)
b_true: 3
b_pred: <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([2.9999723], dtype=float32)>
Code:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 15 15:43:47 2023
@author: Shawn.Li
"""
import tensorflow as tf
import random
from d2l import tensorflow as d2l
from matplotlib import pyplot as plt
# Generate a dataset from a linear model
# The true coefficient vector is [6., 1] (shape = 2,)
# and the bias is 3
# The generated dataset contains 3000 samples
def synthetic_data(w, b, samples_num):  #@save
    X = tf.random.normal((samples_num, w.shape[0]))  # samples_num * w.shape[0] = 3000 * 2
    w_reshape = tf.reshape(w, (-1, 1))
    print("w_reshape.shape:", w_reshape.shape)
    Y = tf.matmul(X, w_reshape)  # without the reshape, w is only a 1-D tensor and cannot be matrix-multiplied with X
    Y = Y + b
    Y += tf.random.normal(Y.shape, stddev=0.001)  # Y.shape = 3000*1
    print("Y.shape :", Y.shape)
    return X, Y
w_true = tf.constant([6., 1])  # defined this way the tensor is only 1-D, so it needs a reshape before matrix multiplication with X
b_true = 3
print("w_true.shape:", w_true.shape)

features, lables = synthetic_data(w_true, b_true, 3000)
print("features[0]:", features[0])
print("lables[0]:", lables[0])
# Plot (features, lables) as scatter charts
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].numpy(), lables.numpy(), 0.1)
d2l.plt.scatter(features[:, 0].numpy(), lables.numpy(), 0.1)
# This looked wrong at first: for a linear model, keeping only one of the two scatter calls
# above should seemingly give a straight line, but it does not. The reason is that the labels
# depend on BOTH features, so the contribution of the other feature shows up as vertical
# spread in a 2-D scatter; the 3-D scatter below shows the underlying plane
# (see the residual check after plt.show()).
fig = plt.figure()
axe1 = plt.axes(projection='3d')
axe1.scatter(features[:, 0].numpy(), features[:, 1].numpy(), lables.numpy(), s=0.01)
plt.show()
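
# Residual check (an illustrative sketch confirming the note above): after subtracting the
# contribution of features[:,0], the labels plotted against features[:,1] collapse onto an
# almost perfect straight line (the noise stddev is only 0.001).
residual = lables[:, 0] - w_true[0] * features[:, 0]
plt.figure()
plt.scatter(features[:, 1].numpy(), residual.numpy(), s=0.1)
plt.title("lables minus the features[:,0] term vs. features[:,1]")
plt.show()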
# Read the dataset in random mini-batches
# A plain-language explanation of yield: https://blog.csdn.net/mieleizhi0522/article/details/82142856/
# Data iterator: by repeatedly calling next() on the generator, or by iterating with a for loop,
# it pulls a random group of batch_size samples from the dataset
# tf.gather extracts rows from a tensor according to the given indices
def data_iter(features, lables, batch_size):
    examples_num = len(lables)
    #print("examples_num:", examples_num)
    indices = list(range(examples_num))
    #print("indices:", indices[0:5])
    random.shuffle(indices)  # note: shuffle works in place rather than returning a result, much like returning through a pointer in C; missing this cost more than half an hour of debugging
    #print("indices:", indices[0:5])
    for i in range(0, examples_num, batch_size):
        #print("i:", i)
        j = tf.constant(indices[i:min(i + batch_size, examples_num)])
        #print("j:", j)
        yield tf.gather(features, j), tf.gather(lables, j)

batch_data = data_iter(features, lables, batch_size=20)
for x, y in batch_data:
    print(x[0])
    print(y[0])
    break
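
# An equivalent way to get shuffled mini-batches (illustrative sketch only; the hand-written
# data_iter above is what the training loop below actually uses) is the tf.data input pipeline:
dataset = tf.data.Dataset.from_tensor_slices((features, lables))
dataset = dataset.shuffle(buffer_size=3000).batch(20)
for x, y in dataset.take(1):
    print("tf.data batch shapes:", x.shape, y.shape)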
# In linear regression the form of the model is known and its parameters are unknown;
# by repeatedly iterating over the dataset we move the estimates toward the true parameters
# Build the model in that form and randomly initialize the parameters
def linear_regression(w, x, b):
    return tf.matmul(x, w) + b

w_pred = tf.Variable(tf.random.normal([2, 1]), trainable=True)  # note: marked as a trainable variable
b_pred = tf.Variable(tf.random.normal([1]), trainable=True)
print("w_pred:", w_pred, "\nb_pred:", b_pred)
# Next, define a loss function and an optimization algorithm
def calc_lost(y, y_hat):
    return (y_hat - tf.reshape(y, y_hat.shape)) ** 2 / 2  # squared loss

def sgd(paramaters, grads, learn_rate, batch_size):  # minibatch gradient descent update
    for param, grad in zip(paramaters, grads):  # zip pairs up parameters and gradients element by element
        param.assign_sub(learn_rate * grad / batch_size)
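
# What sgd() implements, for each parameter p:
#     p <- p - learn_rate * grad(p) / batch_size
# calc_lost returns a per-example loss (shape batch_size x 1), and g.gradient in the training
# loop below implicitly sums a non-scalar target before differentiating, so dividing by
# batch_size turns the summed gradient into a per-example average, i.e. minibatch SGD.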
# 进行线性回归的计算,也就是训练
lr = 0.03 # 学习率
epochs_num = 10 # 训练轮次
batch_size = 30
print("begin to train:")
for epoch in range(epochs_num):  # the 3000 samples are reshuffled epochs_num (= 10) times, so the parameters are updated 3000/batch_size * epochs_num times in total
    batch_data1 = data_iter(features, lables, batch_size)
    #print("data iter success")
    for x_iter, y_iter in batch_data1:
        with tf.GradientTape() as g:  # record the computation for automatic differentiation
            model = linear_regression(w_pred, x_iter, b_pred)  # forward pass with the current parameter estimates
            #print("build mode success")
            loss = calc_lost(y_iter, model)  # training loss on this batch
            #print("loss calculate success")
        w_grad, b_grad = g.gradient(loss, [w_pred, b_pred])  # gradients of the loss w.r.t. the estimated parameters
        sgd([w_pred, b_pred], [w_grad, b_grad], lr, batch_size)  # update the parameters along the negative gradient direction
    # report how each epoch went, measured with the loss function
    epoch_loss = calc_lost(y_iter, linear_regression(w_pred, x_iter, b_pred))
    print("epoch:", epoch, "loss:", tf.reduce_mean(epoch_loss))
print("w_true:",w_true)
print("w_pred:",tf.reshape(w_pred,w_true.shape))
print("b_true:",b_true)
print("b_pred:",b_pred)