数据归一化
假如我们不对数据进行归一化,损失曲线往往下降到一半的时候就不再快速下降了。因为之前的快速下降,是由某个尺度较大的特征带来的红利,一旦这部分红利耗尽,之后的迭代效果就不再那么明显,收敛会变得十分缓慢。
如图:
对数据进行归一化之后再进行线性回归:
归一化手段,示例使用均值归一化,公式如下:

$$X := \frac{X - \mathrm{avg}(X)}{\mathrm{range}(X)}$$

其中 $\mathrm{avg}(X)$ 是均值,$\mathrm{range}(X)$ 是取值范围(最大值减最小值)。
这样可以把 $X$ 控制在 $-0.5 < X < 0.5$ 的范围内。
import torch
from debug import ptf_tensor

# Design matrix: each row is a sample; the last column is the constant 1
# bias feature. Targets are a plain 1-D tensor.
x = torch.tensor(
    [[1, 1, 1], [2, 3, 1], [3, 5, 1], [4, 2, 1], [5, 4, 1]],
    dtype=torch.float32,
)
y = torch.tensor([-10, 12, 14, 16, 18], dtype=torch.float32)

# Per-column mean and (unbiased) standard deviation.
x_mean = torch.mean(x, dim=0)
x_std = torch.std(x, dim=0)
# Every sample's last element is the constant 1, so that column's std is 0
# and normalizing it would produce NaN. Forcing mean=0 / std=1 leaves the
# bias column untouched.
x_mean[-1] = 0
x_std[-1] = 1
x_norm = (x - x_mean) / x_std

ptf_tensor(x_mean,'mean')
ptf_tensor(x_std,'std')
ptf_tensor(x_norm,'norm')
输出:
The info of mean:
#############################
@dims: 1
@size: torch.Size([3])
@ele_sum: 3
@dtype: torch.float32
@data:
tensor([3., 3., 0.])
#############################
The info of std:
#############################
@dims: 1
@size: torch.Size([3])
@ele_sum: 3
@dtype: torch.float32
@data:
tensor([1.5811, 1.5811, 1.0000])
#############################
The info of norm:
#############################
@dims: 2
@size: torch.Size([5, 3])
@ele_sum: 15
@dtype: torch.float32
@data:
tensor([[-1.2649, -1.2649, 1.0000],
[-0.6325, 0.0000, 1.0000],
[ 0.0000, 1.2649, 1.0000],
[ 0.6325, -0.6325, 1.0000],
[ 1.2649, 0.6325, 1.0000]])
#############################
我们对之前的线性回归数据进行归一化,可以减少迭代次数:之前需要 30001 次迭代,现在只需要 10001 次。
import torch
import torch.nn
import torch.optim

# Features on wildly different scales (~1e6 vs ~1e-4): without normalization
# gradient descent crawls, so both x and y are standardized before training.
x = torch.tensor([[1000000, 0.0001], [2000000, 0.0003],
        [3000000, 0.0005], [4000000, 0.0002], [5000000, 0.0004]])
y = torch.tensor([-1000., 1200., 1400., 1600., 1800.]).reshape(-1, 1)

# Standardize each feature column and the target to zero mean / unit std.
x_mean, x_std = torch.mean(x, dim=0), torch.std(x, dim=0)
x_norm = (x - x_mean) / x_std
y_mean, y_std = torch.mean(y, dim=0), torch.std(y, dim=0)
y_norm = (y - y_mean) / y_std

fc = torch.nn.Linear(2, 1)        # linear model trained in normalized space
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(fc.parameters())

for step in range(10001):
    if step:
        # From the second iteration on, backprop the loss computed by the
        # previous iteration's forward pass, then update the parameters.
        optimizer.zero_grad()
        loss_norm.backward()
        optimizer.step()
    pred_norm = fc(x_norm)
    loss_norm = criterion(pred_norm, y_norm)
    if step % 1000 == 0:
        # De-normalize only when reporting: the original-scale prediction and
        # loss are display-only, so computing them every iteration was wasted
        # work (the printed output is unchanged).
        pred = pred_norm * y_std + y_mean
        loss = criterion(pred, y)
        print('step = {}, loss = {:g}'.format(step, loss))
输出:
step = 0, loss = 2.25879e+06
step = 1000, loss = 292322
step = 2000, loss = 216877
step = 3000, loss = 213382
step = 4000, loss = 213333
step = 5000, loss = 213333
step = 6000, loss = 213333
step = 7000, loss = 213333
step = 8000, loss = 213333
step = 9000, loss = 213333
step = 10000, loss = 213333
Demo: 世界人口数据线性回归实验
(1)从CSV文件获取数据
# Load the world-population CSV from a local path; the first CSV column is
# used as the DataFrame index.
url='C:/Users/HUAWEI/Desktop/深度学习/Blog附带代码/population.csv'
df = pd.read_csv(url, index_col=0)
#print(df) # preview the data
years=torch.tensor(df.iloc[:,0],dtype=torch.float32) # column 0: the years
populations=torch.tensor(df.iloc[:,1],dtype=torch.float32) # column 1: the population numbers
(2)Reshape
x=years.reshape(-1,1) # reshpae(-1,1)等价于reshape(n,1),因为大多时候我们都不知道所有元素的总数
ptf_tensor(x,'years reshape',)
y=populations
'''
这是我们需要的数据形式:
X=[ [,,] , [,,] , [,,] ] # X=m*n m是样本数量,n是特征数
y=[,,,] # Y=m*1
W=[,,] # W=n*1
Y=XW
'''
(3)归一化
def get_mean_std_norm(x):
    """Return the mean, the (unbiased) std, and the standardized copy of x."""
    mu = torch.mean(x)
    sigma = torch.std(x)
    normalized = (x - mu) / sigma
    return mu, sigma, normalized
# Standardize both the inputs and the targets.
x_mean,x_std,x_norm=get_mean_std_norm(x)
y_mean,y_std,y_norm=get_mean_std_norm(y)
(4)构建四个组件:线性层、损失函数、优化器、参数引用
即 Linear、criterion、optimizer,以及 weights 和 bias
分别用于:
- 计算线性函数的输出
- 计算当前的 Loss
- 更新参数(梯度下降)
- 查看训练过程中的中间结果
fc=torch.nn.Linear(1,1) # one input feature, one output in Y=XW
criterion=torch.nn.MSELoss()
optimizer=torch.optim.Adam(fc.parameters())
weights_norm,bias_norm=fc.parameters() # live references to the layer's parameters
(5)迭代
for step in range(20001):
    if step:
        # From the second iteration on, backprop the loss computed by the
        # previous iteration's forward pass, then update the parameters.
        optimizer.zero_grad()
        loss_norm.backward()
        optimizer.step()
    output_norm=fc(x_norm)
    pred_norm=output_norm.squeeze()
    loss_norm=criterion(pred_norm,y_norm) # sizes must match to compute the loss
    # Map the parameters learned in normalized space back to the original scale.
    weights=y_std/x_std *weights_norm
    bias=(weights_norm*(0-x_mean)/x_std+bias_norm)*y_std+y_mean
    if step % 5000==0:
        print('step={}, weights={}, loss={}'.format(step,weights.item(),loss_norm))
(6)绘图
plt.scatter(years, populations, s=0.1, label='actual', color='k')
# Note: years has size [67] while the fitted result has size [1, 67], so the
# result must be squeezed before plotting.
plt.plot(years.tolist(), (years*weights + bias).squeeze(dim=0).tolist(), label='result', color='k')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.show()
Demo 完整代码:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn
import torch.optim
from debug import ptf_tensor
url='C:/Users/HUAWEI/Desktop/深度学习/Blog附带代码/population.csv'
df = pd.read_csv(url, index_col=0)
#print(df) #数据预览
years=torch.tensor(df.iloc[:,0],dtype=torch.float32) # 取出第0列数据作为Years
populations=torch.tensor(df.iloc[:,1],dtype=torch.float32) #取出第1列数据作为人口数据
ptf_tensor(years,'years',data_only=True) #预览数据
ptf_tensor(populations,'populations',data_only=True) #预览数据
x=years.reshape(-1,1) # reshpae(-1,1)等价于reshape(n,1),因为大多时候我们都不知道所有元素的总数
ptf_tensor(x,'years reshape',)
y=populations
'''
这是我们需要的数据形式:
X=[ [,,] , [,,] , [,,] ] # X=m*n m是样本数量,n是特征数
y=[,,,] # Y=m*1
W=[,,] # W=n*1
Y=XW
'''
def get_mean_std_norm(x):
    """Standardize *x*: return (mean, std, (x - mean) / std)."""
    center = torch.mean(x)
    scale = torch.std(x)
    return center, scale, (x - center) / scale
# Standardize both the inputs and the targets.
x_mean,x_std,x_norm=get_mean_std_norm(x)
y_mean,y_std,y_norm=get_mean_std_norm(y)
fc=torch.nn.Linear(1,1) # one input feature, one output in Y=XW
criterion=torch.nn.MSELoss()
optimizer=torch.optim.Adam(fc.parameters())
weights_norm,bias_norm=fc.parameters() # live references to the layer's parameters
for step in range(20001):
    if step:
        # From the second iteration on, backprop the loss computed by the
        # previous iteration's forward pass, then update the parameters.
        optimizer.zero_grad()
        loss_norm.backward()
        optimizer.step()
    output_norm=fc(x_norm)
    pred_norm=output_norm.squeeze()
    loss_norm=criterion(pred_norm,y_norm) # sizes must match to compute the loss
    # Map the parameters learned in normalized space back to the original scale.
    weights=y_std/x_std *weights_norm
    bias=(weights_norm*(0-x_mean)/x_std+bias_norm)*y_std+y_mean
    if step % 5000==0:
        print('step={}, weights={}, loss={}'.format(step,weights.item(),loss_norm))
plt.scatter(years, populations, s=0.1, label='actual', color='k')
# Note: years has size [67] while the fitted result has size [1, 67], so the
# result must be squeezed before plotting.
plt.plot(years.tolist(), (years*weights + bias).squeeze(dim=0).tolist(), label='result', color='k')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.show()
输出:
step=0, weights=-3741121.0, loss=1.0858874320983887
step=5000, weights=75291096.0, loss=0.0031296168453991413
step=10000, weights=75291232.0, loss=0.0031296154484152794
step=15000, weights=75291248.0, loss=0.0031296159140765667
step=20000, weights=75291248.0, loss=0.0031296173110604286