pytorch简单的神经网络,可用于简单的预测
简介
简单的神经网络,介绍就不写了
主函数
# coding=utf-8
import torch
import math
import torch.nn as nn
import torch.optim as optim
# import matplotlib.pyplot as plt#画图
import numpy as np
import torch.utils.data as Data
from model.utils import *
from torch.optim.lr_scheduler import StepLR
import torch.optim.lr_scheduler as lr_scheduler
# use_gpu = torch.cuda.is_available()
# GPU support is disabled; the flag is defined but never referenced below.
use_gpu = False
class Activation_Net(nn.Module):
    """Fully-connected feed-forward network with 1 to 5 Tanh hidden layers.

    The original implementation copy-pasted five near-identical if/elif
    branches; this version builds the same stack in a loop. Layers are
    registered as attributes ``layer1`` .. ``layerN`` (N = effective depth)
    so state_dicts saved by the original code still load unchanged.

    Args:
        in_dim: input feature dimension.
        n_hidden_1..n_hidden_5: widths of the hidden layers; widths beyond
            the chosen depth are ignored.
        out_dim: output dimension.
        num_layer: number of hidden blocks; values 1-4 are used as-is, any
            other value selects the 5-layer variant (matching the original
            ``else`` branch).
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4, n_hidden_5, out_dim, num_layer=1):
        super(Activation_Net, self).__init__()
        self.num_layer = num_layer
        widths = [in_dim, n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4, n_hidden_5]
        # The original if/elif chain mapped any num_layer outside 1-4 to 5 layers.
        depth = num_layer if num_layer in (1, 2, 3, 4) else 5
        # Hidden blocks 1 .. depth-1: Linear + Tanh.
        for i in range(1, depth):
            block = nn.Sequential(nn.Linear(widths[i - 1], widths[i]), nn.Tanh())
            setattr(self, 'layer%d' % i, block)
        # Final block: Linear + Tanh + output projection (exactly as before).
        head = nn.Sequential(nn.Linear(widths[depth - 1], widths[depth]),
                             nn.Tanh(),
                             nn.Linear(widths[depth], out_dim))
        setattr(self, 'layer%d' % depth, head)

    def forward(self, x):
        # Apply layer1 .. layerN in order; N follows the same 1-4-else-5 rule.
        depth = self.num_layer if self.num_layer in (1, 2, 3, 4) else 5
        for i in range(1, depth + 1):
            x = getattr(self, 'layer%d' % i)(x)
        return x
if __name__ == '__main__':
    # ---- experiment configuration ----
    test_num = 3  # number of test samples per group - years 2016 2017 2018
    train_num = 13
    all_num = 16  # total number of samples per group
    normal_name = 'm'  # normalisation method ('m' = max-min)
    BATCH_SIZE = 2
    con_net = False  # resume from the best saved checkpoint when True
    labels = ['accident_wclog', 'dead_wclog', 'hurt_wclog', 'money_wclog']
    choosed_labels = [1,2,3,4]
    # choosed_labels = [1]
    # choosed_labels = [2,4]
    learning_rate = 0.001  # learning rate
    set_epoch = 100000  # intended LR-decay trigger (scale LR by 0.1 after this many epochs); currently unused
    num_epoches = 1  # number of training epochs
    num_layer = 4  # how many hidden layers to use (default 1)
    n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4, n_hidden_5 = 12, 11, 10, 8, 6  # hidden-layer widths; widths beyond num_layer are ignored
    # Load data (project helper): returns train/test tensors plus the
    # per-column min/max needed to undo max-min normalisation later.
    train_features, test_features, lf, rf = loadData3('../data/x.txt', test_num, nor=normal_name, all_num=all_num)
    train_label, test_label, ll, rl = loadData3('../data/y.txt', test_num, nor=normal_name, all_num=all_num)
    # Full-batch training: one batch containing every training row.
    BATCH_SIZE = int(train_features.shape[0])
    # print(BATCH_SIZE)
    # Display names for features/labels (sets; order not significant here).
    features_name = {'总人口数', '男性人口数', '女性人口数', '城镇人口', '农村人口', '人口自然增长率', '汽车驾驶人数', '人口密度',
                     '流动人口', 'GDP', '人均GDP', '公路里程', '人均公路里程', '民用汽车拥有量', '交通运输投资'}
    label_name = {'事故死亡人数', '直接经济损失'}
    # Wrap the tensors in DataLoaders.
    train_dataset = Data.TensorDataset(train_features, train_label)
    train_loader = Data.DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=2
    )
    test_dataset = Data.TensorDataset(test_features, test_label)
    test_loader = Data.DataLoader(
        dataset=test_dataset,  # torch TensorDataset format
        batch_size=1,  # mini batch size
        shuffle=False,  # random shuffle for training
        num_workers=2  # subprocesses for loading data
    )
    in_dim, out_dim = train_features.shape[1], train_label.shape[1]  # input / output dimensions
    model = Activation_Net(in_dim, n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4, n_hidden_5, out_dim, num_layer)
    criterion = nn.MSELoss()
    # Alternative optimisers kept around for experimentation:
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate,betas=(0.9,0.999))
    # optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    # optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    optimizer = optim.Rprop(model.parameters(), lr=learning_rate, etas=(0.5, 1.2), step_sizes=(1e-06, 50))  # resilient backpropagation (Rprop)
    # optimizer = optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)  # adaptive learning rate (Adadelta)
    # optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)  # RMSprop
    # optimizer = optim.LBFGS(model.parameters(), lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-05, tolerance_change=1e-09, history_size=100, line_search_fn=None)  # L-BFGS
    # optimizer = optim.Adamax(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)  # Adamax
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
    # Per-target relative-error histories (one list per label).
    dead_wclog = []
    money_wclog = []
    accident_wclog = []
    hurt_wclog = []
    losslog = []
    wc_labels = []
    wclog = []
    log = [accident_wclog, dead_wclog, hurt_wclog, money_wclog]
    # Select which label histories to track (choosed_labels is 1-based).
    for cc in choosed_labels:
        wc_labels.append(labels[cc - 1])
        wclog.append(log[cc - 1])
    org_xl = []
    chg_xl = []
    if con_net:
        print('===> 继续最好的模型 ')
        try:
            checkpoint = torch.load('../checkpoint/low_error_parament.t7')
            model.load_state_dict(checkpoint['state'])  # restore weights from the saved dict
            start_epoch = checkpoint['epoch']
            # Resume fine-tuning with Adam at 1% of the base learning rate.
            optimizer = optim.Adam(model.parameters(), lr=learning_rate* 0.01, betas=(0.9, 0.999))
            print('===> 读取完成')
        except FileNotFoundError:
            print('Can\'t found bp_parament.t7')
    # ---- training loop ----
    for epoch in range(num_epoches):
        loss_train = []
        for step, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            # Train on RMSE rather than MSE.
            loss = torch.sqrt(loss)
            loss_train.append(loss.cpu().item())
            # NOTE(review): zero_grad is called twice per step; redundant but harmless.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        scheduler.step()
        losslog.append(np.mean(loss_train))
        print(epoch + 1)
        # Evaluate every epoch (`% 1` makes this unconditional; raise the
        # modulus to evaluate less often).
        if (epoch + 1) % 1 == 0:
            print('epoch: %d, train_loss:\t%f' %(epoch + 1, np.mean(loss_train)))
            loss_test = []
            a = None  # accumulated predictions
            b = None  # accumulated ground truth
            for x, y in test_loader:
                out = model(x)
                if a == None:
                    a = out.detach().numpy().tolist()
                    b = y.numpy().tolist()
                else:
                    a.extend(out.detach().numpy().tolist())
                    b.extend(y.numpy().tolist())
                loss = criterion(out, y)
                loss = torch.sqrt(loss)
                loss_test.append(loss.cpu().item())
            # De-normalise, then compute per-year relative errors.
            a = back(np.array(a), np.array(ll), np.array(rl))
            b = back(np.array(b), np.array(ll), np.array(rl))
            wc = cal_wc(a, b, test_num, out_dim)
            print('\t\t\ttest_loss:\t%f' % np.mean(loss_test))
            print(wc)
            # Transpose so wc[i] holds the values for tracked label i.
            wc = [[row[i] for row in wc] for i in range(len(wc[0]))]
            for iii in range(len(wc)):
                wclog[iii].append(wc[iii])
    # Plot the accumulated error curve for each tracked label.
    # NOTE(review): indentation was lost in the source; this is assumed to
    # run after the training loop — confirm against the original repo.
    for www in range(len(wclog)):
        draw(wclog[www], learning_rate, num_epoches, n_hidden_1, n_hidden_2, wc_labels[www])
    '''
    存储参数
    '''
    print('===> 存储 models ...')
    state = {
        'state': model.state_dict(),
        'epoch': epoch  # save the last epoch index alongside the weights
    }
    torch.save(state, '../checkpoint/bp_parament.t7')
    print("train over")
    # ---- model evaluation ----
    print('===> 读取模型 ')
    try:
        checkpoint = torch.load('../checkpoint/rprop/bp_parament.t7')
        model.load_state_dict(checkpoint['state'])  # restore weights from the saved dict
        start_epoch = checkpoint['epoch']
        print('===> 读取完成')
    except FileNotFoundError:
        print('Can\'t found bp_parament.t7')
    # Re-create the train loader WITHOUT shuffling so the model outputs
    # line up with the chronological year order when plotted.
    train_loader = Data.DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=2
    )
    chg_a, org_b = get_a_b(train_loader, model, criterion, ll, rl)
    # Transpose to per-label rows.
    chg_a = [[row[i] for row in chg_a] for i in range(len(chg_a[0]))]
    org_b = [[row[i] for row in org_b] for i in range(len(org_b[0]))]
    print(len(chg_a[0]))
    print('训练的数据')
    np.set_printoptions(precision=2)
    for cc in chg_a:
        print(np.array(cc))
    print('--over--')
    # Plot predicted vs. original series for each label on the training years.
    for i in range(len(chg_a)):
        draw_org_p(chg_a[i], org_b[i], train_num, wc_labels[i])
    # draw_org_m(chg_a, org_b, train_num)
    a, b = get_a_b(test_loader, model, criterion, ll, rl)
    print('--a--')
    print(a)
    print('---')
    # print('归一化之后的原始值')
    # Nationwide relative error on the held-out years.
    tt = cal_wc(a, b, test_num, out_dim)
    print('全国相对误差:')
    print(tt)
自定义包
# coding=utf-8
import torch
import numpy as np
from correlation import noramlization as nr
from scipy.stats import linregress
import matplotlib.pyplot as plt
import os
# Work around "duplicate OpenMP runtime" crashes when MKL and matplotlib's
# bundled libiomp are both loaded (common on Windows/conda setups).
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
"""
加载数据
"""
def loadData(fileName, test_rate, nor=None):
    """Read a whitespace-separated numeric file and split it front/back.

    Note: despite its name, ``test_rate`` is the fraction of rows placed
    in the FIRST (training) split; the remainder becomes the test split.
    Optional normalisation: 'm' = max-min, 'z' = z-score.
    Returns (train_tensor, test_tensor).
    """
    rows = []
    with open(fileName) as txtData:
        for line in txtData.readlines():
            rows.append(list(map(float, line.split())))
    arr = np.array(rows)
    # Optional normalisation via the project's helper module.
    if nor == 'm':
        arr = nr.max_min(arr)
    elif nor == 'z':
        arr = nr.z_score(arr)
    cut = int(len(arr) * test_rate)
    return torch.FloatTensor(arr[: cut]), torch.FloatTensor(arr[cut: ])
def loadDataTwo(fileName, test_rate):
    """Read a numeric text file and split it into train/test tensors.

    The first ``int(len(data) * test_rate)`` rows form the training split;
    the rest form the test split. No normalisation is applied here.
    """
    with open(fileName) as txtData:
        matrix = [list(map(float, ln.split())) for ln in txtData.readlines()]
    matrix = np.array(matrix)
    # Normalisation intentionally left disabled in this variant.
    # matrix = nr.noramlizete(matrix)
    split = int(len(matrix) * test_rate)
    head = matrix[: split]
    tail = matrix[split: ]
    return torch.FloatTensor(head), torch.FloatTensor(tail)
def noramlizate(data, name='m'):
    '''
    Normalise *data* (max-min when name == 'm', otherwise z-score) and
    return the result as a torch.FloatTensor.
    '''
    # Exact type check kept from the original (not isinstance) so ndarray
    # subclasses go through np.array() as before.
    if type(data) is not np.ndarray:
        data = np.array(data)
    scaled = nr.max_min(data) if name == 'm' else nr.z_score(data)
    return torch.FloatTensor(scaled)
def get_r2(model, data, sign=1.):
    """Return ``sign`` times the R-squared of a linear fit of *data* on *model*."""
    _, _, r_value, _, _ = linregress(model, data)
    return sign * (r_value ** 2)
def loadData2(fileName, test_num, nor=None, all_num=20):
    """Load a numeric file and split each consecutive group of ``all_num``
    rows into (all_num - test_num) training rows followed by ``test_num``
    test rows.

    Returns (train_tensor, test_tensor, low, high); low/high are the
    per-column extrema when max-min normalisation ('m') is requested,
    otherwise None.
    """
    with open(fileName) as txtData:
        raw = [list(map(float, ln.split())) for ln in txtData.readlines()]
    raw = np.array(raw)
    lo, hi = None, None
    if nor == 'm':
        raw, lo, hi = nr.max_min(raw)
    elif nor == 'z':
        raw = nr.z_score(raw)
    per_train = all_num - test_num
    train_rows, test_rows = [], []
    idx = 0
    while idx < len(raw):
        # First per_train rows of the group -> train, next test_num -> test.
        for _ in range(per_train):
            train_rows.append(raw[idx])
            idx += 1
        for _ in range(test_num):
            test_rows.append(raw[idx])
            idx += 1
    return torch.FloatTensor(train_rows), torch.FloatTensor(test_rows), lo, hi
def loadData3(fileName, test_num, nor=None, all_num=15):
    """Nationwide loader: like loadData2, except the LAST ``test_num`` rows
    of each training group are reused as that group's test rows (the index
    backs up by test_num before collecting the test split).

    Returns (train_tensor, test_tensor, low, high); low/high are the
    per-column extrema under max-min normalisation, otherwise None.
    """
    with open(fileName) as txtData:
        raw = [list(map(float, ln.split())) for ln in txtData.readlines()]
    raw = np.array(raw)
    lo, hi = None, None
    if nor == 'm':
        raw, lo, hi = nr.max_min(raw)
    elif nor == 'z':
        raw = nr.z_score(raw)
    per_group = all_num - test_num
    train_rows, test_rows = [], []
    idx = 0
    while idx < len(raw):
        for _ in range(per_group):
            train_rows.append(raw[idx])
            idx += 1
        # Back up so the test split overlaps the tail of the training group.
        idx -= test_num
        for _ in range(test_num):
            test_rows.append(raw[idx])
            idx += 1
    return torch.FloatTensor(train_rows), torch.FloatTensor(test_rows), lo, hi
def get_wc(a, b):
    """Element-wise relative error of *a* with respect to *b*: (a - b) / b."""
    return (a - b) / b
def back(data, l, r):
    """Invert max-min normalisation: map *data* from [0, 1] back to [l, r]."""
    return l + data * (r - l)
def cal_wc(a, b, test_num, out_dim):
    """Sum predictions and targets position-wise across consecutive groups
    of ``test_num`` rows, then return the element-wise relative error of
    the summed predictions against the summed targets."""
    pred_sum = np.zeros((test_num, out_dim))
    true_sum = np.zeros((test_num, out_dim))
    i = 0
    while i < len(a):
        for j in range(test_num):
            pred_sum[j] += a[i]
            true_sum[j] += b[i]
            i += 1
    return get_wc(pred_sum, true_sum)
def draw_org_p(a, b, train_num, s):
    """Plot predicted (*a*) vs. original (*b*) series over the years starting
    at 2004 and save the figure to ../fig/person.png.

    Only the first len(years) points of each series are drawn; *s* becomes
    the plot title. Dead commented-out aggregation code from the original
    has been removed.
    """
    tit = '../fig/' + 'person' + '.png'
    plt.xlabel('year')
    plt.ylabel('dead numble')
    plt.title(s)
    y = [i for i in range(2004, 2004 + train_num)]
    l = len(y)
    plt.plot(y, a[: l], label='pre', marker='d')
    plt.plot(y, b[: l], label='org', marker='*')
    # Fix: legend must be added BEFORE savefig, otherwise the saved image
    # contains no legend (the original called legend() after savefig()).
    plt.legend()
    plt.savefig(tit)
    plt.show()
def draw_org_m(a, b, train_num):
    """Aggregate 2-column rows position-wise across groups of ``train_num``
    rows, then plot column 1 (the 'money' series) predicted vs. original
    and save the figure to ../fig/money.png."""
    at = np.zeros((train_num, 2))
    bt = np.zeros((train_num, 2))
    i = 0
    while (i < len(a)):
        for j in range(train_num):
            at[j] += a[i]
            bt[j] += b[i]
            i += 1
    tit = '../fig/' + 'money' + '.png'
    plt.xlabel('year')
    plt.ylabel('dead numble')
    y = [i for i in range(2004, 2004 + train_num)]
    ma = [at[i][1] for i in range(len(at))]
    # Fix: the original read bt[i][0] here (copy-paste slip); column 1 is
    # the series actually plotted against ma.
    mb = [bt[i][1] for i in range(len(bt))]
    plt.plot(y, ma, label='pre')
    plt.plot(y, mb, label='org')
    # Fix: legend before savefig so it appears in the saved image.
    plt.legend()
    plt.savefig(tit)
    plt.show()
def get_a_b(test_loader, model, criterion, ll, rl):
    """Run *model* over every batch of *test_loader*, print the per-batch
    loss and the normalised values, and return the de-normalised
    (predictions, targets) as numpy arrays.

    ll/rl are the per-column minima/maxima used by ``back`` to invert the
    max-min normalisation.
    """
    a = None  # accumulated predictions (list of rows)
    b = None  # accumulated ground truth (list of rows)
    for x, y in test_loader:
        out = model(x)
        # Fix: use `is None` instead of `== None` — identity is the correct
        # check; equality comparison with None is fragile and unidiomatic.
        if a is None:
            a = out.detach().numpy().tolist()
            b = y.numpy().tolist()
        else:
            a.extend(out.detach().numpy().tolist())
            b.extend(y.numpy().tolist())
        loss = criterion(out, y)
        print('test loss : %f' % loss.item())
    print('归一化之后的原始值')
    print(b)
    print('归一化之后的预测值')
    print(a)
    a = back(np.array(a), np.array(ll), np.array(rl))
    b = back(np.array(b), np.array(ll), np.array(rl))
    return a, b
def draw(log, lr, n_ep, n1, n2, s):
    """Plot |relative error| per epoch for each predicted year and save the
    figure under ../fig/ with a name encoding lr, epochs and layer widths.

    *log* is a list (one entry per epoch) of per-year error rows; *s* is
    both the plot title and the filename prefix.
    """
    tit = '../fig/' + s + str(lr) + '_' + str(n_ep) + '_' + str(n1) + '_' + str(n2) + '.png'
    plt.xlabel('epoch')
    plt.ylabel('wu_cha')
    plt.title(s)
    # Transpose: y[i] is the |error| curve of year (2017 + i) across epochs.
    y = [[abs(row[i]) for row in log] for i in range(len(log[0]))]
    x = [i for i in range(len(log))]
    make = ['*', 'd', 's', '1']
    for i in range(len(y)):
        # Fix: cycle markers so more than 4 series no longer raises IndexError.
        plt.plot(x, y[i], label=2017 + i, marker=make[i % len(make)])
    # Fix: legend before savefig so it is included in the saved image.
    plt.legend()
    plt.savefig(tit)
    plt.show()