# Import modules
import torch
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from torch import nn,optim
import torch.nn.functional as F
# Global plotting style: seaborn whitegrid with a custom colour palette.
sns.set(style='whitegrid',palette='muted',font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#93D30C", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 12,6
# Fix the random seeds so NumPy draws and torch weight init are reproducible.
np.random.seed(42)
torch.manual_seed(42)
<torch._C.Generator at 0x2c012576cd0>
# Load the climate dataset from a local Excel file (Windows-specific path;
# adjust per machine).
df = pd.read_excel(r'F:\桌面\1131论文计划\数据\气候数据\端数据.xlsx')
df  # notebook display: 1671 rows x 4 columns (rain, cloud cover, two radiation measures)
雨量(mm) | 云量 | 短波辐射(W/m²) | 地面太阳能辐射(W/m²) | |
---|---|---|---|---|
0 | 0.000000 | 0.076082 | 0.000 | 0.000 |
1 | 0.000000 | 0.166019 | 0.000 | 0.000 |
2 | 0.000000 | 0.158138 | 0.000 | 0.000 |
3 | 0.444259 | 0.775674 | 0.000 | 0.000 |
4 | 0.580704 | 0.702988 | 0.000 | 0.000 |
... | ... | ... | ... | ... |
1666 | 2.676830 | 0.087764 | 480.097 | 576.898 |
1667 | 1.879050 | 0.074139 | 639.283 | 768.179 |
1668 | 5.603720 | 0.330152 | 750.970 | 902.385 |
1669 | 9.771170 | 0.384817 | 507.725 | 610.096 |
1670 | 8.595600 | 0.972438 | 444.973 | 534.691 |
1671 rows × 4 columns
df.shape  # sanity check: (rows, columns)
(1671, 4)
# df.iloc[0:4].apply(lambda x: x.mean(),axis=0)[0:4]
# NOTE(review): the next two lines set exact zeros (rows 1+ only, because the
# misaligned iloc[1:] mask leaves row 0 False) to None and then fill NaN back
# with 0 -- a net no-op for this frame; presumably leftover cleanup code.
df[df.iloc[1:] == 0] = None
df = df.fillna(0)
# Synthesise a random binary label per row: P(0)=0.2, P(1)=0.8.
# Depends on np.random.seed(42) above for reproducibility.
target = []
p =([0.2, 0.8])
for i in range(len(df)):
    value= np.random.choice([0, 1], p = p)
    target.append(value)
df['target'] = target
df
雨量(mm) | 云量 | 短波辐射(W/m²) | 地面太阳能辐射(W/m²) | target | |
---|---|---|---|---|---|
0 | 0.000000 | 0.076082 | 0.000 | 0.000 | 1 |
1 | 0.000000 | 0.166019 | 0.000 | 0.000 | 1 |
2 | 0.000000 | 0.158138 | 0.000 | 0.000 | 1 |
3 | 0.444259 | 0.775674 | 0.000 | 0.000 | 1 |
4 | 0.580704 | 0.702988 | 0.000 | 0.000 | 0 |
... | ... | ... | ... | ... | ... |
1666 | 2.676830 | 0.087764 | 480.097 | 576.898 | 1 |
1667 | 1.879050 | 0.074139 | 639.283 | 768.179 | 1 |
1668 | 5.603720 | 0.330152 | 750.970 | 902.385 | 0 |
1669 | 9.771170 | 0.384817 | 507.725 | 610.096 | 1 |
1670 | 8.595600 | 0.972438 | 444.973 | 534.691 | 1 |
1671 rows × 5 columns
# Class-balance check. seaborn 0.10+ deprecates positional data arguments
# (the FutureWarning seen previously); pass the column as the x= keyword.
sns.countplot(x=df.target)
C:\Users\kingS\anaconda3\envs\pytorch_gpu\lib\site-packages\seaborn\_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
<AxesSubplot:xlabel='target', ylabel='count'>
df.target.value_counts() / df.shape[0]  # class proportions (~79% positive / ~21% negative)
1 0.787552
0 0.212448
Name: target, dtype: float64
拆分数据集
# 75/25 split (sklearn default). NOTE(review): no random_state/stratify is
# passed, so the split relies on the global NumPy seed; given the class
# imbalance, stratify=df.target would be safer -- confirm intent.
X_train,X_test,y_train,y_test = train_test_split(df.iloc[:,0:4],df.iloc[:,4:5])
X_train
雨量(mm) | 云量 | 短波辐射(W/m²) | 地面太阳能辐射(W/m²) | |
---|---|---|---|---|
1499 | 0.510666 | 0.198878 | 289.87400 | 348.32000 |
1426 | 0.000000 | 0.570845 | 284.09500 | 341.37600 |
537 | 0.295273 | 0.700285 | 158.36400 | 190.29500 |
642 | 0.017410 | 0.974240 | 707.54500 | 850.20400 |
1551 | 0.000000 | 0.395067 | 866.13400 | 1040.77000 |
... | ... | ... | ... | ... |
1030 | 0.000000 | 0.789411 | 0.00000 | 0.00000 |
1090 | 2.516090 | 0.000000 | 495.54500 | 595.46000 |
133 | 0.000000 | 0.060874 | 662.38600 | 795.94100 |
1485 | 1.201880 | 0.576212 | 1.48415 | 1.78339 |
1239 | 0.000000 | 0.110398 | 399.67100 | 480.25500 |
1253 rows × 4 columns
数据类型转换
# Convert the pandas splits into float32 tensors. The label frames come out
# of train_test_split with shape (n, 1); squeeze them to flat (n,) vectors
# so they line up with the squeezed network output for BCELoss.
X_train = torch.as_tensor(X_train.to_numpy(), dtype=torch.float32)
X_test = torch.as_tensor(X_test.to_numpy(), dtype=torch.float32)
y_train = torch.as_tensor(y_train.to_numpy(), dtype=torch.float32).squeeze()
y_test = torch.as_tensor(y_test.to_numpy(), dtype=torch.float32).squeeze()
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)
torch.Size([1253, 4]) torch.Size([1253])
torch.Size([418, 4]) torch.Size([418])
构建神经网络
class NaiveNet(nn.Module):
    """Small fully-connected binary classifier: n_features -> 5 -> 3 -> 1.

    Both hidden layers use ReLU; the single output unit is squashed with a
    sigmoid so the result can be fed straight into ``nn.BCELoss``.
    """

    def __init__(self, n_features):
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        # Two ReLU hidden layers, then a sigmoid-activated scalar output.
        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))
        return torch.sigmoid(self.fc3(hidden))
def ann_viz(model, view=True, filename="network.gv"):
    """Visualize the ``nn.Linear`` layers of a PyTorch model with graphviz.

    (The original docstring claimed "A Keras model instance"; the code
    actually walks ``torch.nn.Linear`` sub-modules of a PyTorch model.)
    Each Linear layer becomes a column of circle nodes, fully connected to
    the previous column; columns wider than 10 units are truncated to 10
    drawn nodes with a "(+k)" note in the label.

    # Arguments
        model: a torch.nn.Module containing torch.nn.Linear sub-modules.
        view: whether to display the rendered graph after generation.
        filename: where to save the visualization (a .gv file).

    Returns the ``graphviz.Digraph`` instance.
    """
    from graphviz import Digraph  # third-party; imported lazily
    import torch

    # Single palette definition (the original re-declared this constant
    # five times inside the loops below; all copies were identical).
    HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#93D30C", "#8F00FF"]
    input_layer = 0
    hidden_layers_nr = 0
    layer_types = []
    hidden_layers = []
    output_layer = 0
    # Only Linear layers are rendered; every other module type is ignored.
    layers = [layer for layer in model.modules() if type(layer) == torch.nn.Linear]
    for layer in layers:
        if layer == layers[0]:
            # First Linear layer defines the input width and the first hidden column.
            input_layer = layer.in_features
            hidden_layers_nr += 1
            if type(layer) == torch.nn.Linear:
                hidden_layers.append(layer.out_features)
                layer_types.append("Dense")
            else:
                raise Exception("Input error")
        else:
            if layer == layers[-1]:
                output_layer = layer.out_features
            else:
                hidden_layers_nr += 1
                if type(layer) == torch.nn.Linear:
                    hidden_layers.append(layer.out_features)
                    layer_types.append("Dense")
                else:
                    raise Exception("Hidden error")
    last_layer_nodes = input_layer
    nodes_up = input_layer
    g = Digraph("g", filename=filename)
    n = 0  # running node id across all columns
    g.graph_attr.update(splines="false", nodesep="0.5", ranksep="0", rankdir='LR')
    # Input layer column.
    with g.subgraph(name="cluster_input") as c:
        if type(layers[0]) == torch.nn.Linear:
            the_label = "Input Layer"
            if layers[0].in_features > 10:
                the_label += " (+" + str(layers[0].in_features - 10) + ")"
                input_layer = 10  # draw at most 10 nodes
            c.attr(color="white")
            for i in range(0, input_layer):
                n += 1
                c.node(str(n))
            c.attr(labeljust="1")
            c.attr(label=the_label, labelloc="bottom")
            c.attr(rank="same")
            c.node_attr.update(
                width="0.65",
                style="filled",
                shape="circle",
                color=HAPPY_COLORS_PALETTE[3],
                fontcolor=HAPPY_COLORS_PALETTE[3],
            )
    # Hidden layer columns.
    for i in range(0, hidden_layers_nr):
        with g.subgraph(name="cluster_" + str(i + 1)) as c:
            if layer_types[i] == "Dense":
                c.attr(color="white")
                c.attr(rank="same")
                the_label = f'Hidden Layer {i + 1}'
                if layers[i].out_features > 10:
                    the_label += " (+" + str(layers[i].out_features - 10) + ")"
                    hidden_layers[i] = 10  # draw at most 10 nodes
                c.attr(labeljust="right", labelloc="b", label=the_label)
                for j in range(0, hidden_layers[i]):
                    n += 1
                    c.node(
                        str(n),
                        width="0.65",
                        shape="circle",
                        style="filled",
                        color=HAPPY_COLORS_PALETTE[0],
                        fontcolor=HAPPY_COLORS_PALETTE[0],
                    )
                    # Fully connect this node to the previous column.
                    for h in range(nodes_up - last_layer_nodes + 1, nodes_up + 1):
                        g.edge(str(h), str(n))
                last_layer_nodes = hidden_layers[i]
                nodes_up += hidden_layers[i]
            else:
                raise Exception("Hidden layer type not supported")
    # Output layer column.
    with g.subgraph(name="cluster_output") as c:
        if type(layers[-1]) == torch.nn.Linear:
            c.attr(color="white")
            c.attr(rank="same")
            c.attr(labeljust="1")
            for i in range(1, output_layer + 1):
                n += 1
                c.node(
                    str(n),
                    width="0.65",
                    shape="circle",
                    style="filled",
                    color=HAPPY_COLORS_PALETTE[4],
                    fontcolor=HAPPY_COLORS_PALETTE[4],
                )
                for h in range(nodes_up - last_layer_nodes + 1, nodes_up + 1):
                    g.edge(str(h), str(n))
            c.attr(label="Output Layer", labelloc="bottom")
            c.node_attr.update(
                color="#2ecc71", style="filled", fontcolor="#2ecc71", shape="circle"
            )
    g.attr(arrowShape="none")
    g.edge_attr.update(arrowhead="none", color="#707070", penwidth="2")
    if view is True:
        g.view()
    return g
# Build the network with one input per feature column and render its topology
# (requires the graphviz package and binaries).
net = NaiveNet(X_train.shape[1])
ann_viz(net,view=True)
激活函数可视化
# ReLU activation: identity for positive inputs, zero otherwise.
ax = plt.gca()
plt.plot(
    np.linspace(-1, 1, 5),
    F.relu(torch.linspace(-1, 1, steps=5)).numpy()
)
ax.set_ylim([-1.5, 1.5]);
# Sigmoid activation: squashes the real line into (0, 1).
ax = plt.gca()
plt.plot(
    np.linspace(-10, 10, 100),
    torch.sigmoid(torch.linspace(-10, 10, steps=100)).numpy()
)
ax.set_ylim([-0.5, 1.5]);
训练神经网络
criterion = nn.BCELoss()  # binary cross-entropy; expects sigmoid outputs in (0, 1)
优化器
optimizer = optim.Adam(net.parameters(),lr=0.001) # Adam with learning rate 0.001
在GPU上计算
# Move data, model and loss module to the GPU when one is available.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)
net = net.to(device)
criterion = criterion.to(device)
寻找最优参数
def calculate_accuracy(y_true, y_pred):
    """Fraction of predictions matching the labels after thresholding at 0.5."""
    labels = (y_pred >= 0.5).view(-1)
    correct = (y_true == labels).sum()
    return correct.float() / len(y_true)
开始训练
def round_tensor(t, decimal_places=3):
    """Return a scalar tensor's Python value rounded to *decimal_places*."""
    value = t.item()
    return round(value, decimal_places)
# Full-batch training: 1000 epochs of forward pass, BCE loss, backprop, Adam step.
for epoch in range(1000):
    y_pred = net(X_train)
    y_pred = torch.squeeze(y_pred)  # (n, 1) -> (n,) to match y_train
    train_loss = criterion(y_pred, y_train)
    # Log train/test loss and accuracy every 100 epochs.
    if epoch % 100 == 0:
        train_acc = calculate_accuracy(y_train, y_pred)
        y_test_pred = net(X_test)
        y_test_pred = torch.squeeze(y_test_pred)
        test_loss = criterion(y_test_pred, y_test)
        test_acc = calculate_accuracy(y_test, y_test_pred)
        print(f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
Test set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}''')
    optimizer.zero_grad()  # clear accumulated gradients
    train_loss.backward()  # backpropagate the error
    optimizer.step()       # update parameters
epoch 0
Train set - loss: 4.709, accuracy: 0.784
Test set - loss: 5.482, accuracy: 0.799
epoch 100
Train set - loss: 0.556, accuracy: 0.784
Test set - loss: 0.548, accuracy: 0.799
epoch 200
Train set - loss: 0.537, accuracy: 0.784
Test set - loss: 0.531, accuracy: 0.799
epoch 300
Train set - loss: 0.528, accuracy: 0.784
Test set - loss: 0.524, accuracy: 0.799
epoch 400
Train set - loss: 0.526, accuracy: 0.784
Test set - loss: 0.521, accuracy: 0.799
epoch 500
Train set - loss: 0.525, accuracy: 0.784
Test set - loss: 0.519, accuracy: 0.799
epoch 600
Train set - loss: 0.524, accuracy: 0.784
Test set - loss: 0.518, accuracy: 0.799
epoch 700
Train set - loss: 0.524, accuracy: 0.784
Test set - loss: 0.517, accuracy: 0.799
epoch 800
Train set - loss: 0.523, accuracy: 0.784
Test set - loss: 0.515, accuracy: 0.799
epoch 900
Train set - loss: 0.523, accuracy: 0.784
Test set - loss: 0.514, accuracy: 0.799
保存模型
# NOTE(review): saving the whole module object pins the checkpoint to this
# exact class definition; torch best practice is saving net.state_dict().
# 'model path' is used literally as a file name in the working directory.
MODEL_PATH = 'model path'
torch.save(net,MODEL_PATH)
加载模型
net = torch.load(MODEL_PATH)  # restore the full module object saved above
因为一个模型的训练时间很长,所以有时候我们需要保存模型,下次再加载进来继续使用
评估
# Class display names for the report (Chinese: 'bad' / 'good'); reused later
# as confusion-matrix axis labels.
target1 = ['不好','好']
y_pred = net(X_test)
y_pred = y_pred.ge(.5).view(-1).cpu()  # threshold at 0.5, move to CPU for sklearn
y_test = y_test.cpu()
print(classification_report(y_test,y_pred,target_names=target1))
precision recall f1-score support
不好 0.00 0.00 0.00 84
好 0.80 1.00 0.89 334
accuracy 0.80 418
macro avg 0.40 0.50 0.44 418
weighted avg 0.64 0.80 0.71 418
C:\Users\kingS\anaconda3\envs\pytorch_gpu\lib\site-packages\sklearn\metrics\_classification.py:1308: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\kingS\anaconda3\envs\pytorch_gpu\lib\site-packages\sklearn\metrics\_classification.py:1308: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\kingS\anaconda3\envs\pytorch_gpu\lib\site-packages\sklearn\metrics\_classification.py:1308: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
# Configure matplotlib for CJK text: FangSong font and a renderable minus sign.
plt.rcParams['font.sans-serif'] = ['FangSong']
plt.rcParams['axes.unicode_minus'] = False
fig,ax = plt.subplots()
cm = confusion_matrix(y_test,y_pred)
df_cm = pd.DataFrame(cm,index=target1,columns=target1)
hmap = sns.heatmap(df_cm,annot=True,fmt='d')  # annotate each cell with its integer count
ax.set_ylabel('True labels')
ax.set_xlabel('Predicted labels')
Text(0.5, 28.453125, 'Predicted labels')