🍨 本文为[🔗365天深度学习训练营]中的学习记录博客
🍖 原作者:[K同学啊]
运行代码为:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, r2_score, mean_absolute_error, \
mean_absolute_percentage_error, mean_squared_error
warnings.filterwarnings('ignore')
# 导入数据
data = pd.read_csv("D:/研究生课题/深度学习-代码/weatherAUS.csv")
df = data.copy()
print(data.head())
print(data.describe())
print(data.dtypes)
data['Date'] = pd.to_datetime(data['Date'])
print(data['Date'])
# 增加年、月、日三个时间特征
data['year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['day'] = data['Date'].dt.day
print(data.head())
data.drop('Date', axis=1, inplace=True)
print(data.columns)
# 可视化相关性矩阵
plt.figure(figsize=(15, 13))
ax = sns.heatmap(data.corr(), square=True, annot=True, fmt='.2f')
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
plt.show()
plt.savefig('相关性分析.png')
# 是否会下雨的统计图
sns.set(style="darkgrid")
plt.figure(figsize=(4, 3))
sns.countplot(x='RainTomorrow', data=data)
plt.savefig('明天下雨与否数量统计.png')
plt.figure(figsize=(4, 3))
sns.countplot(x='RainToday', data=data)
plt.savefig('今天下雨与否数量统计.png', bbox_inches='tight')
# 地理位置与下雨的关系
x = pd.crosstab(data['RainTomorrow'], data['RainToday'])
print(x)
y = x / x.transpose().sum().values.reshape(2, 1) * 100
print(y)
y.plot(kind="bar", figsize=(4, 3), color=['#006666', '#d279a6'])
plt.savefig('是否会下雨条形图.png', bbox_inches='tight')
x = pd.crosstab(data['Location'], data['RainToday'])
y = x / x.transpose().sum().values.reshape(-1, 1) * 100
y = y.sort_values(by='Yes', ascending=True)
print(y)
fig, ax = plt.subplots(figsize=(15, 20))
color = ['#cc6699', '#006699', '#006666', '#862d86', '#ff9966']
y['Yes'].plot(kind='barh', ax=ax, color=color)
plt.savefig('地理位置与下雨的关系.png')
# 湿度和压力对下雨的影响
plt.figure(figsize=(8, 6))
sns.scatterplot(data=data, x='Pressure9am', y='Pressure3pm', hue='RainTomorrow')
plt.savefig('压力对下雨的影响.png')
plt.figure(figsize=(8, 6))
sns.scatterplot(data=data, x='Humidity9am', y='Humidity3pm', hue='RainTomorrow')
plt.savefig('湿度对下雨的影响.png')
# 气温对下雨的影响
plt.figure(figsize=(8, 6))
sns.scatterplot(x='MaxTemp', y='MinTemp', data=data, hue='RainTomorrow')
plt.savefig('气温对下雨的影响.png')
# 处理缺失值
data.isnull().sum() / data.shape[0] * 100
lst = ['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm']
for col in lst:
fill_list = data[col].dropna()
data[col] = data[col].fillna(pd.Series(np.random.choice(fill_list, size=len(data.index))))
s = (data.dtypes == "object")
object_cols = list(s[s].index)
print(object_cols)
for i in object_cols:
data[i].fillna(data[i].mode()[0], inplace=True)
t = (data.dtypes == "float64")
num_cols = list(t[t].index)
print(num_cols)
for i in num_cols:
data[i].fillna(data[i].median(), inplace=True)
print(data.isnull().sum())
# 构建数据集
label_encoder = LabelEncoder()
for i in object_cols:
data[i] = label_encoder.fit_transform(data[i])
X = data.drop(['RainTomorrow', 'day'], axis=1).values
y = data['RainTomorrow'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
# 定义神经网络模型
class NeuralNet(nn.Module):
def __init__(self):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(X_train.shape[1], 24)
self.fc2 = nn.Linear(24, 18)
self.fc3 = nn.Linear(18, 23)
self.dropout1 = nn.Dropout(0.5)
self.fc4 = nn.Linear(23, 12)
self.dropout2 = nn.Dropout(0.2)
self.fc5 = nn.Linear(12, 1)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = torch.tanh(self.fc1(x))
x = torch.tanh(self.fc2(x))
x = torch.tanh(self.fc3(x))
x = self.dropout1(x)
x = torch.tanh(self.fc4(x))
x = self.dropout2(x)
x = self.sigmoid(self.fc5(x))
return x
model = NeuralNet()
# 定义损失函数和优化器
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# 模型训练
num_epochs = 10
for epoch in range(num_epochs):
model.train()
optimizer.zero_grad()
outputs = model(X_train)
loss = criterion(outputs, y_train)
loss.backward()
optimizer.step()
if epoch % 2 == 0:
print(f'Epoch {epoch}/{num_epochs}, Loss: {loss.item()}')
# 模型评估
model.eval()
with torch.no_grad():
train_outputs = model(X_train)
test_outputs = model(X_test)
train_loss = criterion(train_outputs, y_train).item()
test_loss = criterion(test_outputs, y_test).item()
print(f'Training Loss: {train_loss}')
print(f'Test Loss: {test_loss}')
# 可视化结果
train_acc = ((train_outputs > 0.5).float() == y_train).float().mean().item()
test_acc = ((test_outputs > 0.5).float() == y_test).float().mean().item()
print(f'Training Accuracy: {train_acc}')
print(f'Test Accuracy: {test_acc}')
运行结果
运行结果代码
C:\Users\dell\anaconda3\envs\pytorch-gpu\python.exe C:\Users\dell\PycharmProjects\pythonProject5\rnn.py
Date Location MinTemp ... Temp3pm RainToday RainTomorrow
0 2008-12-01 Albury 13.4 ... 21.8 No No
1 2008-12-02 Albury 7.4 ... 24.3 No No
2 2008-12-03 Albury 12.9 ... 23.2 No No
3 2008-12-04 Albury 9.2 ... 26.5 No No
4 2008-12-05 Albury 17.5 ... 29.7 No No
[5 rows x 23 columns]
MinTemp MaxTemp ... Temp9am Temp3pm
count 143975.000000 144199.000000 ... 143693.000000 141851.00000
mean 12.194034 23.221348 ... 16.990631 21.68339
std 6.398495 7.119049 ... 6.488753 6.93665
min -8.500000 -4.800000 ... -7.200000 -5.40000
25% 7.600000 17.900000 ... 12.300000 16.60000
50% 12.000000 22.600000 ... 16.700000 21.10000
75% 16.900000 28.200000 ... 21.600000 26.40000
max 33.900000 48.100000 ... 40.200000 46.70000
[8 rows x 16 columns]
Date object
Location object
MinTemp float64
MaxTemp float64
Rainfall float64
Evaporation float64
Sunshine float64
WindGustDir object
WindGustSpeed float64
WindDir9am object
WindDir3pm object
WindSpeed9am float64
WindSpeed3pm float64
Humidity9am float64
Humidity3pm float64
Pressure9am float64
Pressure3pm float64
Cloud9am float64
Cloud3pm float64
Temp9am float64
Temp3pm float64
RainToday object
RainTomorrow object
dtype: object
0 2008-12-01
1 2008-12-02
2 2008-12-03
3 2008-12-04
4 2008-12-05
...
145455 2017-06-21
145456 2017-06-22
145457 2017-06-23
145458 2017-06-24
145459 2017-06-25
Name: Date, Length: 145460, dtype: datetime64[ns]
Date Location MinTemp MaxTemp ... RainTomorrow year Month day
0 2008-12-01 Albury 13.4 22.9 ... No 2008 12 1
1 2008-12-02 Albury 7.4 25.1 ... No 2008 12 2
2 2008-12-03 Albury 12.9 25.7 ... No 2008 12 3
3 2008-12-04 Albury 9.2 28.0 ... No 2008 12 4
4 2008-12-05 Albury 17.5 32.3 ... No 2008 12 5
[5 rows x 26 columns]
Index(['Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
'Temp3pm', 'RainToday', 'RainTomorrow', 'year', 'Month', 'day'],
dtype='object')
RainToday No Yes
RainTomorrow
No 92728 16858
Yes 16604 14597
RainToday No Yes
RainTomorrow
No 84.616648 15.383352
Yes 53.216243 46.783757
RainToday No Yes
Location
Woomera 93.246406 6.753594
Uluru 92.378449 7.621551
AliceSprings 91.952507 8.047493
Mildura 89.125374 10.874626
Cobar 87.081660 12.918340
Moree 86.194814 13.805186
Nhil 84.576163 15.423837
SalmonGums 84.027073 15.972927
Katherine 83.012821 16.987179
Townsville 82.855259 17.144741
WaggaWagga 81.989247 18.010753
PearceRAAF 81.716148 18.283852
Canberra 81.597425 18.402575
Bendigo 81.476599 18.523401
PerthAirport 81.156530 18.843470
Tuggeranong 81.054036 18.945964
Richmond 81.023382 18.976618
Nuriootpa 80.286380 19.713620
BadgerysCreek 80.088798 19.911202
Penrith 79.925776 20.074224
Perth 79.799562 20.200438
Albury 79.508469 20.491531
Sale 78.566667 21.433333
MelbourneAirport 78.298438 21.701562
Adelaide 77.709479 22.290521
Brisbane 77.570389 22.429611
Launceston 76.882431 23.117569
Hobart 76.097867 23.902133
Wollongong 76.089873 23.910127
Watsonia 75.391797 24.608203
Newcastle 75.262267 24.737733
SydneyAirport 74.242928 25.757072
Ballarat 74.207398 25.792602
Sydney 74.048547 25.951453
GoldCoast 73.993289 26.006711
Melbourne 73.880903 26.119097
Darwin 73.316630 26.683370
Williamtown 72.581277 27.418723
NorahHead 72.413793 27.586207
MountGinini 71.826625 28.173375
CoffsHarbour 70.572299 29.427701
Witchcliffe 70.223577 29.776423
Albany 70.092838 29.907162
MountGambier 69.613989 30.386011
NorfolkIsland 68.994602 31.005398
Dartmoor 68.694765 31.305235
Cairns 68.206158 31.793842
Walpole 66.335580 33.664420
Portland 63.484646 36.515354
['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday', 'RainTomorrow']
['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine', 'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm']
Location 0
MinTemp 0
MaxTemp 0
Rainfall 0
Evaporation 0
Sunshine 0
WindGustDir 0
WindGustSpeed 0
WindDir9am 0
WindDir3pm 0
WindSpeed9am 0
WindSpeed3pm 0
Humidity9am 0
Humidity3pm 0
Pressure9am 0
Pressure3pm 0
Cloud9am 0
Cloud3pm 0
Temp9am 0
Temp3pm 0
RainToday 0
RainTomorrow 0
year 0
Month 0
day 0
dtype: int64
Epoch 0/10, Loss: 0.7397468686103821
Epoch 2/10, Loss: 0.7384240031242371
Epoch 4/10, Loss: 0.7370564341545105
Epoch 6/10, Loss: 0.7355726361274719
Epoch 8/10, Loss: 0.7342917919158936
Training Loss: 0.7325757741928101
Test Loss: 0.7322903275489807
Training Accuracy: 0.21866263449192047
Test Accuracy: 0.22059673070907593
进程已结束,退出代码为 0
心得总结
本实验使用PyTorch构建了一个多层感知器模型,预测天气数据中“明天是否会下雨”。通过数据预处理、特征工程和模型训练,模型在二分类任务中表现良好。