没有现成的数据也不用慌,我们可以用 random 自己生成一份模拟的气温数据:
import pandas as pd
import random
# Generate a synthetic daily "weather" table with random values, keep every
# `interval`-th row, and save the sample to selected_weather_data.csv.

# Template frame: fixes the column names and their order for the output CSV.
data = pd.DataFrame(columns=["year", "month", "day", "week", "temp_2", "temp_1", "average", "actual", "friend"])

# Inclusive range of years to generate.
start_year = 2010
end_year = 2020
# Days 1..28 exist in every month (February included), so every generated
# year/month/day triple is a valid calendar date.
days_per_month = 28
# Sampling step: keep one row out of every `interval` generated rows.
interval = 10

selected_rows = []
for year in range(start_year, end_year + 1):
    for month in range(1, 13):
        # range() excludes its stop value, hence the +1 to include day 28.
        for day in range(1, days_per_month + 1):
            row = {
                "year": year,
                "month": month,
                "day": day,
                # NOTE: a random 1..7 value, not derived from the date.
                "week": random.randint(1, 7),
                # All remaining columns are independent random integers in
                # [20, 40] (randint bounds are inclusive).
                "temp_2": random.randint(20, 40),
                "temp_1": random.randint(20, 40),
                "average": random.randint(20, 40),
                "actual": random.randint(20, 40),
                "friend": random.randint(20, 40),
            }
            selected_rows.append(row)

# Keep rows 0, interval, 2*interval, ...
selected_data = selected_rows[::interval]

# Passing the template's columns keeps the header stable even if no rows
# were selected.
selected_data_df = pd.DataFrame(selected_data, columns=data.columns)

# Write the sample without the DataFrame index column.
selected_data_df.to_csv("selected_weather_data.csv", index=False)
然后就可以在 Jupyter Notebook 中导入这份数据,做进一步的分析:
#导包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.keras import layers
import tensorflow.python.keras
import warnings
warnings.filterwarnings("ignore")
# Load the generated weather CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, while the
# generator script writes selected_weather_data.csv relative to its working
# directory; adjust it to wherever the file actually lives.
csv_path = 'C:\\Users\\21483\\PycharmProjects\\pythonProject9\\selected_weather_data.csv'
features = pd.read_csv(csv_path)
# Peek at the first rows; as a bare expression its result is discarded when
# this runs as a plain script.
features.head()
# Report the shape of the loaded table.
table_shape = features.shape
print('数据维度:', table_shape)
我们查看一下数据的前五行和数据维度
将年、月、日三列分别保存到变量中,便于后面拼接成日期:
# Keep the year / month / day columns so dates can be rebuilt from them later.
years, months, days = features['year'], features['month'], features['day']
将年、月、日拼接成字符串,并转换为 datetime 日期格式:
import datetime

# Format each row's year/month/day as 'Y-M-D' text and parse it into a
# datetime object, in a single pass.
dates = [
    datetime.datetime.strptime(f"{int(y)}-{int(m)}-{int(d)}", '%Y-%m-%d')
    for y, m, d in zip(years, months, days)
]
# Preview of the first five parsed dates (result discarded outside a notebook).
dates[:5]
# One-hot encode any non-numeric columns, then show the first five rows.
features = pd.get_dummies(features)
preview = features.head(5)
print(preview)
# Plot four columns of the data against the date on a 2x2 grid of subplots.
plt.style.use('fivethirtyeight')

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
# Rotate the x-axis date labels by 45 degrees.
fig.autofmt_xdate(rotation=45)

# (axes, DataFrame column, panel title) for each subplot.
panels = (
    (ax1, 'actual', 'Max Temp'),                 # label (actual value)
    (ax2, 'temp_1', 'Previous Max Temp'),        # previous day
    (ax3, 'temp_2', 'Two Days Prior Max Temp'),  # two days prior
    (ax4, 'friend', 'Friend Estimate'),          # friend's estimate
)
for axis, column, title in panels:
    axis.plot(dates, features[column])
    axis.set_xlabel('day')
    axis.set_ylabel('Temperature')
    axis.set_title(title)

plt.show()
上面是画图的代码(由于数据是自己用 random 随机生成的,曲线没有规律,看起来可能有些混乱)。
# One-hot encode any non-numeric columns and preview the result.
features = pd.get_dummies(data=features)
features.head(n=5)

# The 'actual' column is the prediction target: copy it out as a NumPy array.
labels = np.array(features['actual'])

# Everything except the target is a model input.
features = features.drop(columns='actual')

# Remember the column names so columns can be looked up by name after the
# frame is converted to a bare array.
feature_list = features.columns.tolist()

# Convert the feature table to a NumPy array and inspect its shape.
features = np.array(features)
features.shape
导入机器学习库 scikit-learn(sklearn)对数据做标准化预处理,然后构建神经网络并训练模型:
# Standardize the features, build a small fully connected regression network,
# train it, and predict on the same inputs it was trained on.
from sklearn import preprocessing

# Scale every feature column to zero mean and unit variance.
input_features = preprocessing.StandardScaler().fit_transform(features)

# Build the network: two hidden Dense layers (128 and 256 units) and a
# single-unit output layer for the predicted temperature.
model = tf.keras.Sequential()
# The layers are taken from the public tf.keras.layers namespace so they come
# from the same Keras implementation as tf.keras.Sequential; layers imported
# from the private tensorflow.python.keras package are rejected by
# tf.keras models on newer TensorFlow releases.
for units in (128, 256, 1):
    model.add(
        tf.keras.layers.Dense(
            units,
            kernel_initializer='random_normal',
            # A fresh L2 weight penalty (factor 0.03) for each layer.
            kernel_regularizer=tf.keras.regularizers.l2(0.03),
        )
    )

# Plain SGD with learning rate 0.001, minimizing mean squared error.
model.compile(optimizer=tf.keras.optimizers.SGD(0.001), loss='mean_squared_error')

# Train for 100 epochs in batches of 64, holding out 25% of the rows for
# validation.
model.fit(input_features, labels, validation_split=0.25, epochs=100, batch_size=64)

# Predict on the full (standardized) input set.
predict = model.predict(input_features)
# Build two tables keyed by date: the true labels and the model predictions.


def _to_dates(year_values, month_values, day_values):
    """Return one datetime per (year, month, day) triple of numeric values."""
    return [
        datetime.datetime.strptime(f"{int(y)}-{int(m)}-{int(d)}", '%Y-%m-%d')
        for y, m, d in zip(year_values, month_values, day_values)
    ]


# Dates for the true values, from the year/month/day values saved earlier.
dates = _to_dates(years, months, days)

# Table of dates and the corresponding actual label values.
true_data = pd.DataFrame(data={'日期': dates, '实际值': labels})

# Dates for the predictions, read back from the feature matrix by column name.
# All three components now come from the matrix: the year column used to be
# stored under `year` while the date list was built from `years`.
years = features[:, feature_list.index('year')]
months = features[:, feature_list.index('month')]
days = features[:, feature_list.index('day')]
test_dates = _to_dates(years, months, days)

# Table of dates and the corresponding predictions, flattened to 1-D.
predictions_data = pd.DataFrame(data={'日期': test_dates, '预测值': predict.reshape(-1)})
绘制预测结果
# Draw the actual values as a blue line and the predictions as red dots on
# the current axes.
axis = plt.gca()
axis.set_ylim(0, 60)

# Actual values.
axis.plot(true_data['日期'], true_data['实际值'], 'b-', label='实际值')
# Predicted values.
axis.plot(predictions_data['日期'], predictions_data['预测值'], 'ro', label ='预测值')

# Vertical tick labels on the x-axis.
plt.xticks(rotation='vertical')
axis.legend()

# Axis labels and figure title.
axis.set_xlabel('日期')
axis.set_ylabel('最高温度(华氏度)')
axis.set_title('实际值和预测值')

# Show the figure.
plt.show()