根据提供的负荷数据,进行数据处理和负荷预测:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import datetime
# Calendar dates (YYYY-MM-DD) that are flagged as public holidays.
holidays = ['2020-01-01', '2020-01-24', '2020-01-25', '2020-01-26',
            '2020-04-05', '2020-05-01', '2020-06-25', '2020-10-01']
# Parsed once so the holiday lookup compares timestamps with timestamps
# instead of timestamps with strings.
HOLIDAY_DATES = pd.to_datetime(holidays)

WEEKDAY_NAMES = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu',
                 4: 'Fri', 5: 'Sat', 6: 'Sun'}

# Season code per calendar quarter: 1 = spring, 2 = summer, 3 = autumn,
# 4 = winter. NOTE(review): this makes 'season' identical to 'quarter'.
SEASON_BY_QUARTER = {1: 1, 2: 2, 3: 3, 4: 4}

# Numeric columns fed to the model. Only features that can be derived from a
# timestamp are used, so exactly the same columns can be rebuilt for future
# dates; the text/datetime helper columns ('weekday', 'ds', 'holiday') are
# deliberately excluded because LinearRegression needs numeric input.
FEATURE_COLUMNS = ['hour', 'dayofweek', 'quarter', 'season', 'timeofday',
                   'isholiday']

# Hour of day used for each daily forecast point (midday, i.e. timeofday == 2).
FORECAST_HOUR = 12


def _add_calendar_features(frame: pd.DataFrame) -> pd.DataFrame:
    """Add calendar-derived columns to ``frame`` in place and return it.

    ``frame`` must contain a datetime-typed ``'date'`` column. The columns
    added are:

    * ``hour``, ``dayofweek``, ``quarter`` - taken from the timestamp.
    * ``season`` - season code looked up from the quarter (0 if unknown).
    * ``timeofday`` - 1 for hours 7-10, 2 for 11-16, 3 for 17-23, else 0.
    * ``ds`` - the timestamp truncated to midnight (the calendar day).
    * ``weekday`` - three-letter weekday name.
    * ``holiday`` / ``isholiday`` - bool / 0-1 flag, true when ``ds`` is one
      of ``HOLIDAY_DATES``.
    """
    stamps = frame['date']
    frame['hour'] = stamps.dt.hour
    frame['dayofweek'] = stamps.dt.dayofweek
    frame['quarter'] = stamps.dt.quarter

    frame['season'] = (
        frame['quarter'].map(SEASON_BY_QUARTER).fillna(0).astype(int)
    )

    # Non-overlapping hour bands; the boundary hours 11 and 17 belong to the
    # later band, which is what the original overlapping ranges resolved to.
    hour = frame['hour']
    frame['timeofday'] = np.select(
        [hour.between(7, 10), hour.between(11, 16), hour.between(17, 23)],
        [1, 2, 3],
        default=0,
    )

    # Compare on the calendar day so rows carrying a time of day still match.
    frame['ds'] = stamps.dt.normalize()
    frame['weekday'] = frame['dayofweek'].map(WEEKDAY_NAMES)
    frame['holiday'] = frame['ds'].isin(HOLIDAY_DATES)
    frame['isholiday'] = frame['holiday'].astype(int)
    return frame


# Load the raw data. The script reads a 'date' column (timestamps) and a
# 'load' column (the prediction target) from this file.
df = pd.read_csv('load_data.csv')

# Feature engineering.
df['date'] = pd.to_datetime(df['date'])
df = _add_calendar_features(df)

# Split into training and test sets.
# NOTE(review): the split is a random shuffle, so test rows are interleaved in
# time with training rows; consider a chronological split for time series.
X = df[FEATURE_COLUMNS]
y = df['load']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the model.
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
preds = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, preds))
print('RMSE:', rmse)

# Forecast the next seven days (starting today), one point per day at midday.
# The features are derived from the forecast timestamps with the same helper
# used for training, so the columns and their meaning match the fitted model.
# NOTE: `holidays` only lists 2020 dates, so future rows get isholiday == 0
# unless the list is extended.
forecast_start = datetime.datetime.now().replace(
    hour=FORECAST_HOUR, minute=0, second=0, microsecond=0
)
future_dates = [forecast_start + datetime.timedelta(days=offset)
                for offset in range(7)]
future_df = _add_calendar_features(pd.DataFrame({'date': future_dates}))
future_preds = model.predict(future_df[FEATURE_COLUMNS])
print('未来一周预测的负荷:\n', future_preds)
主要步骤包括:
1、生成时间特征,如小时、星期几、季度等;
2、标记工作日、节假日、春夏秋冬、早中晚等类别;
3、划分训练集和测试集,训练 LinearRegression 模型;
4、计算 RMSE,评估模型效果;
5、使用模型预测未来一周的负荷。
此外,可以根据需求调整特征和模型,以进一步提高预测准确度。