主要目的:上分
学习内容:
通过对时间序列数据进行挖掘和可视化分析,提取其中有价值的信息,从而加深对赛题的理解。
代码:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from pathlib import Path
# Directory that holds the input CSV files.
base_path = Path("data")
# Market table; its demand and clearing-price columns are used further down.
electricity_price = pd.read_csv(base_path.joinpath("electricity price.csv"))
# Unit table; its capacity and coal-consumption columns are used further down.
unit = pd.read_csv(base_path.joinpath("unit.csv"))
# Rows whose clearing price is missing are the rows to be predicted. They must
# be captured BEFORE any imputation touches the column: filling first leaves no
# NaN to select and produces an empty submission template.
price_col = "clearing price (CNY/MWh)"
needs_prediction = electricity_price[price_col].isna()

# Submission template: the rows to predict, without the demand column.
# .copy() makes it an independent frame so a prediction column can be added
# to it later without touching electricity_price.
sample_submit = electricity_price[needs_prediction].drop(columns="demand").copy()
sample_submit.to_csv(base_path / "sample_submit.csv", index=False)

# Mean-impute the remaining gaps in the price column. Plain assignment is used
# instead of a chained `fillna(..., inplace=True)`, which is deprecated and has
# no effect on the frame when pandas copy-on-write is enabled.
if needs_prediction.any():
    mean_value = electricity_price[price_col].mean()
    electricity_price[price_col] = electricity_price[price_col].fillna(mean_value)
# Build a proper timestamp column from the "day" and "time" text columns.
# The data labels the end of a day as "24:00:00", which is not a valid clock
# time, so it is rewritten to midnight and the date is moved forward one day.

# Flag the end-of-day rows from the raw text, so that only rows that really
# said "24:00:00" are shifted (a genuine 00:00:00 row keeps its date).
is_end_of_day = electricity_price["time"].str.contains("24:00:00", regex=False, na=False)

# Replace with a full "00:00:00" so every value keeps the same H:M:S layout;
# pandas infers one format for the whole column and can reject mixed layouts.
electricity_price["timestamp"] = pd.to_datetime(
    electricity_price["day"]
    + " "
    + electricity_price["time"].str.replace("24:00:00", "00:00:00", regex=False)
)

# "24:00:00 on day D" is midnight at the start of day D + 1.
electricity_price.loc[is_end_of_day, "timestamp"] += pd.Timedelta(days=1)
# Order the units by capacity and accumulate capacity along that order.
# NOTE(review): a merit-order stack is normally ordered by cost (here that
# would be the coal-consumption column) rather than by capacity; confirm that
# sorting by 'Capacity(MW)' is intended.
sorted_unit = unit.sort_values(by='Capacity(MW)')
sorted_unit['cumulative_capacity'] = sorted_unit['Capacity(MW)'].cumsum()
if sorted_unit.empty:
    raise ValueError("unit table is empty; cannot determine a marginal unit")

# For every demand value, pick the first unit (in the order above) whose
# cumulative capacity reaches the demand, and use that unit's coal consumption
# as the feature. searchsorted does this for all rows at once instead of
# filtering the unit table once per row.
# Assumes capacities are non-negative and non-missing, so the cumulative
# capacity is non-decreasing (TODO confirm against the data).
cumulative_capacity = sorted_unit['cumulative_capacity'].to_numpy()
coal_consumption = sorted_unit["coal consumption (g coal/KWh)"].to_numpy()
marginal_idx = np.searchsorted(
    cumulative_capacity, electricity_price["demand"].to_numpy(), side="left"
)
# A demand above the total capacity (or a missing demand) has no covering
# unit; fall back to the last unit instead of failing with an IndexError.
marginal_idx = np.minimum(marginal_idx, len(cumulative_capacity) - 1)
prices = coal_consumption[marginal_idx].tolist()
# Fit a one-feature linear regression (marginal-unit feature -> clearing
# price) on the leading rows, predict the remaining rows and write them out.
model = LinearRegression()

# Number of leading rows used for training; the rest are predicted.
# NOTE(review): hard-coded; presumably the count of rows with a known
# clearing price; confirm against the data.
train_length = 55392

# scikit-learn expects a 2-D feature matrix: one row per sample, one column.
prices = np.array(prices).reshape(-1, 1)
X = prices[:train_length]
y = electricity_price["clearing price (CNY/MWh)"].iloc[:train_length].values.reshape(-1, 1)

# The estimator method is lowercase `fit`; `Fit` does not exist.
model.fit(X, y)

# Predictions come back as an (n, 1) array; flatten to 1-D for the column.
y_pred = model.predict(prices[train_length:]).flatten()
print(y_pred[:5])

# Fail with a clear message if the template and the predictions disagree.
if len(y_pred) != len(sample_submit):
    raise ValueError(
        f"got {len(y_pred)} predictions for {len(sample_submit)} submission rows; "
        "check train_length and how sample_submit was built"
    )
sample_submit["clearing price (CNY/MWh)"] = y_pred
print(sample_submit.head())
sample_submit.to_csv("sample_submit1.csv", index=False)