# 导入必要的库
import logging

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error

import xgboost as lgb

import copy

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# 解决中文显示问题
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
import warnings

warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
# import lightgbm as lgb
from sklearn.metrics import mean_squared_error


def get_MAPE(real_data, predict_data):
    """Return a 1-MAPE accuracy-style score rounded to 2 decimals.

    Positions where the actual value is zero are dropped before computing
    MAPE, because a zero actual makes the percentage error undefined.

    :param real_data: array-like of actual values (flattened internally)
    :param predict_data: array-like of predictions, same length as real_data
    :return: round(1 - MAPE, 2); NaN when every actual value is zero
    """
    real_data = np.asarray(real_data).flatten()
    predict_data = np.asarray(predict_data).flatten()
    # Keep only the positions with a non-zero actual value.
    mask = real_data != 0
    new_real = real_data[mask]
    new_pred = predict_data[mask]
    if len(new_real) == 0:
        # MAPE is undefined with no valid points.
        return float('nan')
    # Bug fix: the filtered arrays were previously built but never used --
    # the metric was computed on the raw data, where zero actuals make
    # the percentage error explode.
    MAPE = np.mean(np.abs(new_real - new_pred) / np.abs(new_real))
    return round(1 - MAPE, 2)


def get_MAE(real_data, predict_data):
    """Return the mean absolute error over the non-zero actual values.

    Zero actuals are excluded to stay consistent with ``get_MAPE``.

    :param real_data: array-like of actual values (flattened internally)
    :param predict_data: array-like of predictions, same length as real_data
    :return: MAE rounded to 2 decimals; NaN when every actual value is zero
    """
    real_data = np.asarray(real_data).flatten()
    predict_data = np.asarray(predict_data).flatten()
    mask = real_data != 0
    new_real = real_data[mask]
    new_pred = predict_data[mask]
    if len(new_real) == 0:
        # Bug fix: the original divided by len(new_real) == 0 here.
        return float('nan')
    MAE = np.mean(np.abs(new_real - new_pred))
    return round(MAE, 2)


def get_RMSE(real_data, predict_data):
    """Return 1 minus the max-normalized RMSE, as an accuracy-style score.

    :param real_data: array-like of actual values (flattened internally)
    :param predict_data: array-like of predictions, same length as real_data
    :return: 1 - round(RMSE / max(real_data), 2); NaN when the max is 0
    """
    real_data = np.asarray(real_data).flatten()
    predict_data = np.asarray(predict_data).flatten()
    max_real = np.max(real_data)
    if max_real == 0:
        # Normalization by the max is undefined.
        return float('nan')
    # Bug fix: the original computed sqrt(mean(diff) ** 2) == |mean(diff)|,
    # letting positive and negative errors cancel out.  True RMSE squares
    # each residual before averaging.
    RMSE = np.sqrt(np.mean((real_data - predict_data) ** 2)) / max_real
    return 1 - round(RMSE, 2)


url = f'./.csv'


def get_train_test_data(url, feature):
    """Load the 15-minute load series from *url*, engineer lag/calendar
    features, and split into train (through 2024-03-31) and test
    (from 2024-04-01) sets.

    :param url: path to a CSV with 'time' and 'value' columns
    :param feature: list of feature column names to select into X
    :return: tuple (X_train, Y_train, X_test, Y_test)
    """
    data = pd.read_csv(url)  # assumes the dataset is stored in a CSV file named data.csv
    data.set_index('time', inplace=True)
    data.index = pd.to_datetime(data.index)
    while data['value'].isnull().any():
        # TODO: missing-value handling -- fill each gap with the value at
        # the same time on the previous day, looping until nothing is NaN.
        df_15_min_shift_1 = data['value'].shift(1, freq='D')
        data['value'].fillna(df_15_min_shift_1, inplace=True)
    df = data.copy()
    df['time'] = df.index

    # Calendar features.
    df['hour'] = df['time'].dt.hour
    df['period'] = df['time'].dt.hour * 4 + df['time'].dt.minute // 15  # 15-min slot index 0..95
    df['day_of_week'] = df['time'].dt.weekday
    # Value at the same time one day earlier (96 fifteen-minute steps).
    df['shift_value'] = df['value'].shift(96)

    # Rolling hourly mean and daily extremes of the day-lagged series.
    # NOTE(review): the daily resample result only aligns on midnight rows
    # of the 15-min index; the ffill below spreads each day's max/min
    # across the rest of that day -- confirm this is the intended layout.
    df['rolling_H_mean'] = df['shift_value'].rolling(window='60T').mean()
    df['value_max'] = df['shift_value'].resample('D').max()
    df['value_min'] = df['shift_value'].resample('D').min()

    df['value_min'].fillna(method='ffill', inplace=True)
    df['value_max'].fillna(method='ffill', inplace=True)

    # One-hour (4-step) difference of the day-lagged series.
    df['diff_4'] = df['shift_value'].diff(4)
    df['diff_4'].fillna(method='bfill', inplace=True)

    # Drop the first day, whose lag-based features are all NaN.
    df_trian_test = df[96:]
    df_trian_test.to_csv('./re.csv')  # side effect: snapshot of engineered features


    df_trian = df_trian_test[:'2024-03-31']
    df_test = df_trian_test['2024-04-01':]
    X_train = df_trian[feature]
    Y_train = df_trian['value']

    X_test = df_test[feature]
    Y_test = df_test['value']

    return X_train, Y_train, X_test, Y_test


# df_trian_test[colums_to_normalize] = scaler.fit_transform(df_trian_test[colums_to_normalize])
from sklearn.model_selection import GridSearchCV
def get_model():
    """Build the RandomForestRegressor with the tuned hyper-parameters.

    (Despite the lgbm/xgboost aliases elsewhere in this file, the model
    used here is sklearn's random forest.)

    :return: an unfitted RandomForestRegressor
    """
    # Hyper-parameters found by an earlier grid search.
    tuned_params = {
        'n_estimators': 450,
        'max_depth': 4,
        'min_samples_split': 5,
        'min_samples_leaf': 4,
        'random_state': 42,
    }
    return RandomForestRegressor(**tuned_params)


# 创建LightGBM模型
# lgbm_model = lgb.LGBMRegressor(objective='regression', metric='l2', learning_rate=0.05, n_estimators=100)

def test(lgbm_model, X_test, Y_test):
    """Evaluate *lgbm_model* day by day on the test period.

    Slices the 15-minute test data into 96-step days, predicts each day,
    downsamples both prediction and truth to hourly means, and scores each
    day with 1-MAPE, MAE and 1-RMSE.

    :param lgbm_model: fitted regressor exposing ``predict``
    :param X_test: feature DataFrame with a 15-minute DatetimeIndex
    :param Y_test: target Series aligned with X_test
    :return: tuple (mape_df, mae_df, rmse_df), one row per test day,
        indexed by the day's date
    """
    mape_list = []
    mae_list = []
    rmse_list = []
    time_list = []

    for i in range(len(Y_test) // 96):
        # Record the date of this 96-step (one-day) slice.
        time_list.append(str(X_test.index[i * 96])[:10])
        label = X_test[i * 96:(i + 1) * 96]
        y = Y_test[i * 96:(i + 1) * 96]
        pred = lgbm_model.predict(label)
        df_pred = pd.DataFrame(pred, index=y.index)

        # Score on hourly means rather than on raw 15-minute points.
        df_pred = df_pred.resample('60T').mean()
        y = y.resample('60T').mean()

        mape = get_MAPE(real_data=np.array(y), predict_data=np.array(df_pred))
        # Bug fix: the original compared the float score to the string
        # 'Nan', which is never true; detect NaN properly.
        if np.isnan(mape):
            print("空值")
        mape_list.append(mape)
        mae = get_MAE(real_data=np.array(y), predict_data=np.array(df_pred))
        mae_list.append(mae)
        rmse = get_RMSE(real_data=np.array(y), predict_data=np.array(df_pred))
        rmse_list.append(rmse)

    mape_df = pd.DataFrame(mape_list, columns=['mape'], index=time_list)
    mae_df = pd.DataFrame(mae_list, columns=['mae'], index=time_list)
    rmse_df = pd.DataFrame(rmse_list, columns=['rmse'], index=time_list)

    # Re-index by datetime so callers can slice by month (e.g. '2024-04').
    mape_df = mape_df.set_index(pd.to_datetime(time_list))
    mae_df = mae_df.set_index(pd.to_datetime(time_list))
    rmse_df = rmse_df.set_index(pd.to_datetime(time_list))

    return mape_df, mae_df, rmse_df


def trian_test(lgbm_model, X_train, Y_train, ):
    """Fit the given model on the training data and hand it back.

    (Name is a typo of "train_test", kept so existing callers work.)

    :param lgbm_model: any regressor exposing ``fit``
    :param X_train: training features
    :param Y_train: training targets
    :return: the same model object, fitted in place
    """
    fitted_model = lgbm_model
    fitted_model.fit(X_train, Y_train)
    return fitted_model


def show_importance(model, feat_list=None):
    """Log and return the model's feature importances, sorted descending.

    :param model: fitted model exposing ``feature_importances_``
    :param feat_list: feature names; when None they are read from the model
    :return: list of (feature_name, importance) pairs, importances rounded
        to 2 decimals, sorted by importance descending
    :raises AttributeError: if feat_list is None and the model exposes no
        feature-name attribute
    """
    if feat_list is None:
        # Bug fix: ``feature_name_`` is a LightGBM attribute, but
        # get_model() builds a sklearn RandomForestRegressor, which exposes
        # ``feature_names_in_`` instead -- try both before failing.
        feat_list = getattr(model, 'feature_name_', None)
        if feat_list is None:
            feat_list = getattr(model, 'feature_names_in_', None)
        if feat_list is None:
            raise AttributeError(
                'model exposes neither feature_name_ nor feature_names_in_; '
                'pass feat_list explicitly')
    impo_list = model.feature_importances_
    pair_list = [(fe, round(im, 2)) for fe, im in zip(feat_list, impo_list)]
    pair_list = sorted(pair_list, key=lambda x: x[1], reverse=True)
    # Same logger name as the module-level logger.
    logging.getLogger(__name__).info(f'feature importance: {pair_list}')

    return pair_list


def valadation(url,feature_list):
    """Train a model on *feature_list*, evaluate it day by day, and print
    monthly metric summaries for April-June 2024.

    (Name is a typo of "validation", kept so existing callers work.)

    :param url: path to the dataset CSV
    :param feature_list: feature column names to train on
    :return: mean of the per-day 1-MAPE scores (degenerate days removed)
    """
    X_train, Y_train, X_test, Y_test = get_train_test_data(url, feature_list)

    lgbm_model = get_model()

    lgbm_model = trian_test(lgbm_model, X_train, Y_train)

    print(feature_list)
    mape_df, mae_df, rmse_df = test(lgbm_model, X_test, Y_test)

    # Drop clearly-degenerate days (1-MAPE below 0.01) from the average;
    # the matching filters for MAE/RMSE were left disabled.
    mape_df = mape_df[mape_df>=0.01]
    # mae_df = mae_df[mape_df[mape_df>=0.01].index]
    # rmse_df = rmse_df[mape_df[mape_df>=0.01].index]

    # NOTE(review): partial-string indexing like mape_df['2024-04'] on a
    # DataFrame relies on older pandas behaviour -- verify against the
    # pinned pandas version.
    print('-----------2024-04-----------')
    print("1-MAPE:\t", np.mean(mape_df['2024-04']))
    print("MAE:\t", np.mean(mae_df['2024-04']))
    print("RMSE:\t", np.mean(rmse_df['2024-04']))
    print('-----------2024-05-----------')
    print("1-MAPE:\t", np.mean(mape_df['2024-05']))
    print("MAE:\t", np.mean(mae_df['2024-05']))
    print("RMSE:\t", np.mean(rmse_df['2024-05']))
    print('-----------2024-06-----------')
    print("1-MAPE:\t", np.mean(mape_df['2024-06']))
    print("MAE:\t", np.mean(mae_df['2024-06']))
    print("RMSE:\t", np.mean(rmse_df['2024-06']))
    importance_info = show_importance(lgbm_model)
    print(importance_info)
    return np.mean(mape_df)


if __name__ == '__main__':
    # Script entry: run one validation pass with a fixed feature set.
    url = f'./周娟_AllFeature.csv'
    # Features always included in the greedy feature search below.
    Necessary_Feature = [
        'period',
        'shift_value',
        # 'temperature_2m',
    ]

    # Candidate time/weather features for the (disabled) forward search.
    Time_Feature = [
        'h_d',
        'day_of_week',
        'shortwave_radiation',
        'surface_pressure',
        'wind_speed_10m',
        'relative_humidity_2m'
    ]

    # Feature set actually used for this run.
    feature = [
        'period',
        'shift_value',
        'diff_4',
        'rolling_H_mean',
        'value_min',
        # 'value_max',
        'relative_humidity_2m',  # humidity
        'wind_speed_10m',  # wind speed
        'temperature_2m',  # temperature
        'shortwave_radiation'  # shortwave radiation
    ]



    best_features = copy.deepcopy(Necessary_Feature)
    best_mape = valadation(url,feature)

    # Disabled: greedy forward feature selection -- add one candidate at a
    # time and keep it only if the 1-MAPE score improves.
    # for f1i in Time_Feature:
    #     temp_fetures = copy.deepcopy(best_features)
    #     temp_fetures.append(f1i)
    #     temp_mape = valadation(url,temp_fetures)
    #     if float(temp_mape)>float(best_mape):
    #         print('最佳1-MAPE:\t',best_mape)
    #         print('目前1-MAPE:\t',temp_mape)
    #         best_features = copy.deepcopy(temp_fetures)
    #         best_mape = temp_mape
    #         print('找到更好的特征{}'.format(f1i))
    #     else:
    #         temp_fetures.pop()

    # for f2i in Load_Feature:
    #     temp_fetures = copy.deepcopy(best_features)
    #     temp_fetures.append(f2i)
    #     temp_mape = valadation(url,temp_fetures)
    #     if float(temp_mape)>float(best_mape):
    #         print('最佳1-MAPE:\t', best_mape)
    #         print('目前1-MAPE:\t', temp_mape)
    #         best_features = copy.deepcopy(temp_fetures)
    #         print('找到更好的特征{}'.format(f2i))
    #     else:
    #         temp_fetures.pop()
    #
    # for f3i in Weather_Feature:
    #     temp_fetures = copy.deepcopy(best_features)
    #     temp_fetures.append(f3i)
    #     temp_mape = valadation(url,temp_fetures)
    #     if float(temp_mape)>float(best_mape):
    #         print('最佳1-MAPE:\t', best_mape)
    #         print('目前1-MAPE:\t', temp_mape)
    #         best_features = copy.deepcopy(temp_fetures)
    #         print('找到更好的特征{}'.format(f3i))
    #     else:
    #         temp_fetures.pop()


    # print('最好的特征',best_features)