空气质量预测模型-CSDN博客

本文链接：https://blog.csdn.net/suqieer/article/details/128203157
用python编写的机器学习模型，此模型不包含模型训练。模型的训练会在下一篇文章发布。
# -*- coding: utf-8 -*-
# @Time : :
# @Author:zzs
# @File:机器学习模型.py
import tensorflow as tf
import seaborn
import xgboost as xgb
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import pandas as pd
import os
import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import  pickle
from sklearn.metrics import roc_auc_score, classification_report, f1_score, recall_score, precision_score
from keras.callbacks import Callback
from keras import backend as K
import time
from sklearn import metrics
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
import matplotlib.gridspec as gridspec

def _build_windows(features, targets, window):
    """Build flattened sliding-window samples from time-ordered rows.

    Sample ``i`` is rows ``i .. i + window - 1`` of ``features`` laid out
    row after row in a single vector; its label is row ``i + window`` of
    ``targets``.

    Args:
        features: 2-D table (DataFrame or array) of numeric feature rows.
        targets: 2-D single-column table of numeric target rows, aligned
            row-for-row with ``features``.
        window: number of consecutive rows that form one sample.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape
        ``(n_rows - window, window * n_features)`` and ``Y`` has shape
        ``(n_rows - window, 1)``.

    Raises:
        ValueError: if there are not more than ``window`` rows.
    """
    feature_values = np.asarray(features, dtype=float)
    target_values = np.asarray(targets, dtype=float)
    n_samples = feature_values.shape[0] - window
    if n_samples <= 0:
        raise ValueError(
            'need more than {0} rows to build windows, got {1}'.format(
                window, feature_values.shape[0]))
    # Placing the `window` shifted copies side by side gives the same column
    # order as flattening each (window, n_features) slice row by row.
    X = np.hstack([feature_values[k:k + n_samples] for k in range(window)])
    Y = target_values[window:]
    return X, Y


def _load_or_train_model(model_file, X_train_norm, Y_train_norm):
    """Return the cached regressor from ``model_file``, or train and cache one.

    Args:
        model_file: path of the pickle file used as the model cache.
        X_train_norm: standardized training features.
        Y_train_norm: standardized training targets.

    Returns:
        The unpickled model if ``model_file`` exists, otherwise a freshly
        fitted ``xgb.XGBRegressor`` (which is also written to ``model_file``).
    """
    if os.path.exists(model_file):
        # NOTE(security): unpickling can execute arbitrary code; only load a
        # model file that this script itself produced.
        with open(model_file, "rb") as fh:
            return pickle.load(fh)

    # n_estimators: number of boosting rounds; gamma: minimum loss reduction
    # required to split; subsample: row sampling ratio; colsample_bytree:
    # fraction of columns sampled for each tree.
    model = xgb.XGBRegressor(n_estimators=200, learning_rate=0.1, gamma=0,
                             subsample=0.9, colsample_bytree=0.9,
                             max_depth=10, reg_lambda=1)
    model.fit(X_train_norm, Y_train_norm)
    with open(model_file, "wb") as fh:
        pickle.dump(model, fh)
    print('模型保存完毕')
    return model


def main():
    """Predict the AQI from the previous three rows with XGBoost and plot it.

    Reads ``./beijing.csv``, builds 3-row sliding-window samples, splits them
    80/20 in file order into training and validation sets, standardizes with
    the training statistics, loads (or trains and caches) an XGBoost
    regressor, then shows two figures (validation set, full data set) with
    their R^2 scores and writes the full-data predictions to ``val_pre.csv``.
    """
    # The existence check and the load/save must use the same file; otherwise
    # the cached model is never found (or a missing one is opened).
    model_path = "xgb.pickle.dat"
    window = 3  # number of preceding rows used to predict the next AQI value

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    filepath = './beijing.csv'  # input file path
    data = pd.read_csv(filepath, index_col=0)
    print(data.shape)
    print(data.head(10))  # show the first 10 rows

    # Build the data set.
    X_data = data[['AQI指数', 'PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3']]  # feature columns
    Y_data = data[['AQI指数']]  # target column
    print(X_data.shape)
    print(Y_data.shape)
    # Sample count is derived from the data instead of being hard-coded, so
    # the script works for a CSV of any length.
    X, Y = _build_windows(X_data, Y_data, window)
    print(X.shape)
    print(Y.shape)

    # First 80% of the samples (in file order) train, the rest validate.
    split = int(X.shape[0] * 0.8)
    X_train, Y_train = X[:split], Y[:split]
    X_val, Y_val = X[split:], Y[split:]

    # Standardization statistics come from the training part only.
    X_mean = X_train.mean(axis=0)
    X_std = X_train.std(axis=0)
    Y_mean = Y_train.mean(axis=0)
    Y_std = Y_train.std(axis=0)

    print('X_train的类型是' + str(type(X_train)))
    print('X_train的shape' + str(X_train.shape))
    print('X_mean的类型是' + str(type(X_mean)))
    print('X_mean的shape' + str(X_mean.shape))
    X_train_norm = (X_train - X_mean) / X_std
    Y_train_norm = (Y_train - Y_mean) / Y_std
    X_val_norm = (X_val - X_mean) / X_std
    X_all_norm = (X - X_mean) / X_std

    model = _load_or_train_model(model_path, X_train_norm, Y_train_norm)

    # Validation-set prediction. The model works on standardized targets, so
    # predictions are mapped back to the original scale before scoring.
    model_pred = model.predict(X_val_norm)
    val_pred = model_pred * Y_std + Y_mean
    R_2_0 = metrics.r2_score(Y_val, val_pred)
    plt.plot(range(Y_val.shape[0]), Y_val, 'b-', label='AQI实际图')
    plt.plot(range(Y_val.shape[0]), val_pred, 'r-', label='AQI预测图')
    plt.legend(loc='best')
    plt.text(150, 400, '拟合R2：{0}%'.format(round(R_2_0 * 100, 2)))
    plt.show()

    # Full-data prediction. The curves drawn here are the full-data series so
    # that they match the R^2 value written on the figure.
    model_pred2 = model.predict(X_all_norm)
    val_pred2 = model_pred2 * Y_std + Y_mean
    R_2_1 = metrics.r2_score(Y, val_pred2)
    plt.plot(range(Y.shape[0]), Y, 'b-', label='AQI实际图')
    plt.plot(range(Y.shape[0]), val_pred2, 'r-', label='AQI预测图')
    plt.legend(loc='best')
    plt.text(150, 400, '拟合R2：{0}%'.format(round(R_2_1 * 100, 2)))
    plt.show()

    df_val = pd.DataFrame(val_pred2)
    df_val.to_csv('val_pre.csv')

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
python机器学习算法