比赛网址: https://ai.futurelab.tv/tournament/2
### 1. 导入需要的工具包并查看数据
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error as mse
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.regularizers import l2
import warnings
# Notebook-wide setup: silence every warning and let pandas print up to
# 100 columns and 100 rows before truncating its output.
warnings.filterwarnings("ignore")
pd.set_option(
    'display.max_columns', 100,
    'display.max_rows', 100,
)
# Load the training data and preview it.
def dateparse(dates):
    """Parse ``YYYYMMDD`` date value(s) into pandas datetimes.

    Accepts a single string or an array-like of strings.
    """
    return pd.to_datetime(dates, format='%Y%m%d')


# `pd.datetime` was removed in pandas 2.0 and read_csv's `date_parser`
# argument is deprecated there, so the 'date' index is converted explicitly
# after reading instead of during parsing.
df = pd.read_csv('bd2019-weather-prediction-training-20190608.csv',
                 index_col='date')
df.index = dateparse(df.index.astype(str))
df.index.name = 'date'
df.head(5)
### 2. 定义一些辅助函数
def getMetrics(y_true, y_pred):
    """Plot predictions against the true values and print R2 and MSE.

    Both series are drawn on one 18x6 figure (predictions in red) and the
    figure is shown. Afterwards the coefficient of determination and the
    mean squared error are printed on a single line.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values (assumed to line up element-wise with
            ``y_true`` -- TODO confirm against callers).

    Returns:
        None. The results are only plotted and printed.
    """
    plt.figure(figsize=(18, 6))
    plt.plot(y_true)
    plt.plot(y_pred, color='red')
    plt.show()

    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # R2 = 1 - (residual sum of squares) / (total sum of squares).
    residual_ss = np.sum(np.square(predicted - actual))
    # The mean is deliberately taken on ``y_true`` as passed in, not on the
    # converted array, so the result matches what the caller's type yields.
    total_ss = np.sum(np.square(actual - np.mean(y_true)))
    r2 = 1 - residual_ss / total_ss

    print('R2 拟合度为: {} , MSE: {} '.format(r2, mse(y_true, y_pred)))
def getWinDire(wind_dire):
    """Normalise one raw value of the wind_direction field.

    Args:
        wind_dire: Raw wind-direction value.

    Returns:
        * ``165.691`` when the value is 999999 or 999998 (presumably
          "missing" codes replaced by a precomputed average -- TODO confirm).
        * ``(wind_dire % 100 - 1) * 22.5`` when the value is above 361; this
          looks like a sector code in the last two digits mapped to
          22.5-degree steps -- verify against the dataset description.
        * The value itself otherwise.
    """
    if wind_dire in (999999, 999998):
        return 165.691
    return (wind_dire % 100 - 1) * 22.5 if wind_dire > 361 else wind_dire
# First-pass handling of missing values for (temperature, humidity, rain20,
# rain08); could be improved by filling the gaps with Lagrange interpolation.
# Readings of 888889 or above are left out of the mean (presumably they are
# missing-value sentinel codes -- TODO confirm against the data description).
_temperature = df.temperature
temperature_mean = np.mean(_temperature[_temperature < 888889].values)
df