之前参加数学建模时,简单地用 sklearn 的 MLP 以及随机森林进行了回归建模,现将代码记录如下:
MLP 算法代码:
# MLP regression script: fit an MLPRegressor on a single feature column of
# sample.csv, report train/test MSE and R^2, persist the model, and plot
# true test targets against predictions.
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23

# Load data (assumes sample.csv exists in the working directory).
last_data = pd.read_csv("./sample.csv")
# Target values: column 1, first 100 rows.
# Kept 1-D where possible; StandardScaler needs a 2-D array, so reshape
# only around the scaling step and ravel back to avoid the
# DataConversionWarning that fitting with a column vector triggers.
y = last_data.iloc[0:100, 1].values
# Feature values: column 0, first 100 rows, as an (n_samples, 1) matrix.
x = last_data.iloc[0:100, 0].values.reshape(-1, 1)
# Split 80% train / 20% test.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# Standardize features: fit the scaler on the training set only,
# then apply the same transform to the test set (avoids data leakage).
scaler_x = preprocessing.StandardScaler()
x_train = scaler_x.fit_transform(x_train)
x_test = scaler_x.transform(x_test)
# Standardize targets the same way (MLPs converge better on scaled targets).
scaler_y = preprocessing.StandardScaler()
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).ravel()
# MLP with three hidden layers (500, 150, 20 units), tanh activation,
# up to 10000 iterations.
fit1 = MLPRegressor(hidden_layer_sizes=(500, 150, 20), random_state=0,
                    max_iter=10000, shuffle=True, activation='tanh')
print("fitting model right now")
# Fit once (the original fit the identical model twice, wasting one full
# training pass).
fit1.fit(x_train, y_train)
# Train-set MSE; mean_squared_error(y_true, y_pred) is the documented
# argument order (MSE itself is symmetric, but the convention matters
# for other metrics).
pred1_train = fit1.predict(x_train)
mse_1 = mean_squared_error(y_train, pred1_train)
print("Train ERROR = ", mse_1)
# Test-set MSE.
pred1_test = fit1.predict(x_test)
mse_2 = mean_squared_error(y_test, pred1_test)
print("Test ERROR = ", mse_2)
# R^2 score on the training set.
print(fit1.score(x_train, y_train))
# Persist the fitted model.
path = "./MLP_1.model"
joblib.dump(fit1, path)
# Visualize: true test targets vs. predictions.
# (The original plotted x_test — the *features* — as the "true" curve and
# negated the predictions; the meaningful comparison is y_test vs. the raw
# predictions.)
xx = range(len(y_test))
plt.figure(figsize=(8, 6))
plt.plot(xx, y_test, color="red", label="true", linewidth=2)
plt.plot(xx, pred1_test, color="orange", label="test", linewidth=2)
plt.legend()
plt.show()
随机森林(RF)算法代码:
# Random-forest regression script: fit a RandomForestRegressor on a single
# feature column of sample.csv, report train/validation MSE and R^2,
# persist the model, and plot true validation targets against predictions.
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23

# Load data (assumes sample.csv exists in the working directory).
last_data = pd.read_csv("./sample.csv")
# Target values: column 2, kept 1-D — fitting with an (n, 1) column vector
# triggers a DataConversionWarning in scikit-learn.
y = last_data.iloc[:, 2].values
# Feature values: column 0 as an (n_samples, 1) matrix.
X = last_data.iloc[:, 0].values.reshape(-1, 1)
# Split 80% train / 20% validation, with a fixed seed for reproducibility.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42)
# Random forest (the original comment said "TF" — typo for RF):
# 850 trees, depth capped at 20, all cores, out-of-bag scoring enabled.
rf = RandomForestRegressor(n_estimators=850,
                           max_depth=20,
                           n_jobs=-1,
                           oob_score=True,
                           bootstrap=True,
                           random_state=40)
rf.fit(X_train, y_train)
# Train-set MSE (mean_squared_error(y_true, y_pred) is the documented order).
pred1_train = rf.predict(X_train)
mse_1 = mean_squared_error(y_train, pred1_train)
print("Train ERROR = ", mse_1)
# Validation-set MSE.
pred1_test = rf.predict(X_valid)
mse_2 = mean_squared_error(y_valid, pred1_test)
print("Test ERROR = ", mse_2)
# R^2 score on the training set.
print(rf.score(X_train, y_train))
# Persist the fitted model under its own filename — the original reused
# "./MLP_1.model", which would silently overwrite the MLP model saved by
# the first script.
path = "./RF_1.model"
joblib.dump(rf, path)
# Visualize: true validation targets vs. predictions.
pred_valid = rf.predict(X_valid)
xx = range(len(y_valid))
plt.figure(figsize=(8, 6))
plt.plot(xx, y_valid, color="red", label="true", linewidth=2)
plt.plot(xx, pred_valid, color="orange", label="test", linewidth=2)
plt.legend()
plt.show()