看了录播后,照着代码敲了一遍《sklearn常用分类回归算法简介》,能够了解 sklearn 的常规套路,但模型的具体参数还需要进一步了解。
# Third-party and standard-library imports.
try:
    # scikit-learn >= 0.18 provides the splitter in ``model_selection``.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for legacy installs: ``sklearn.cross_validation`` was
    # deprecated in 0.18 and removed in 0.20.
    from sklearn.cross_validation import train_test_split
from sklearn import metrics
import pandas as pd
import time

# Load the data set and replace missing values with 0.
data = pd.read_csv('/home/whn/Downloads/all_window.csv').fillna(0,axis=1)
# Features: every column except the target column 'label'.
X = data.drop('label',axis=1)
# Optional feature standardisation, currently disabled (enabling it would also
# require ``from sklearn.preprocessing import StandardScaler``).
# min_max_scale = StandardScaler()
# X = min_max_scale.fit_transform(X)
# Target: the 'label' column.
y = data['label']
# Accumulates one [model name, test MSE, fit time in seconds] row per model.
history = []
# Hold out 10% of the rows as the test set; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
# Linear regression family: LinearRegression, Ridge (L2 penalty) and
# Lasso (L1 penalty).
from sklearn import linear_model

# One entry per model to benchmark, in evaluation order:
# (label recorded in ``history``, estimator class, constructor kwargs).
linear_specs = [
    ('LinearRegression', linear_model.LinearRegression, {}),
    ('Ridge', linear_model.Ridge, {}),
    ('Ridge_alpha=0.5', linear_model.Ridge, {'alpha': 0.5}),
    ('Lasso', linear_model.Lasso, {}),
]

for name, estimator_cls, params in linear_specs:
    # The timed region covers estimator construction and fitting only;
    # prediction and scoring happen after the clock is stopped.
    start = time.time()
    reg = estimator_cls(**params)
    reg.fit(X_train, y_train)
    end = time.time()

    # Score on the held-out split with mean squared error.
    y_pred = reg.predict(X_test)
    loss = metrics.mean_squared_error(y_test, y_pred)

    # Record [label, test MSE, elapsed seconds] for later comparison.
    history.append([name, loss, end - start])
start = time.time()
reg = linear_model.Lasso(alpha=2)
reg.fit(X_train<