一、练习题
1. 练习题1
解答:
(1) 均方损失函数
所以
(2) 绝对值损失函数
所以最优预测值是中位数。
2. 练习题2
解答:
3. 练习题3
解答:
此处就不能做梯度下降了,直接取
4. 练习题4
解答:
牛顿法是求函数零点的方法,此处对损失函数的导数应用牛顿迭代法:
由此得到:
5. 练习题5
解答:
当 ,
当,有:
当,有:
6. 练习题6
解答:
所以:
7. 练习题7
解答:
可以得到:
二、代码实现
1. GBDT回归算法
GYH老师的代码:
from sklearn.tree import DecisionTreeRegressor as DT
from sklearn.datasets import make_regression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import numpy as np
class GBDTRegressor:
    """Gradient-boosted decision-tree regressor with squared loss.

    Fits an additive ensemble of shallow regression trees; an internal
    validation split drives early stopping on the validation MAE.
    """

    def __init__(self, max_depth=4, n_estimator=1000, lr=0.2):
        # max_depth: depth of each base tree; n_estimator: maximum number of
        # boosting rounds; lr: shrinkage applied to every tree's contribution.
        self.max_depth = max_depth
        self.n_estimator = n_estimator
        self.lr = lr
        self.booster = []       # fitted base trees, in boosting order
        self.best_round = None  # number of rounds used at predict time

    def record_score(self, y_train, y_val, train_predict, val_predict, i):
        """Return the validation MAE; print train/val MAE every 10 rounds."""
        # Note: these are mean-absolute errors, so name them accordingly.
        mae_val = mean_absolute_error(y_val, val_predict)
        if (i + 1) % 10 == 0:
            mae_train = mean_absolute_error(y_train, train_predict)
            print("第%d轮\t训练集: %.4f\t"
                  "验证集: %.4f" % (i + 1, mae_train, mae_val))
        return mae_val

    def fit(self, X, y):
        """Train the ensemble on (X, y), holding out 25% for early stopping."""
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.25, random_state=0)
        train_predict, val_predict = 0, 0
        # The first tree fits a constant initial prediction (the median).
        next_fit_val = np.full(X_train.shape[0], np.median(y_train))
        # np.inf: np.infty was removed in NumPy 2.0.
        last_val_score = np.inf
        for i in range(self.n_estimator):
            cur_booster = DT(max_depth=self.max_depth)
            cur_booster.fit(X_train, next_fit_val)
            train_predict += cur_booster.predict(X_train) * self.lr
            val_predict += cur_booster.predict(X_val) * self.lr
            # For squared loss ((y - (F_{m-1} + w))^2)/2 with residual r,
            # the loss is ((r - w)^2)/2 and the negative gradient at w=0 is
            # exactly r, so the next tree fits y_train - train_predict.
            next_fit_val = y_train - train_predict
            self.booster.append(cur_booster)
            cur_val_score = self.record_score(
                y_train, y_val, train_predict, val_predict, i)
            if cur_val_score > last_val_score:
                # Validation error worsened: stop and keep earlier rounds only.
                self.best_round = i
                print("\n训练结束!最佳轮数为%d" % (i + 1))
                break
            last_val_score = cur_val_score
        if self.best_round is None:
            # Early stopping never triggered: use every fitted round.
            # (Previously best_round stayed None and predict() crashed on
            # range(None).)
            self.best_round = len(self.booster)

    def predict(self, X):
        """Sum the shrunken predictions of the first best_round trees."""
        cur_predict = 0
        # Stop at the best validation round to avoid overfitting.
        for i in range(self.best_round):
            cur_predict += self.lr * self.booster[i].predict(X)
        return cur_predict
if __name__ == "__main__":
    # Synthetic regression task: 10000 samples, 50 features (20 informative).
    X, y = make_regression(
        n_samples=10000, n_features=50, n_informative=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0)
    model = GBDTRegressor()
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    # mean_absolute_error computes the MAE; the original printed "MSE" for
    # this value, which mislabeled the reported metric.
    mae = mean_absolute_error(y_test, prediction)
    print("\n测试集的MAE为 %.4f" % (mae))
2. GBDT分类算法
GYH老师的二分类代码
from sklearn.tree import DecisionTreeRegressor as DT
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np
class GBDTClassifier:
def __init__(self, max_depth=4, n_estimator=1000, lr=0.2):
self.max_depth = max_depth
self.n_estimator = n_estimator
self.lr = lr
self.booster = []
self.best_round = None
def record_score(self, y_train, y_val, train_predict, val_predict, i):
train_predict = np.exp(train_predict) / (1 + np.exp(train_predict))
val_predict = np.exp(val_predict) / (1 + np.exp(val_predict))
auc_val = roc_auc_score(y_val, val_predict)
if (i+1)%10==0:
auc_train = roc_auc_score(y_train, train_predict)
print("第%d轮\t训练集: %.4f\t"
"验证集: %.4f"%(i+1, auc_train, auc_val))
return auc_val
def fit(self, X, y):
X_train, X_val, y_train, y_val = train_test_split(
X, y, test_size=0.25, random_state=0)
train_predict, val_predict = 0, 0
# 按照二分类比例的初始化公式计算
fit_val = np.log(y_train.mean() / (1 - y_train.mean()))
next_fit_val = np.full(X_train.shape[0], fit_val)
last_val_score = - np.infty
for i in range(self.n_estimator):
cur_booster = DT(max_depth=self.max_depth)
cur_booster.fit(X_train, next_fit_val)
train_predict += cur_booster.predict(X_train) * self.lr
val_predict += cur_booster.predict(X_val) * self.lr
next_fit_val = y_train - np.exp(
train_predict) / (1 + np.exp(train_predict))
self.booster.append(cur_booster)
cur_val_score = self.record_score(
y_train, y_val, train_predict, val_predict, i)
if cur_val_score < last_val_score:
self.best_round = i
print("\n训练结束!最佳轮数为%d"%(i+1))
break
last_val_score = cur_val_score
def predict(self, X):
cur_predict = 0
for i in range(self.best_round):
cur_predict += self.lr * self.booster[i].predict(X)
return np.exp(cur_predict) / (1 + np.exp(cur_predict))
if __name__ == "__main__":
    # Build a synthetic binary-classification task and evaluate by AUC.
    data, labels = make_classification(
        n_samples=10000, n_features=50, n_informative=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.25, random_state=0)
    clf = GBDTClassifier()
    clf.fit(X_train, y_train)
    test_scores = clf.predict(X_test)
    auc = roc_auc_score(y_test, test_scores)
    print("\n测试集的AUC为 %.4f" % (auc))
GYH老师的多分类源码
from sklearn.tree import DecisionTreeRegressor as DT
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np
def one_hot(y):
    """One-hot encode an integer label vector.

    Returns a float matrix of shape (len(y), y.max() + 1) whose row i has a
    1.0 in column y[i] and 0.0 elsewhere.
    """
    n_classes = y.max() + 1
    # Row y[i] of the identity matrix IS the one-hot vector for label y[i].
    return np.eye(n_classes)[y]
class GBDTMultiClassifier:
    """Gradient-boosted decision-tree multiclass classifier (softmax loss).

    Expects one-hot labels; each boosting round fits one regression tree per
    class. Early stopping maximizes validation AUC.
    """

    def __init__(self, max_depth=4, n_estimator=1000, lr=0.2):
        # max_depth: depth of each base tree; n_estimator: maximum number of
        # boosting rounds; lr: shrinkage applied to every tree's contribution.
        self.max_depth = max_depth
        self.n_estimator = n_estimator
        self.lr = lr
        self.booster = []       # booster[i][m]: round-i tree for class m
        self.n_classes = None
        self.best_round = None  # number of rounds used at predict time

    @staticmethod
    def _softmax(scores):
        """Row-wise numerically stable softmax.

        Subtracting the row maximum is mathematically a no-op but prevents
        np.exp overflow that the unshifted form suffers for large scores.
        """
        shifted = np.exp(scores - scores.max(axis=1, keepdims=True))
        return shifted / shifted.sum(axis=1, keepdims=True)

    def get_init_val(self, y):
        """Per-class initial score: log of the class prior, tiled per sample."""
        init_val = []
        y = np.argmax(y, axis=1)
        for c in range(self.n_classes):
            init_val.append(np.log((y == c).mean()))
        return np.full((y.shape[0], self.n_classes), init_val)

    def record_score(self, y_train, y_val, train_predict, val_predict, i):
        """Return validation AUC on softmax probabilities; log every 10 rounds."""
        train_predict = self._softmax(train_predict)
        val_predict = self._softmax(val_predict)
        auc_val = roc_auc_score(y_val, val_predict)
        if (i + 1) % 10 == 0:
            auc_train = roc_auc_score(y_train, train_predict)
            print("第%d轮\t训练集: %.4f\t"
                  "验证集: %.4f" % (i + 1, auc_train, auc_val))
        return auc_val

    def fit(self, X, y):
        """Train on X and one-hot labels y, holding out 25% for early stopping."""
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.25, random_state=0)
        self.n_classes = y.shape[1]
        train_predict = np.zeros((X_train.shape[0], self.n_classes))
        val_predict = np.zeros((X_val.shape[0], self.n_classes))
        next_fit_val = self.get_init_val(y_train)
        # np.inf: np.infty was removed in NumPy 2.0.
        last_val_score = -np.inf
        for i in range(self.n_estimator):
            # Gradients for all classes use the scores as they stood before
            # this round, so snapshot the probabilities once per round.
            prev_probs = self._softmax(train_predict)
            self.booster.append([])
            for m in range(self.n_classes):
                cur_booster = DT(max_depth=self.max_depth)
                cur_booster.fit(X_train, next_fit_val[:, m])
                train_predict[:, m] += cur_booster.predict(X_train) * self.lr
                val_predict[:, m] += cur_booster.predict(X_val) * self.lr
                # Negative gradient of softmax cross-entropy for class m:
                # y_m - p_m with p_m from the previous round's scores.
                next_fit_val[:, m] = y_train[:, m] - prev_probs[:, m]
                self.booster[-1].append(cur_booster)
            cur_val_score = self.record_score(
                y_train, y_val, train_predict, val_predict, i)
            if cur_val_score < last_val_score:
                # Validation AUC dropped: stop and keep earlier rounds only.
                self.best_round = i
                print("\n训练结束!最佳轮数为%d" % (i + 1))
                break
            last_val_score = cur_val_score
        if self.best_round is None:
            # Early stopping never triggered: use every fitted round.
            # (Previously best_round stayed None and predict() crashed on
            # range(None).)
            self.best_round = len(self.booster)

    def predict(self, X):
        """Return softmax class probabilities using the first best_round rounds."""
        scores = np.zeros((X.shape[0], self.n_classes))
        for i in range(self.best_round):
            for m in range(self.n_classes):
                scores[:, m] += self.lr * self.booster[i][m].predict(X)
        return self._softmax(scores)
if __name__ == "__main__":
    # Three-class synthetic problem; labels are one-hot encoded because the
    # model fits one tree per class in each round.
    features, labels = make_classification(
        n_samples=10000, n_features=50, n_informative=20,
        n_classes=3, random_state=1)
    targets = one_hot(labels)
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25, random_state=0)
    clf = GBDTMultiClassifier()
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    auc = roc_auc_score(y_test, prediction)
    print("\n测试集的AUC为 %.4f" % (auc))