# Hand-written gradient boosting for binary classification (log-loss).
# This section uses names that are not defined here and must already exist:
# np, DecisionTreeRegressor, X, y, n_samples, log_of_odds0, probability0, loss0.
n_times = 20
learning_rate = 0.1

# Row k of each array holds the state after k boosting rounds (row 0 = initial model).
log_of_odds = np.zeros([n_times + 1, n_samples])
residuals = np.zeros([n_times + 1, n_samples])
loss = [0] * (n_times + 1)
prediction = np.zeros([n_times + 1, n_samples])
score = np.zeros([n_times + 1])

# Initial state: every sample starts from the same log-odds / probability.
log_of_odds[0] = [log_of_odds0] * n_samples
prediction[0] = [probability0] * n_samples
score[0] = np.sum((prediction[0] > 0.5) * 1.0 == y) / n_samples
residuals[0] = y - prediction[0]
loss[0] = loss0.sum()

trees = []
gamma_value = np.zeros([n_times + 1, 8])  # per-round leaf outputs (room for 8 leaves)
gamma = np.zeros([n_times + 1, n_samples])  # per-round, per-sample leaf output

for i in range(n_times):
    # Fit a shallow regression tree to the current residuals.
    dt = DecisionTreeRegressor(max_depth=2)
    dt = dt.fit(X, residuals[i])
    tree = dt.tree_
    trees.append(tree)

    # leaf_slot[k] is the position in unique_leafs of the leaf that sample k
    # landed in, so leaf outputs can be mapped back to samples in one step.
    leaf_indeces = dt.apply(X)
    unique_leafs, leaf_slot = np.unique(leaf_indeces, return_inverse=True)
    n_leaf = len(unique_leafs)

    for ileaf in range(n_leaf):
        leaf_index = unique_leafs[ileaf]
        in_leaf = leaf_indeces == leaf_index
        previous_probability = prediction[i][in_leaf]
        denominator = np.sum(previous_probability * (1 - previous_probability))
        # Sum the residuals of the samples in this leaf directly instead of
        # reading tree.value[ileaf + 1]: leaf node ids are not 1, 2, ... once
        # the tree is deeper than one level, so that lookup hit the wrong nodes.
        numerator = np.sum(residuals[i][in_leaf])
        # All probabilities in the leaf saturated at 0 or 1 -> no usable step.
        igamma = numerator / denominator if denominator > 0 else 0.0
        gamma_value[i][ileaf] = igamma
    gamma[i] = gamma_value[i][leaf_slot]

    # Next round: update log-odds, then everything derived from them.
    log_of_odds[i + 1] = log_of_odds[i] + learning_rate * gamma[i]
    # Sigmoid written as 1 / (1 + exp(-z)) so large positive z cannot give inf/inf.
    prediction[i + 1] = 1.0 / (1.0 + np.exp(-log_of_odds[i + 1]))
    score[i + 1] = np.sum((prediction[i + 1] > 0.5) * 1.0 == y) / n_samples
    residuals[i + 1] = y - prediction[i + 1]
    # logaddexp(0, z) == log(1 + exp(z)) without overflowing for large z.
    loss[i + 1] = np.sum(-y * log_of_odds[i + 1] + np.logaddexp(0, log_of_odds[i + 1]))
# GBDT classification
# (blog page footer: latest recommended article published 2024-05-05 00:06:22)