# 分位数回归（quantile regression）简介和代码实现

## statsmodels中的分位数回归

statsmodels 的使用范式与 scikit-learn 稍有不同。两者相同的是：模型对象都提供一个 .fit() 方法来完成训练。不同的是：scikit-learn 的模型通常把数据（X 矩阵和 y 数组）作为 .fit() 的参数传入，而 statsmodels 是在初始化模型对象时传入数据，.fit() 只接收一些可供调节的超参数。

%matplotlib inline
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

mod = smf.quantreg("foodexp ~ income", data)
res = mod.fit(q=0.5)
print(res.summary())


help(quant_mod.fit)


## xgboost的分位数回归

class XGBQuantile(XGBRegressor):
    """XGBRegressor that optimizes a smoothed quantile (pinball) loss.

    Requires ``from xgboost import XGBRegressor`` and
    ``from functools import partial`` (lost in the scraped article).

    Quantile-specific hyperparameters:
        quant_alpha: target quantile in (0, 1), e.g. 0.95.
        quant_delta: width of the smoothed region of the loss.
        quant_thres: residual threshold beyond which the gradient is randomized.
        quant_var:   magnitude of the randomized gradient beyond the threshold.

    All remaining keyword arguments are forwarded to ``XGBRegressor``.
    """

    def __init__(self, quant_alpha=0.95, quant_delta=1.0, quant_thres=1.0, quant_var=1.0,
                 base_score=0.5, booster='gbtree', colsample_bylevel=1,
                 colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
                 max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
                 n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
                 reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
                 silent=True, subsample=1):
        self.quant_alpha = quant_alpha
        self.quant_delta = quant_delta
        self.quant_thres = quant_thres
        self.quant_var = quant_var

        # Fix: the scraped source dropped the line forwarding n_jobs, nthread,
        # objective and random_state, silently discarding those arguments.
        super().__init__(base_score=base_score, booster=booster,
                         colsample_bylevel=colsample_bylevel,
                         colsample_bytree=colsample_bytree, gamma=gamma,
                         learning_rate=learning_rate, max_delta_step=max_delta_step,
                         max_depth=max_depth, min_child_weight=min_child_weight,
                         missing=missing, n_estimators=n_estimators,
                         n_jobs=n_jobs, nthread=nthread, objective=objective,
                         random_state=random_state, reg_alpha=reg_alpha,
                         reg_lambda=reg_lambda, scale_pos_weight=scale_pos_weight,
                         seed=seed, silent=silent, subsample=subsample)

        self.test = None

    def fit(self, X, y):
        """Train with the custom quantile objective; returns self (fluent API)."""
        # Bind the quantile hyperparameters into the objective callable that
        # xgboost will invoke as objective(y_true, y_pred) -> (grad, hess).
        super().set_params(objective=partial(XGBQuantile.quantile_loss,
                                             alpha=self.quant_alpha,
                                             delta=self.quant_delta,
                                             threshold=self.quant_thres,
                                             var=self.quant_var))
        super().fit(X, y)
        return self

    def predict(self, X):
        """Predict with the underlying booster (no quantile-specific logic)."""
        return super().predict(X)

    def score(self, X, y):
        """Inverse of the summed pinball loss on (X, y) — higher is better."""
        y_pred = super().predict(X)
        score = XGBQuantile.quantile_score(y, y_pred, self.quant_alpha)
        score = 1. / score
        return score

    @staticmethod
    def quantile_loss(y_true, y_pred, alpha, delta, threshold, var):
        """Gradient and hessian of a smoothed quantile loss for xgboost.

        The plain quantile loss has zero second derivative almost everywhere,
        which stalls tree growth; inside a band of width ``delta`` the loss is
        smoothed, and for residuals beyond ``threshold`` the gradient is
        replaced by random +/-``var`` noise to keep the trees splitting.
        """
        x = y_true - y_pred
        grad = ((x < (alpha - 1.0) * delta) * (1.0 - alpha)
                - ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) * x / delta
                - alpha * (x > alpha * delta))
        hess = ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) / delta

        # NOTE(review): the scraped source dropped the grad-randomization line
        # that pairs with the hess line below; reconstructed from the standard
        # formulation of this trick — confirm against the original post.
        grad = ((np.abs(x) < threshold) * grad
                - (np.abs(x) >= threshold)
                * (2 * np.random.randint(2, size=len(y_true)) - 1.0) * var)
        hess = (np.abs(x) < threshold) * hess + (np.abs(x) >= threshold)
        # Fix: the scraped source had no return, so xgboost would receive None.
        return grad, hess

    @staticmethod
    def original_quantile_loss(y_true, y_pred, alpha, delta):
        """Gradient/hessian of the (smoothed, non-randomized) quantile loss."""
        x = y_true - y_pred
        # Fix: the scraped source dropped the grad line and the return;
        # grad mirrors the first part of quantile_loss above.
        grad = ((x < (alpha - 1.0) * delta) * (1.0 - alpha)
                - ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) * x / delta
                - alpha * (x > alpha * delta))
        hess = ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) / delta
        return grad, hess

    @staticmethod
    def quantile_score(y_true, y_pred, alpha):
        """Total pinball loss of y_pred against y_true at quantile alpha."""
        score = XGBQuantile.quantile_cost(x=y_true - y_pred, alpha=alpha)
        score = np.sum(score)
        return score

    @staticmethod
    def quantile_cost(x, alpha):
        """Elementwise pinball loss: (alpha-1)*x for x<0, alpha*x for x>=0."""
        return (alpha - 1.0) * x * (x < 0) + alpha * x * (x >= 0)

    @staticmethod
    def get_split_gain(gradient, hessian, l=1):
        """Split gain at each candidate index, per the xgboost gain formula.

        NOTE(review): the scraped source lost this method's signature and loop
        body (only the accumulator and return survived); reconstructed from the
        standard xgboost split-gain formulation — confirm against the original.
        """
        split_gain = list()
        for i in range(gradient.shape[0]):
            split_gain.append(np.sum(gradient[:i]) / (np.sum(hessian[:i]) + l)
                              + np.sum(gradient[i:]) / (np.sum(hessian[i:]) + l)
                              - np.sum(gradient) / (np.sum(hessian) + l))
        return np.array(split_gain)


http://jmarkhou.com/lgbqr/

10-08

01-14 2389
01-14 4019
03-09 4万+
06-30 5207
01-14 8411
01-08 3万+
11-29 1511
06-01 1万+
03-19 1889
12-16 4万+
11-12 264

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

deephub

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。