Basic ensemble code



import pandas as pd
from datetime import datetime
from math import log  # real-valued log; cmath.log would make the target complex-valued
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV
from xgboost.sklearn import XGBRegressor

# Read the training set and the test set separately
train_df = pd.read_table('data/bike_train.txt', sep=',')
test_df = pd.read_table('data/bike_test.txt', sep=',')


def split_time(data):
    date = datetime.strptime(data, '%Y-%m-%d %H:%M:%S')
    year = date.year
    month = date.month
    day = date.day
    weekday = date.weekday()
    hour = date.hour
    return year, month, day, weekday, hour
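
# Quick sanity check of split_time (added for illustration): 2011-01-20 was a Thursday,
# so weekday() should be 3 (Monday == 0).
assert split_time('2011-01-20 09:00:00') == (2011, 1, 20, 3, 9)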


# Preprocess the data: drop duplicate rows and any columns that contain missing values
train_df = train_df.drop_duplicates()
train_df = train_df.dropna(how='any', axis=1)
train_df['year'], train_df['month'], train_df['day'], train_df['weekday'], train_df['hour'] = \
    zip(*train_df['datetime'].apply(func=split_time))
test_df['year'], test_df['month'], test_df['day'], test_df['weekday'], test_df['hour'] = \
    zip(*test_df['datetime'].apply(func=split_time))
train_df['y'] = train_df.apply(lambda x: x.casual + x.registered, axis=1)  # target: total rentals by casual plus registered users
# train_df['y'] = train_df['total_demand'].apply(func=log_demand)
train_df['y'] = train_df['y'].apply(lambda x: log(x + 1))
# train_df.corr()
train_df = train_df.drop(columns=['datetime', 'casual', 'registered', 'count', 'atemp'])
train_df['y'] = train_df['y'].astype('float')
feat_cols = list(train_df.columns)
feat_cols.remove('y')
train_x, test_x, train_y, test_y = train_test_split(train_df[feat_cols].values, train_df['y'].values,
                                                    test_size=0.2, random_state=0)
kfolds = KFold(n_splits=10, shuffle=True, random_state=0)  # shuffle=True is required when random_state is set
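
# Note (added): because y = log(count + 1), every MSE computed on the holdout below is measured
# in log space, so sqrt(MSE) is effectively the RMSLE of the original rental counts (the metric
# used by the Kaggle bike-sharing competition this data appears to come from). A small helper:
from math import sqrt

def report_rmsle(name, mse):
    """Print a model name with its log-space MSE and the RMSLE it implies for the raw counts."""
    print('%-12s MSE = %.4f  RMSLE = %.4f' % (name, mse, sqrt(mse)))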


# Prediction with a regression tree
DTModel = DecisionTreeRegressor(max_depth=12, min_samples_split=10)
DTModel.fit(train_x, train_y)
y_pred1 = DTModel.predict(test_x)
mse1 = mean_squared_error(y_pred1, test_y)
# y_pred1 = cross_val_score(DTModel, train_df[feat_cols], train_df['y'], cv=kfolds)
# err1 = 1 - y_pred1.mean()
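
# Optional (added): the commented-out cross_val_score call above uses the default R^2 scoring,
# so "1 - mean" is not a mean squared error. To cross-validate on the same metric as the holdout:
cv_mse1 = -cross_val_score(DTModel, train_df[feat_cols].values, train_df['y'].values,
                           cv=kfolds, scoring='neg_mean_squared_error').mean()
print('decision tree 10-fold CV MSE: %.4f' % cv_mse1)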


# Prediction with a random forest
# rf = RandomForestRegressor(n_estimators=1000, min_samples_split=11, oob_score=True)
rf = RandomForestRegressor(n_estimators=150, oob_score=True, criterion='squared_error')  # 'mse' was renamed 'squared_error' in recent scikit-learn
rf.fit(train_x, train_y)
y_pred2 = rf.predict(test_x)
mse2 = mean_squared_error(y_pred2, test_y)
# y_pred2 = cross_val_score(rf, train_df[feat_cols], train_df['y'], cv=kfolds)
# err2 = 1 - y_pred2.mean()
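
# Optional (added): inspect which engineered features the forest relies on most;
# feature_importances_ is aligned with the column order of feat_cols used to build train_x.
for name, score in sorted(zip(feat_cols, rf.feature_importances_), key=lambda t: t[1], reverse=True):
    print('%-12s importance = %.4f' % (name, score))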

# Prediction with AdaBoost
AdaModel = AdaBoostRegressor(n_estimators=300, learning_rate=0.1)
AdaModel.fit(train_x, train_y)
y_pred3 = AdaModel.predict(test_x)
mse3 = mean_squared_error(y_pred3, test_y)
# y_pred3 = cross_val_score(AdaModel, train_df[feat_cols], train_df['y'], cv=kfolds, scoring='neg_mean_squared_error')
# y_pred3.mean()
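
# Optional sketch (added): by default AdaBoostRegressor boosts depth-3 trees; deeper weak learners
# may fit this data better. Note the keyword is `estimator` in scikit-learn >= 1.2 and
# `base_estimator` in older releases.
AdaModel2 = AdaBoostRegressor(estimator=DecisionTreeRegressor(max_depth=8),
                              n_estimators=300, learning_rate=0.1)
AdaModel2.fit(train_x, train_y)
print('AdaBoost with depth-8 trees, holdout MSE: %.4f' % mean_squared_error(test_y, AdaModel2.predict(test_x)))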

# Prediction with GBDT (gradient boosted trees)
gbdtModel = GradientBoostingRegressor(n_estimators=500, learning_rate=0.3, subsample=0.9, max_depth=4)
gbdtModel.fit(train_x, train_y)
y_pred4 = gbdtModel.predict(test_x)
mse4 = mean_squared_error(y_pred4, test_y)
# y_pred4 = cross_val_score(gbdtModel, train_df[feat_cols], train_df['y'], cv=kfolds)
# err4 = 1 - y_pred4.mean()
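
# Optional (added): with 500 trees and a fairly large learning rate the GBDT can overfit, so track
# how the holdout MSE evolves as boosting stages are added.
stage_mse = [mean_squared_error(test_y, y_stage) for y_stage in gbdtModel.staged_predict(test_x)]
best_stage = stage_mse.index(min(stage_mse)) + 1
print('best number of GBDT stages on the holdout: %d (MSE %.4f)' % (best_stage, min(stage_mse)))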

# Prediction with XGBoost
xgtrain = xgb.DMatrix(train_x, label=train_y)
xgtest = xgb.DMatrix(test_x, label=test_y)
params1 = {
            'objective': 'reg:squarederror',  # 'reg:linear' is the old, deprecated alias
            'eval_metric': 'rmse',
            'eta': 0.4,
            'gamma': 0.1,
            'max_depth': 10,
            'subsample': 1,
            'colsample_bytree': 0.7,
            'lambda': 5,
            'alpha': 0.1
}
xgbModel = xgb.train(params=params1, num_boost_round=500, dtrain=xgtrain)
y_pred5 = xgbModel.predict(xgtest).tolist()
mse5 = mean_squared_error(y_pred5, test_y)
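
# Optional sketch (added): instead of hard-coding num_boost_round=500, let xgb.cv with early
# stopping suggest a round count for these parameters (nfold and early_stopping_rounds are
# arbitrary choices here).
cv_results = xgb.cv(params=params1, dtrain=xgtrain, num_boost_round=500, nfold=5,
                    early_stopping_rounds=20, seed=0)
print('suggested num_boost_round: %d' % len(cv_results))
print('CV test RMSE at that round: %.4f' % cv_results['test-rmse-mean'].iloc[-1])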

# Prediction with LightGBM
params2 = {
    'num_leaves': 150,
    'objective': 'regression',
    'max_depth': 10,
    'learning_rate': 0.05,
    'max_bin': 200
}
train_lgb = lgb.Dataset(train_x, label=train_y)
lgbm = lgb.train(params=params2, train_set=train_lgb, num_boost_round=500)
y_pred6 = lgbm.predict(test_x)
mse6 = mean_squared_error(y_pred6, test_y)
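
# Summary (added): compare the six models on the holdout split using the report_rmsle helper
# defined after the train/test split above.
results = [('DecisionTree', mse1), ('RandomForest', mse2), ('AdaBoost', mse3),
           ('GBDT', mse4), ('XGBoost', mse5), ('LightGBM', mse6)]
for name, mse in sorted(results, key=lambda t: t[1]):
    report_rmsle(name, mse)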

# Automatic hyper-parameter tuning with grid-search cross-validation
# (XGBRegressor and GridSearchCV are already imported above; the evaluation metric is handled by
# the scoring argument. 'reg:logistic' is dropped from the grid because it requires targets in
# [0, 1], which the log-transformed counts are not.)
params = {
    'max_depth': [4, 6, 8, 10],
    'gamma': [0.1, 1, 10, 100]
}
xgbModel = XGBRegressor(objective='reg:squarederror')
reg = GridSearchCV(xgbModel, param_grid=params, scoring='neg_mean_squared_error')
reg.fit(train_x, train_y)
print(reg.best_estimator_)
print(reg.best_params_)
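
# Final step (added, a sketch): predict on the test file with the tuned model, assuming test_df
# contains the same feature columns after the datetime split (it has no casual/registered/count).
# Predictions are in log space, so invert the log(x + 1) transform with expm1 to recover counts.
import numpy as np

best_model = reg.best_estimator_
test_pred_log = best_model.predict(test_df[feat_cols].values)
submission = pd.DataFrame({'datetime': test_df['datetime'],
                           'count': np.maximum(np.expm1(test_pred_log), 0).round().astype(int)})
# submission.to_csv('data/bike_submission.csv', index=False)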

 
