"""Reading list and runnable demos for assorted machine-learning techniques.

0. Attention mechanism
   https://www.jianshu.com/p/0f0c674837e3
1. Two-sample t-test
   https://www.jianshu.com/p/7555c4311a57
1. Transfer-learning demo:
2. Capsule networks
   https://www.jianshu.com/p/271d5f1f0e25
3. Wide and Deep
4. Using part of a neural network / keeping the model's features
5. Multi-label classification
   https://www.jianshu.com/p/76f9e4c0d0a2  (multi-label CNN classification)
6. CatBoost tree models
   https://www.jianshu.com/p/49ab87122562
7. Knowledge distillation
   https://www.jianshu.com/p/5c38872cdc0f
8. Bidirectional LSTM with a simple attention strategy, max-pooling /
   padding strategies and custom-layer strategies.
   Use Lambda layers to make your Keras more flexible.
   https://mp.weixin.qq.com/s/FvVr44RVsbKotITH29u1CQ  (keras attention)
   https://github.com/philipperemy/keras-attention-mechanism  (keras attention)
   https://www.jianshu.com/p/31c0acf94e0e  (keras attention)
   https://blog.csdn.net/u010041824/article/details/78855435
9. LightGBM + LR + FM
   https://mp.weixin.qq.com/s/Qpaw8TsnX46hFZ0htYq7qA
10. PMML: cross-platform deployment of machine-learning models
   https://mp.weixin.qq.com/s/hOjriQe__z4_dL0l6bRyCA
11. The development of GANs
   https://mp.weixin.qq.com/s/b4Ep6NmWP92bKBGDyJs4rA
12. Model interpretation for GBDT
   https://yq.aliyun.com/articles/594969?utm_content=m_50745  (GBDT model interpretation)
13. Alibaba's explanation of neural-network feature importance
   https://cloud.tencent.com/developer/news/330520  (judging feature importance with a neural network)
14. Implementing a GAN in TensorFlow
15. Linux command line
16. Spectral clustering
   https://mp.weixin.qq.com/s/29XFWMAzmyvZwLOs0KJTIw
"""
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras import losses
from keras.utils import to_categorical
from keras.layers import LSTM, Embedding
import keras.backend as K
from keras import initializers
import matplotlib.pyplot as plt
import tensorflow as tf

# Capsule networks: https://www.jianshu.com/p/271d5f1f0e25
#
# Capsule is a network structure proposed by Hinton in 2017 that attracted a
# lot of attention.  Its main characteristic is "vector in, vector out",
# whereas an ordinary neuron is "vector in, scalar out".  The vector emitted
# by a capsule can express richer features than the scalar emitted by a
# neuron.

if __name__ == '__main__':
    # Generate features with GBDT, then feed them to a linear model.
    # https://scikit-learn.org/stable/auto_examples/ensemble/plot_feature_transformation.html#example-ensemble-plot-feature-transformation-py
    # https://blog.csdn.net/shine19930820/article/details/71713680#generate-features-for-ffm
    import pandas as pd
    import numpy as np
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import OneHotEncoder

    # Synthetic data: 20000 samples, 10 random features, 3 random classes.
    x = np.random.random((20000, 10))
    y = np.random.randint(3, size=(20000, 1))

    gbdt = GradientBoostingClassifier(
        max_depth=3,
        min_samples_leaf=10,
        n_estimators=100,
        learning_rate=0.2,
        random_state=2,
    )
    gbdt.fit(x, y.ravel())

    # Compute the leaf indices once instead of re-running apply() for every
    # use.  Only the trees at class index 0 are kept, giving one column per
    # boosting round.
    leaf_index = gbdt.apply(x)[:, :, 0]
    print(leaf_index)
    print(np.array(leaf_index).shape)

    # Leaf ids are categorical, not ordinal: one-hot encode them before the
    # linear model.  (The encoder used to be fitted but never applied, so the
    # logistic regression was trained on the raw leaf ids.)
    gbdt_enc = OneHotEncoder()
    leaf_one_hot = gbdt_enc.fit_transform(leaf_index)

    lr = LogisticRegression()
    lr.fit(leaf_one_hot, y.ravel())
    lr_predict = lr.predict(leaf_one_hot)

    # accuracy_score expects (y_true, y_pred).
    print(accuracy_score(y.ravel(), lr_predict))
# Single LightGBM parameter set used by the cross-validation demo below.
# lgb.cv evaluates ONE configuration, so every value here is a scalar; the
# tunable values are the first candidate of each list the demo used to pass
# (learning_rate uses 0.1 because the first candidate was 0.0).
# 'num_round' and 'subsample' were dropped: they are aliases that conflicted
# with num_boost_round and 'bagging_fraction'.
LGB_CV_PARAMS = {
    'boosting_type': 'gbdt',
    'objective': 'multiclass',
    'metric': 'multi_logloss',
    'max_depth': 4,  # raise for more accuracy
    'num_leaves': 10,  # raise for more accuracy
    'min_child_samples': 15,  # raise to reduce over-fitting
    'min_child_weight': 0.0005,  # raise to reduce over-fitting
    'learning_rate': 0.1,
    'feature_fraction': 0.5,
    'bagging_fraction': 0.8,
    'bagging_freq': 12,
}


def grid_searchs(modeol_names, lgb_train, num_class=5):
    """Cross-validate a LightGBM multiclass model and print the best round.

    Args:
        modeol_names: Model selector; only ``'LGB'`` is supported.
        lgb_train: ``lightgbm.Dataset`` holding features and integer labels.
        num_class: Number of distinct labels in ``lgb_train`` (the demo data
            has 5).

    Returns:
        The dict returned by ``lightgbm.cv``.

    Raises:
        ValueError: If ``modeol_names`` is not ``'LGB'``.
    """
    if modeol_names != 'LGB':
        raise ValueError(
            "unsupported model name {!r}; expected 'LGB'".format(modeol_names)
        )

    import warnings
    warnings.filterwarnings("ignore")
    import lightgbm as lgb

    params = dict(LGB_CV_PARAMS, num_class=num_class)
    cv_results = lgb.cv(
        params,
        lgb_train,
        num_boost_round=1000,
        nfold=5,
        stratified=False,
        shuffle=True,
        # Early stopping goes through the callback API rather than the
        # early_stopping_rounds keyword.
        callbacks=[lgb.early_stopping(stopping_rounds=50)],
        seed=0,
    )

    # The mean-score key is '<metric>-mean', optionally with a dataset
    # prefix, so look it up by suffix instead of hard-coding it.
    mean_key = next(key for key in cv_results if key.endswith('-mean'))
    mean_scores = cv_results[mean_key]
    print('best n_estimators:', len(mean_scores))
    print('best cv score:', mean_scores[-1])
    print(cv_results)
    return cv_results


if __name__ == '__main__':
    # Keras head over the GBDT leaf indices: one integer leaf id per boosting
    # round (100 rounds) -> Embedding -> LSTM -> Dense -> 3-way classifier.
    # Relies on x, y and gbdt created by the GBDT demo earlier in this script.
    inputs = Input(shape=(100,))
    keras_model = Embedding(input_dim=100, output_dim=200)(inputs)
    keras_model = LSTM(48, activation='relu')(keras_model)
    keras_model = Dense(48, activation='relu')(keras_model)
    # categorical_crossentropy expects a probability distribution over the
    # classes, so the output layer must be softmax (it used to be relu).
    out_model = Dense(3, activation='softmax')(keras_model)
    model = Model(inputs=inputs, outputs=out_model)
    model.summary()
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(0.00001),
        metrics=['accuracy'],
    )
    keras_y = to_categorical(y)
    # NOTE: training is intentionally left disabled, as in the original demo.
    # To train: model.fit(np.array(gbdt.apply(x)[:, :, 0]), keras_y,
    #                     batch_size=12, epochs=20)

if __name__ == '__main__':
    import warnings
    warnings.filterwarnings("ignore")
    # Bidirectional LSTM + simple attention strategy + custom strategy
    # + (max-pooling / padding).
    import pandas as pd
    import numpy as np
    import lightgbm as lgb

    # One empty bucket per class label (currently unused by the demo).
    dicts = {label: [] for label in np.arange(5)}

    # Synthetic data: 10000 samples, 20 random features, 5 random classes.
    x = np.random.random((10000, 20))
    y = np.random.randint(5, size=(10000, 1))

    x_train = pd.DataFrame(x)
    y_train = pd.DataFrame(y.ravel())
    lgb_train = lgb.Dataset(x_train, y_train)

    grid_searchs('LGB', lgb_train)
# Plan: build features with a tuned LGB or GBDT model; feed them into a
# neural network with an FM layer; then an Embedding layer; then custom
# padding and pooling; then a bidirectional LSTM; then CNN convolutions;
# finally output the result.
#
# DeepFM, principle and implementation:
# https://blog.csdn.net/songbinxu/article/details/80151814
#
# Hyper-parameter search for XGB and for LGB.

# Candidate values searched for the XGBoost classifier.
XGB_PARAM_GRID = {
    'max_depth': list(range(1, 10, 2)),
    'min_child_weight': list(range(1, 10, 2)),
    'gamma': [i / 10.0 for i in range(0, 8)],
    'subsample': [i / 10.0 for i in range(6, 10)],
    'colsample_bytree': [i / 10.0 for i in range(6, 10)],
    'reg_alpha': [0, 0.001, 0.005, 0.01, 0.05],
    # Starts at 0.1: a learning rate of 0.0 means the boosted trees never
    # contribute, so that candidate only wasted fits.
    'learning_rate': [i / 10.0 for i in range(1, 8)],
}

# Candidate values searched for the LightGBM classifier.
LGB_PARAM_GRID = {
    'max_depth': list(range(4, 20, 2)),  # raise for more accuracy
    'num_leaves': list(range(10, 100, 10)),  # raise for more accuracy
    'min_child_samples': list(range(15, 20, 1)),  # raise to reduce over-fitting
    'min_child_weight': [0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003],  # raise to reduce over-fitting
}


def grid_searchs(modeol_names, x, y):
    """Run a 5-fold grid search for an XGBoost or LightGBM classifier.

    Prints ``cv_results_``, ``best_params_`` and ``best_score_`` of the
    fitted search.

    Args:
        modeol_names: ``'XGB'`` or ``'LGB'``.
        x: Feature matrix, shape (n_samples, n_features).
        y: 1-D array of class labels.

    Returns:
        The fitted ``GridSearchCV`` instance.

    Raises:
        ValueError: If ``modeol_names`` is neither ``'XGB'`` nor ``'LGB'``.
    """
    if modeol_names not in ('XGB', 'LGB'):
        raise ValueError(
            "unsupported model name {!r}; expected 'XGB' or 'LGB'".format(
                modeol_names
            )
        )

    import warnings
    warnings.filterwarnings("ignore")
    warnings.filterwarnings(module='sklearn*', action='ignore', category=DeprecationWarning)
    from sklearn.model_selection import GridSearchCV

    if modeol_names == 'XGB':
        import numpy as np
        import xgboost as xgb

        # Plain 'f1' is the binary F1 score and fails on multiclass targets;
        # fall back to the macro average when there are more than two labels.
        scoring = 'f1' if np.unique(y).size <= 2 else 'f1_macro'
        estimator = xgb.XGBClassifier(
            learning_rate=0.1,
            n_estimators=1000,
            max_depth=5,
            min_child_weight=1,
            gamma=0,
            subsample=0.8,
            colsample_bytree=0.8,
            n_jobs=4,  # replaces the deprecated ``nthread`` alias
            scale_pos_weight=1,
            random_state=27,  # replaces the deprecated ``seed`` alias
        )
        # ``iid`` was removed from GridSearchCV; leaving it out matches the
        # former ``iid=False`` behaviour.
        gsearch1 = GridSearchCV(
            estimator=estimator,
            param_grid=XGB_PARAM_GRID,
            scoring=scoring,
            n_jobs=-1,
            cv=5,
            verbose=1,
        )
    else:
        import lightgbm as lgb

        # An sklearn-style LightGBM model.  ``verbose=-1`` is LightGBM's
        # "silent" level, so it is given to the estimator; the search itself
        # gets a regular non-negative verbosity.
        gsearch1 = GridSearchCV(
            estimator=lgb.LGBMClassifier(verbose=-1),
            param_grid=LGB_PARAM_GRID,
            cv=5,
            verbose=0,
            n_jobs=-1,
        )

    gsearch1.fit(x, y)
    print(gsearch1.cv_results_, gsearch1.best_params_, gsearch1.best_score_)
    return gsearch1


if __name__ == '__main__':
    import numpy as np
    import sklearn
    import xgboost as xgb
    import lightgbm as lgb
    from sklearn.model_selection import GridSearchCV

    # Synthetic data: 100 samples, 20 random features, 3 random classes.
    x = np.random.random((100, 20))
    y = np.random.randint(3, size=(100, 1))
    y = y.ravel()
    grid_searchs('LGB', x, y)
# 2.20190824
# 最新推荐文章于 2021-05-29 00:52:05 发布