# -*- coding: utf-8 -*-
"""Train several classifiers on stock data from MySQL and write predictions back.

Reads the training table ``buildm`` and the to-be-predicted table
``tobepre_N0509``, fills/scales the features, oversamples the minority
class, trains DT / SVM / AdaBoost / RandomForest / GBDT / KNN models and
writes the combined per-stock predictions to the ``presult0509_py`` table.
"""
import pandas as pd
import pymysql
import sklearn.metrics
from sqlalchemy import create_engine

# charset parameter prevents garbled Chinese text in query results
dbconn = pymysql.connect(
    host="127.0.0.1", database="yes", user="root",
    password="111111", port=3306, charset='utf8')
# DataFrame.to_sql below requires a SQLAlchemy engine, not the raw pymysql connection
conn = create_engine('mysql+mysqldb://root:111111@localhost:3306/yes?charset=utf8')

# Load training data and the rows awaiting prediction via pandas
data = pd.read_sql("select * from buildm", dbconn)
wait = pd.read_sql("select * from tobepre_N0509", dbconn)

# Split features / label.  u'是否涨幅5个点' ("rose 5 points?") is the binary target;
# u'代码' is the stock code and must not be used as a feature.
X = data.drop([u'是否涨幅5个点', u'代码'], axis=1)
y = data[u'是否涨幅5个点']
wait1 = wait.drop([u'代码'], axis=1)

# Fill missing values BEFORE scaling and ASSIGN the result
# (the original called fillna() but discarded its return value).
X = X.fillna(value=0)
wait1 = wait1.fillna(value=0)

# Min-max scale to [0, 1]: fit on the training features only, then transform
# both sets.  The original fitted twice (second fit overwrote the first) and
# never called transform(), so the models were trained on raw data.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
wait1 = pd.DataFrame(scaler.transform(wait1), columns=wait1.columns)
print(X.shape)

# Rebalance the classes (majority:minority is about 10:3) by random
# oversampling.  NOTE: on imblearn >= 0.4 the keyword is sampling_strategy=
# and the method is fit_resample(); the names below match the old API the
# rest of this script was written against.
from imblearn.over_sampling import RandomOverSampler
ratio = {1: 6000, 0: 14601}
sm = RandomOverSampler(ratio=ratio, random_state=None)
X_res, y_res = sm.fit_sample(X, y)

# Hold out 30% for testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.3, random_state=None)


def _report(name, model):
    """Print train/test confusion matrices for a fitted model."""
    print(name)
    print(sklearn.metrics.confusion_matrix(y_train, model.predict(X_train)))
    print(sklearn.metrics.confusion_matrix(y_test, model.predict(X_test)))


# Decision tree
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier(criterion='entropy', splitter='best',
                            min_samples_split=20, min_samples_leaf=10,
                            random_state=13)
DT.fit(X_train, y_train)

# Support vector machine.  Bound to `svc` — the original did `SVC = SVC(...)`,
# shadowing the imported class.  probability=True is needed only if
# predict_proba is ever called on it.
from sklearn.svm import SVC
svc = SVC(C=1.0, kernel='rbf', gamma='auto', tol=0.001, probability=True)
svc.fit(X_train, y_train)

# AdaBoost (default base estimator = decision stump)
from sklearn.ensemble import AdaBoostClassifier
ADA = AdaBoostClassifier(base_estimator=None, n_estimators=150,
                         learning_rate=1.0, algorithm='SAMME')
ADA.fit(X_train, y_train)

# Random forest
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=150, max_depth=5)
rf.fit(X_train, y_train)

# Gradient-boosted trees
from sklearn.ensemble import GradientBoostingClassifier
gdbt = GradientBoostingClassifier(loss='exponential', n_estimators=100,
                                  max_depth=5, min_samples_leaf=8)
gdbt.fit(X_train, y_train)

# K nearest neighbours — the only model whose confusion matrices were
# actually printed in the original script.
from sklearn.neighbors import KNeighborsClassifier
KNN = KNeighborsClassifier(weights='uniform', algorithm='kd_tree',
                           n_jobs=4, p=2, n_neighbors=7)
KNN.fit(X_train, y_train)
_report('KNN', KNN)

# ---- Assemble the result table: stock code + each model's prediction ----
daima = pd.DataFrame(wait[u'代码'])  # the u prefix is required on Python 2


def _pred_col(model, col):
    """Return model predictions on wait1 as a single, named-column DataFrame."""
    out = pd.DataFrame(model.predict(wait1))
    out.columns = [col]
    return out


y1 = _pred_col(DT, 'y_DT')
y_SVC = _pred_col(svc, 'y_SVC')
y3 = _pred_col(ADA, 'y_ADA')
y4 = _pred_col(rf, 'y_rf')
y5 = _pred_col(gdbt, 'y_gdbt')
y7 = _pred_col(KNN, 'y_KNN')  # computed but not merged, same as the original
yp = pd.DataFrame(gdbt.predict_proba(wait1))
yp.columns = ['yp0', 'yp1']  # yp1 = probability of the positive class

# Merge the code column, the selected model outputs and the GBDT positive
# probability side by side (row-aligned by position).
jieguo = pd.concat([daima, y_SVC, y1, y3, y4, y5, yp['yp1']], axis=1)
# Weighted vote across the models; weights were hand-tuned by the author.
jieguo['depiaoshu'] = (jieguo['y_ADA'] * 0.5 + jieguo['y_rf'] +
                       jieguo['y_gdbt'] + jieguo['y_DT'] * 0.8 +
                       jieguo['y_SVC'])
# Write back through the SQLAlchemy engine (to_sql does not accept the raw
# pymysql connection for table replacement).
pd.io.sql.to_sql(jieguo, "presult0509_py", con=conn, if_exists='replace')
# NOTE(review): original author's remark — "this variant appears to be more stable".
# (Blog-page extraction residue: "latest recommended article published 2022-02-27 20:27:08".)