# 2021-07-21

# -*- coding: utf-8 -*-

"""
Created on Wed Apr 8 09:46:04 2020

@author: advantech
"""
from sklearn.externals import joblib
import pandas as pd
import pymysql as mdb
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,MinMaxScaler

from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.svm import SVC

import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt

from sklearn import linear_model

from sklearn import linear_model
import numpy as np

from sklearn import tree
from sklearn.ensemble import RandomForestRegressor

def main():
    """Load the dataset, rank columns by correlation with ``d``, run ``model``.

    The CSV at ``file_path`` is read, missing values are filled with 0, and
    the absolute correlation of every column with column ``'d'`` is printed.
    ``model`` is then called once per configured feature/target pairing.

    NOTE(review): ``file_path``, ``X_label`` .. ``X_label5``, ``Y_label`` and
    ``Y_label2`` are read as module-level names but are not defined in the
    visible part of this file -- they must be defined before ``main`` runs.
    """
    # Reference: https://wenku.baidu.com/view/0cdf0a66f524ccbff0218482.html
    data_df = pd.read_csv(file_path)  # (file_path, names=names)

    data_df = data_df.fillna(0)
    # Placeholder: add variable transformations here (e.g. data_df['Q'] = ln ...).
    # Placeholder: add anomalous data blocks here.
    print(data_df.shape)
    print(data_df.columns)
    print(data_df.head())

    corr = data_df.corr()
    cor_sort = corr['d'].abs().sort_values()
    print(cor_sort)

    # Select variables. sort_values() is ascending, so the last entry is
    # expected to be 'd' itself (|corr| == 1); drop it.
    # Fix: the original read the undefined name ``cor_sort3`` here.
    selected_columns = cor_sort.index[:-1]
    print(selected_columns)

    # X variables : Y variable, one pairing per model run.
    # Fix: the original assigned the third feature set to a misspelled
    # variable (``X_Inut``), so that run silently reused the X_label2 features.
    runs = (
        (X_label, Y_label),
        (X_label2, Y_label),
        (X_label3, Y_label),
        (X_label4, Y_label2),
        (X_label5, Y_label2),
    )
    for feature_columns, target_columns in runs:
        X_Input = data_df[feature_columns].values
        label = data_df[target_columns].values
        model(X_Input, label)

# Standardize the features.

def model(X_Input, label, test_size=0.2, random_state=None):
    """Fit several regressors on standardized features and print their scores.

    The features are standardized with ``StandardScaler``, split into train
    and test parts, and then Lasso, three SVR kernels (rbf, linear, poly),
    BayesianRidge, a decision tree and a random forest are fitted in turn.
    For each model the explained variance, mean absolute error and mean
    squared error are printed for the test set and the training set.
    The fitted decision tree is also dumped to ``kqll.pkl``.

    Parameters
    ----------
    X_Input : array-like
        Feature matrix passed to ``StandardScaler.fit_transform``.
    label : array-like
        Regression target(s) aligned with the rows of ``X_Input``.
    test_size : float, optional
        Fraction of rows held out for testing (default 0.2, as before).
    random_state : int or None, optional
        Seed forwarded to the split and to the tree-based estimators.
        ``None`` (default) keeps the original non-deterministic behaviour.

    Returns
    -------
    None
    """
    from sklearn.model_selection import train_test_split

    # Fix: the original called scaler.fit() and then fit_transform(), fitting
    # the same scaler twice; one fit_transform() is sufficient.
    # NOTE(review): scaling before the split lets test-set statistics leak
    # into the training features. Kept as-is so printed output is unchanged;
    # consider fitting the scaler on X_train only.
    new_X = StandardScaler().fit_transform(X_Input)
    print(new_X)

    X_train, X_test, y_train, y_test = train_test_split(
        new_X, label, test_size=test_size, random_state=random_state)
    print("X_train:", X_train.shape)
    print("X_test :", X_test.shape)
    print("y_train:", y_train.shape)
    print("y_test :", y_test.shape)
    split = (X_train, X_test, y_train, y_test)

    # Joint feature selection with multi-task Lasso, see
    # https://scikit-learn.org/stable/auto_examples/linear_model/plot_multi_task_lasso_support.html#sphx-glr-auto-examples-linear-model-plot-multi-task-lasso-support-py

    # Lasso
    test_scores, train_scores = _fit_and_score(
        linear_model.Lasso(alpha=0.1), *split)
    print("Lasso[corr,mean,squared] Test:%f;%f;%f" % test_scores)
    print("Lasso[[corr,mean,squared] Train:%f;%f;%f" % train_scores)

    # SVR: fit one regression model per kernel.
    svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
    svr_lin = SVR(kernel='linear', C=100, gamma='auto')
    svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=3, epsilon=.1,
                   coef0=1)
    for ix, svr in enumerate([svr_rbf, svr_lin, svr_poly]):
        test_scores, train_scores = _fit_and_score(svr, *split)
        # Fix: the kernel index is an int; the original formatted it with %f
        # and printed e.g. "SVR:0.000000".
        print("SVR:%d[corr,mean,squared] Test:%f;%f;%f"
              % ((ix,) + test_scores))
        print("SVR:%d[[corr,mean,squared] Train:%f;%f;%f"
              % ((ix,) + train_scores))

    # Bayesian Ridge Regression
    test_scores, train_scores = _fit_and_score(
        linear_model.BayesianRidge(), *split)
    print("BayesianRidge[corr,mean,squared] Test:%f;%f;%f" % test_scores)
    print("BayesianRidge[corr,mean,squared] Train:%f;%f;%f" % train_scores)

    # Decision tree; the fitted estimator is persisted for later reuse.
    clf = tree.DecisionTreeRegressor(random_state=random_state)
    test_scores, train_scores = _fit_and_score(clf, *split)
    joblib.dump(clf, 'kqll.pkl')
    print("tree[corr,mean,squared] Test:%f;%f;%f" % test_scores)
    print("tree[corr,mean,squared] Train:%f;%f;%f" % train_scores)

    # Random forest
    test_scores, train_scores = _fit_and_score(
        RandomForestRegressor(random_state=random_state), *split)
    print("RandomForestRegressor[corr,mean,squared] Test:%f;%f;%f"
          % test_scores)
    print("RandomForestRegressor[corr,mean,squared] Train:%f;%f;%f"
          % train_scores)


def _regression_scores(y_true, y_pred):
    """Return (explained variance, mean absolute error, mean squared error)."""
    return (
        metrics.explained_variance_score(y_true, y_pred),
        metrics.mean_absolute_error(y_true, y_pred),
        metrics.mean_squared_error(y_true, y_pred),
    )


def _fit_and_score(estimator, X_train, X_test, y_train, y_test):
    """Fit ``estimator`` on the train split; return (test, train) score tuples."""
    estimator.fit(X_train, y_train)
    test_scores = _regression_scores(y_test, estimator.predict(X_test))
    train_scores = _regression_scores(y_train, estimator.predict(X_train))
    return test_scores, train_scores

# Run the analysis only when executed as a script, not when imported.
# Fix: the guard had lost its double underscores and used typographic quotes.
if __name__ == '__main__':
    main()

# Unused scratch code, kept commented out.
# =============================================================================
# from sklearn import svm
# X = new_X
# type()
# y = label
# clf = svm.SVR()
# reg = linear_model.Lasso(alpha=0.1)
# #reg = linear_model.Lasso(alpha=0.1)
# reg.fit(X)
# clf.fit(X, y)
# reg.predict([X_test])
# =============================================================================
# #SVR()
# =============================================================================
# clf.predict([[1, 1]])
# array([1.5])
# =============================================================================

# Unused PCA + SVC scratch code, kept commented out.
# =============================================================================
# """PCA"""
# from sklearn.ensemble import RandomForestClassifier
# pca = PCA()
# pca.fit(X_train)
# X_train_reduced = pca.transform(X_train)
# X_test_reduced= pca.transform(X_test)
# classifier =SVC()
# y_train=y_train.astype('int')
# classifier.fit(X_train_reduced,y_train)
# predict_train = classifier.predict(X_train_reduced)
# accuracy_train=metrics.accuracy_score( y_train,predict_train)
# predict_test = classifier.predict(X_test_reduced)
# accuracy_test=metrics.accuracy_score( y_test,predict_test)
# print( "[Accuracy] Train:%f Test:%f" % (accuracy_train, accuracy_test) )
# =============================================================================

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值