Python 多分类 / 多标签分类模版 (Python multi-class / multi-label classification template)

# Third-party imports: scikit-learn for the multi-output classifiers,
# numpy/pandas for data loading and array handling.
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier  # imported but unused below; kept for compatibility
import numpy as np
from pandas import read_csv
import pandas as pd

# Hard-coded input CSV paths (the "goverment" spelling matches the on-disk folder).
# root1/root2: train/test sets used for the combined and big-category experiments.
# root3/root4: train/test sets — presumably the small-category label variant; verify against the data files.
root1 = "F:/goverment/shuili2/techproblem_text_train.csv"
root2 = "F:/goverment/shuili2/techproblem_text_test.csv"
root3 = "F:/goverment/shuili2/text_train_4problem.csv"
root4 = "F:/goverment/shuili2/text_test_4problem.csv"

12

13

'''大类小类一起预测'''
# Predict big and small categories together: first 28 CSV columns are
# features, the remaining columns (8 of them, per the original comparison
# over indices 0..7) are labels.
#root2="./id="+str(id_num)+"_process_data.csv"
dataset1 = read_csv(root1).values  # training data as a numpy array
dataset2 = read_csv(root2).values  # test data as a numpy array

X_train = dataset1[:, :28]  # columns before 28 are features
Y_train = dataset1[:, 28:]  # columns from 28 on are labels
X_test = dataset2[:, :28]
Y_test = dataset2[:, 28:]

print('多输出多分类器真实输出分类:\n', Y_train)
n_samples, n_features = X_train.shape  # e.g. 4000 x 28
n_outputs = Y_train.shape[1]           # number of label columns (8)
n_classes = 50  # informational: each output has up to 50 classes

# Random-forest base estimator wrapped into one classifier per output column.
forest = RandomForestClassifier(n_estimators=500, random_state=1)
multi_target_forest = MultiOutputClassifier(forest)
y_pred = multi_target_forest.fit(X_train, Y_train).predict(X_train)
print('多输出多分类器预测输出分类:\n', y_pred)

# Exact-match (subset) accuracy on the test set: a sample counts as correct
# only when every label column matches.  np.array_equal replaces the original
# hand-written 8-way element comparison.
pp = multi_target_forest.predict(X_test)
k = sum(1 for pred_row, true_row in zip(pp, Y_test)
        if np.array_equal(pred_row, true_row))
# Fix: divide by the actual number of test samples instead of hard-coded 1328.
aa = k / len(Y_test)
print(aa)

42

'''只预测大类'''
# Predict only the big (top-level) categories: same 28 feature columns,
# but only label columns 28..31 (4 outputs) are used.
#root2="./id="+str(id_num)+"_process_data.csv"
dataset3 = read_csv(root1).values
dataset4 = read_csv(root2).values

X_train_big = dataset3[:, :28]
Y_train_big = dataset3[:, 28:32]  # 4 big-category label columns
X_test_big = dataset4[:, :28]
Y_test_big = dataset4[:, 28:32]

print('只预测大类:多输出多分类器真实输出分类:\n', Y_train_big)
n_samples, n_features = X_train_big.shape
n_outputs = Y_train_big.shape[1]  # 4 outputs
n_classes = 11  # informational: each output has up to 11 classes

forest = RandomForestClassifier(n_estimators=200, random_state=1)
multi_target_forest = MultiOutputClassifier(forest)
y_pred = multi_target_forest.fit(X_train_big, Y_train_big).predict(X_train_big)
print('多输出多分类器预测输出分类:\n', y_pred)

# Exact-match accuracy over all 4 label columns (replaces the manual
# element-by-element comparison).
pp = multi_target_forest.predict(X_test_big)
k = sum(1 for pred_row, true_row in zip(pp, Y_test_big)
        if np.array_equal(pred_row, true_row))
# Fix: use the real test-set size rather than hard-coded 1328.
aa = k / len(Y_test_big)
print(aa)

70

'''只预测小类'''
# Predict only the small (fine-grained) categories, using the 4-problem CSVs
# (root3/root4).  Local names fixed from the original "samll" typo.
#root2="./id="+str(id_num)+"_process_data.csv"
dataset4 = read_csv(root3).values
dataset5 = read_csv(root4).values

X_train_small = dataset4[:, :28]
Y_train_small = dataset4[:, 28:32]  # 4 small-category label columns
X_test_small = dataset5[:, :28]
Y_test_small = dataset5[:, 28:32]

print('只预测小类:多输出多分类器真实输出分类:\n', Y_train_small)
n_samples, n_features = X_train_small.shape
n_outputs = Y_train_small.shape[1]  # 4 outputs
n_classes = 61  # informational: each output has up to 61 classes

forest = RandomForestClassifier(n_estimators=200, random_state=1)
multi_target_forest = MultiOutputClassifier(forest)
y_pred = multi_target_forest.fit(X_train_small, Y_train_small).predict(X_train_small)
print('多输出多分类器预测输出分类:\n', y_pred)

# Exact-match accuracy over all 4 label columns.
pp = multi_target_forest.predict(X_test_small)
k = sum(1 for pred_row, true_row in zip(pp, Y_test_small)
        if np.array_equal(pred_row, true_row))
# Fix: use the real test-set size rather than hard-coded 1328.
aa = k / len(Y_test_small)
print(aa)

98

99

# NOTE(review): dead code carried over from the original post — an alternative
# approach using scikit-multilearn's BinaryRelevance.  Kept as an inert string
# literal; it is never executed.  Reconstructed from the garbled extraction.
'''
from pandas import read_csv
import pandas as pd
import numpy as np
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

root1 = "D:/Anaconda3-5.0.1-Windows-x86_64/anaconda/work/shuili2/data.csv"
#root2 = "./id=" + str(id_num) + "_process_data.csv"
dataset = read_csv(root1)
dataset = dataset.values
x_train = dataset[:4000, :29]
y_train = dataset[:4000, 29:]

x_test = dataset[4000:, :29]
y_test = dataset[4000:, 29:]

# initialize binary relevance multi-label classifier
# with a gaussian naive bayes base classifier
classifier = BinaryRelevance(GaussianNB())

# train
classifier.fit(x_train, y_train)

# predict
predictions = classifier.predict(x_test)
accuracy_score(y_test, predictions)
'''

130

131

'''---------------------------------'''
# NOTE(review): dead code carried over from the original post — a Keras MLP
# alternative.  Kept as an inert string literal; it is never executed.
# Reconstructed from the garbled extraction.
'''
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from pandas import read_csv
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

root1 = "D:/Anaconda3-5.0.1-Windows-x86_64/anaconda/work/shuili2/data.csv"
#root2 = "./id=" + str(id_num) + "_process_data.csv"
dataset = read_csv(root1)
dataset = dataset.values

# load dataset
dataframe = pd.read_csv("data.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:29].astype(float)
Y = dataset[:, 29:]

# encode class values as integers
#encoder = LabelEncoder()
#encoded_Y = encoder.fit_transform(Y)
# convert integers to dummy variables (one hot encoding)
#dummy_y = np_utils.to_categorical(encoded_Y)

# define model structure
def baseline_model():
    model = Sequential()
    model.add(Dense(output_dim=10, input_dim=29, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(output_dim=8, input_dim=10, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200, batch_size=50)
# splitting data into training set and test set. If random_state is set to an integer, the split datasets are fixed.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.01, random_state=0)
estimator.fit(X_train, Y_train)

# make predictions
pred = estimator.predict(X_test)

# inverse numeric variables to initial categorical labels
#init_lables = encoder.inverse_transform(pred)

# k-fold cross-validate
seed = 42
np.random.seed(seed)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
'''

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值