"""Train a random-forest classifier on ``.npy`` samples stored in label-named folders.

Expected layout: every sub-directory of ``arr_path`` is named with an integer
class label (0, 1, 2, ...) and contains, at any depth, the ``.npy`` arrays that
belong to that class.  Each array is flattened into one feature vector.
"""
import os
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Root directory of the data set; "/" works as a path separator on Windows too.
arr_path = Path('C:/Users/Administrator/Desktop/sc_data')


def collect_samples(root):
    """Return ``(npy_path, label)`` pairs for every sample found under *root*.

    Args:
        root: ``pathlib.Path`` whose sub-directories are named with integer
            class labels.

    Returns:
        A list of ``(Path, int)`` tuples, ordered by class directory and then
        by file path.

    Raises:
        ValueError: if a sub-directory name cannot be parsed as an integer.
    """
    samples = []
    # Only directories are class folders; stray files (e.g. desktop.ini) are
    # skipped instead of crashing int().  Sorting makes the sample order, and
    # therefore the seeded train/test split, reproducible across runs.
    class_dirs = sorted(entry for entry in root.iterdir() if entry.is_dir())
    for class_dir in class_dirs:
        print(f'i==={class_dir}')
        label = int(class_dir.name)
        npy_files = sorted(class_dir.glob("**/*.npy"))
        print(f'{label}含有样本数量:{len(npy_files)}')
        print(npy_files)
        samples.extend((npy_file, label) for npy_file in npy_files)
    return samples


def load_dataset(samples):
    """Load every sample array and flatten it into a 2-D feature matrix.

    Args:
        samples: list of ``(npy_path, label)`` pairs, e.g. from
            ``collect_samples``.

    Returns:
        ``(features, labels)`` where ``features`` has shape
        ``(n_samples, n_features)`` and ``labels`` has shape ``(n_samples,)``.

    Raises:
        ValueError: if *samples* is empty, or (from ``np.stack``) if the
            arrays do not all share one shape.
    """
    if not samples:
        raise ValueError('no .npy samples found; check the data directory')

    # NOTE(review): a disabled line in the original reshaped each array to
    # (5, 224, 224), which suggests that is the expected sample shape - confirm.
    features = np.stack([np.load(npy_file) for npy_file, _ in samples])
    labels = np.array([label for _, label in samples])
    print(features.shape, labels.shape)

    # One row per sample.  The row count comes from the data rather than a
    # hard-coded 1060, so data sets of any size work.
    features = features.reshape(len(features), -1)
    print(features.shape, labels.shape)
    return features, labels


def train_and_report(features, labels):
    """Split the data, train a random forest and print train/test reports.

    Args:
        features: 2-D array of shape ``(n_samples, n_features)``.
        labels: 1-D array of ``n_samples`` integer class labels.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=1000
    )

    # Min-max scaling (worth comparing results with and without it).  The
    # scaler is fitted on the training split only, so no test-set statistics
    # leak into training; scaled test values may fall slightly outside [0, 1].
    scaler = preprocessing.MinMaxScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Alternative tried by the original author (disabled):
    #     from sklearn import svm
    #     predictor = svm.SVC(gamma='scale', C=1.0, max_iter=1000)
    predictor = RandomForestClassifier(n_estimators=120)
    predictor.fit(X_train, y_train)

    print(classification_report(y_train, predictor.predict(X_train), digits=4))
    print(classification_report(y_test, predictor.predict(X_test), digits=4))

    # Optional decision-tree visualisation from the original (disabled):
    #     predictor = tree.DecisionTreeClassifier(criterion="entropy")
    #     predictor.fit(X_train[:100], y_train[:100])
    #     tree.plot_tree(predictor, filled=True, class_names=['0', '1'])
    #     plt.show()
    return predictor


def main():
    """Run the pipeline: discover samples, load them, train and report."""
    features, labels = load_dataset(collect_samples(arr_path))
    train_and_report(features, labels)


if __name__ == "__main__":
    main()
泥石流图像分类
最新推荐文章于 2024-07-25 21:28:49 发布