# Just playing around.
# Receptive field plotting
# A small utility thrown together for a paper; more updates will follow later.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import time
# Read the source table from a hard-coded local CSV path and turn the
# DataFrame into a plain NumPy array so columns can be sliced by position.
file = r'D:\微信\WeChat Files\wxid_8f01i8xhk84722\FileStorage\File\2021-09\identity_0829.csv'
data = np.array(pd.read_csv(file))

# Features are columns 6..8; the target is column 9 shifted down by one
# (presumably to turn 1-based class ids into 0-based ones -- TODO confirm).
data_x = data[:, 6:9]
data_y = data[:, 9] - 1

# First split: 99% of the rows are held out as the validation set, leaving
# 1% as the training pool.
x_train, x_val, y_train, y_val = train_test_split(
    data_x,
    data_y,
    test_size=0.99,
)

# Second split: inside the training pool, 10% of the rows keep their labels
# and the remaining 90% are treated as unlabeled.
(
    x_train_labeled,
    x_train_unlabeled,
    y_train_labeled,
    y_train_unlabeled,
) = train_test_split(x_train, y_train, test_size=0.9)

# Base classifier used by the self-training loop (library default settings).
rf_cls = RandomForestClassifier()
def train(x_train, y_train, x_unlabeled, cls, x_val, y_val, *, n_iter=20, threshold=0.999):
    """Self-train ``cls`` by repeatedly pseudo-labelling confident samples.

    Every round fits ``cls`` on the current training set, prints its score on
    the validation set, predicts the unlabeled pool, and moves each sample
    whose highest class probability exceeds ``threshold`` into the training
    set together with its predicted label. The grown training set is drawn
    with ``plot`` after every round.

    Args:
        x_train: Labeled feature array; columns 0, 1 and 2 are plotted.
        y_train: Labels for ``x_train``; cast to ``int`` before fitting.
        x_unlabeled: Feature array of the unlabeled pool.
        cls: Classifier exposing ``fit``, ``score``, ``predict`` and
            ``predict_proba``.
        x_val: Validation features passed to ``cls.score``.
        y_val: Validation labels; cast to ``int`` before scoring.
        n_iter: Maximum number of rounds (default 20).
        threshold: A sample is pseudo-labelled when its maximum predicted
            class probability is strictly greater than this (default 0.999).

    Returns:
        None. Progress is printed and plotted; ``cls`` is fitted in place.
    """
    plt.ion()
    for i in range(n_iter):
        print('第%d次迭代'%(i+1))
        print('训练数据:', len(x_train))
        print('无标签数据:', len(x_unlabeled))
        cls.fit(x_train, y_train.astype('int'))
        print('模型得分:',cls.score(x_val,y_val.astype('int')))

        # Once the pool is exhausted there is nothing left to pseudo-label,
        # and asking the classifier to predict on a zero-row array would
        # raise. Stop here; ``cls`` has just been fitted on all the data.
        if len(x_unlabeled) == 0:
            break

        # Confidence of a sample = its largest per-class probability.
        confidence = np.max(cls.predict_proba(x_unlabeled), axis=1)
        label = cls.predict(x_unlabeled)
        index = confidence > threshold

        confident_x = x_unlabeled[index]
        print('新加入数据:', len(confident_x),'\n')

        # Move the confident samples (with predicted labels) from the
        # unlabeled pool into the training set.
        x_train = np.concatenate((x_train, confident_x), axis=0)
        y_train = np.concatenate((y_train, label[index]), axis=0)
        x_unlabeled = x_unlabeled[~index]

        plot(x_train[:, 0], x_train[:, 1], x_train[:, 2], y_train)
    plt.ioff()
    plt.show()
def plot(x, y, z, label):
    """Draw a 3-D scatter of the points coloured by ``label``, then pause 1 s.

    Args:
        x: Coordinates along the first axis.
        y: Coordinates along the second axis.
        z: Coordinates along the third axis.
        label: Per-point values used for colouring (``viridis`` colormap)
            and for building the legend.

    Returns:
        None. The current matplotlib figure is redrawn.
    """
    # Start from an empty figure so repeated calls redraw a single set of
    # axes instead of stacking a new 3-D axes on top of the previous ones.
    plt.clf()
    ax = plt.axes(projection='3d')
    points = ax.scatter(x, y, z, c=label, cmap='viridis')

    # Build one legend entry per distinct colour value; passing the raw
    # per-point label array to ``legend`` does not give a per-class legend.
    handles, names = points.legend_elements()
    ax.legend(handles, names)

    # The legend is attached before pausing so it is part of the frame that
    # is actually rendered during the pause.
    plt.pause(1)
# Run the self-training loop: start from the labeled subset, draw
# pseudo-labels from the unlabeled subset, and score on the validation set.
train(
    x_train=x_train_labeled,
    y_train=y_train_labeled,
    x_unlabeled=x_train_unlabeled,
    cls=rf_cls,
    x_val=x_val,
    y_val=y_val,
)