"""Train and evaluate a 2-layer LSTM that classifies 4-step windows of
36 features, where each window's target is taken from the *next* window
(one-step-ahead prediction).  Reads a labelled CSV, trains with Adam +
BCE, saves the weights, then reports AUC / F1 / accuracy with ROC and
precision-recall plots on the held-out split.
"""
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import lr_scheduler
from torch.utils.data import TensorDataset, DataLoader

CSV_PATH = r'C:\Users\zhoutao\Desktop\label.csv'
MODEL_PATH = r'C:\Users\zhoutao\Desktop\1.pkl'
SEQ_LEN = 4        # time steps per window
N_FEATURES = 36    # features per time step
THRESHOLD = 0.3    # decision threshold applied to the sigmoid output


class lstm(nn.Module):
    """LSTM -> per-step Linear -> Sigmoid: one probability per time step.

    Input is (batch, seq, input_size); output is (batch, seq, output_size)
    with values in (0, 1), suitable for nn.BCELoss.
    """

    def __init__(self, input_size=18, hidden_size=4, output_size=1, num_layer=2):
        super(lstm, self).__init__()
        # batch_first=True: DataLoader batches arrive as (batch, seq, feature).
        # The original omitted it, so nn.LSTM treated the batch axis as time
        # and carried hidden state across unrelated samples.
        self.layer1 = nn.LSTM(input_size, hidden_size, num_layer, batch_first=True)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.layer3 = nn.Sigmoid()

    def forward(self, x):
        x, _ = self.layer1(x)          # (batch, seq, hidden)
        b, s, h = x.size()
        x = self.layer2(x.reshape(b * s, h))   # apply the head to every step
        x = self.layer3(x)
        return x.view(b, s, -1)


def load_data(csv_path=CSV_PATH):
    """Read the CSV and return X of shape (n, 4, 36) and Y of shape (n, 4).

    A window is marked all-positive when the first raw label of the
    *following* window is 1 (the flag list is shifted left by one window).
    NOTE(review): the original inspects only y[0], the first of the
    window's 4 raw labels -- confirm "any label in the window" was not
    the real intent.
    """
    df = pd.read_csv(csv_path)
    X = df.drop('label', axis=1).values.reshape(-1, SEQ_LEN, N_FEATURES)
    raw = df['label'].values.reshape(-1, SEQ_LEN, 1)
    flags = [(1, 1, 1, 1) if 1 in w[0] else (0, 0, 0, 0) for w in raw]
    # Shift: window i takes window i+1's flag; the last window, which has
    # no successor, gets all zeros.
    flags.pop(0)
    flags.append((0, 0, 0, 0))
    return X, np.array(flags)


def train(model, loader, epochs=500):
    """Fit in place with Adam (lr=1e-2) + BCE; LR decays x0.1 every 100 epochs."""
    criterion = nn.BCELoss()
    # Weight decay (e.g. weight_decay=0.01) can be added here for L2 regularisation.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
    for epoch in range(epochs):
        for inputs, labels in loader:   # plain tensors; Variable is obsolete
            out = model(inputs)
            loss = criterion(out, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Step the schedule once per epoch *after* the optimizer updates
        # (the original stepped it first, an ordering PyTorch warns about).
        scheduler.step()
        print('Epoch: {}, Loss: {:.5f}'.format(epoch + 1, loss.item()))


def evaluate(model, x_test, y_test, threshold=THRESHOLD):
    """Print AUC/F1/accuracy and draw ROC and P-R curves for the test split."""
    # sklearn / matplotlib are needed only here; importing at the use site
    # keeps the model definition importable without them.
    import matplotlib.pyplot as plt
    from sklearn import metrics
    from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                                 roc_auc_score)

    model.eval()                      # switch to inference mode
    with torch.no_grad():             # no autograd bookkeeping for inference
        probs = model(torch.from_numpy(
            x_test.reshape(-1, SEQ_LEN, N_FEATURES)).to(torch.float32))
    p = probs.reshape(-1).numpy()
    y_true = y_test.flatten().astype(int)

    # AUC from the raw probabilities -- the original thresholded first,
    # which discards the ranking information AUC is supposed to measure.
    print(roc_auc_score(y_true, p))

    pred = (p > threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, pred).ravel()
    f1, acc = f1_score(y_true, pred), accuracy_score(y_true, pred)
    print('tn={} fp={} fn={} tp={} f1={:.4f} acc={:.4f}'.format(
        tn, fp, fn, tp, f1, acc))

    fpr, tpr, threshold_ = metrics.roc_curve(y_true, pred)
    roc_auc = metrics.auc(fpr, tpr)
    plt.figure(figsize=(6, 6))
    plt.title('Validation ROC')
    plt.plot(fpr, tpr, 'b', label='Val AUC = %0.3f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

    precision, recall, thresholds = metrics.precision_recall_curve(
        y_true, pred, pos_label=1, sample_weight=None)
    print('precision:', precision)
    print('recall:', recall)
    print('thresholds:', thresholds)
    plt.figure("P-R Curve")
    plt.title('Precision/Recall Curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.plot(recall, precision)
    plt.show()


if __name__ == '__main__':
    from sklearn.model_selection import train_test_split

    X, Y = load_data()
    # NOTE(review): the original comment claims an in-order (chronological)
    # split, but train_test_split shuffles by default; pass shuffle=False
    # if a sequential split is really intended.
    x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.8)

    train_x = torch.from_numpy(x_train).to(torch.float32)
    train_y = torch.from_numpy(y_train).reshape(-1, SEQ_LEN, 1).to(torch.float32)
    loader = DataLoader(TensorDataset(train_x, train_y),
                        batch_size=30, shuffle=True, num_workers=0)

    model = lstm(N_FEATURES, 4, 1, 2)
    train(model, loader)
    torch.save(model.state_dict(), MODEL_PATH)
    # To reload later:
    #   model = lstm(36, 4, 1, 2)
    #   model.load_state_dict(torch.load(MODEL_PATH))
    evaluate(model, x_test, y_test)