"""Multi-layer perceptron on the HR.csv dataset.

Binary classification: predict whether an employee is likely to leave
soon (the ``left`` column).  Categorical features are one-hot encoded
and training iterates mini-batches via ``DataLoader``.
"""
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


class Model(nn.Module):
    """Three-layer MLP with a sigmoid output for binary classification."""

    def __init__(self, in_features: int = 20):
        super().__init__()  # initialise nn.Module internals
        self.liner_1 = nn.Linear(in_features, 64)
        self.liner_2 = nn.Linear(64, 64)
        self.liner_3 = nn.Linear(64, 1)

    def forward(self, input):
        x = F.relu(self.liner_1(input))
        x = F.relu(self.liner_2(x))
        # Sigmoid squashes the logit into (0, 1) so nn.BCELoss applies.
        x = torch.sigmoid(self.liner_3(x))
        return x


lr = 0.0001  # Adam learning rate


def get_model(in_features: int = 20):
    """Return a fresh ``Model`` together with its Adam optimizer."""
    model = Model(in_features)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    return model, opt


def load_data(path='dataset/HR.csv'):
    """Load HR.csv and return ``(X, Y)`` as float32 tensors.

    ``part`` and ``salary`` are one-hot encoded and the original columns
    dropped; ``left`` becomes the (N, 1) target.
    """
    data = pd.read_csv(path)
    # One-hot encode the categorical columns, then drop the originals.
    data = data.join(pd.get_dummies(data.part)).join(pd.get_dummies(data.salary))
    del data['salary']
    del data['part']
    # astype(np.float32): modern pandas get_dummies yields bool columns;
    # the resulting mixed-dtype .values would be an object ndarray that
    # torch.from_numpy rejects, so convert explicitly.
    Y_data = data.left.values.reshape(-1, 1).astype(np.float32)
    Y = torch.from_numpy(Y_data)
    # All columns except the target form the feature matrix.
    X_data = data[[c for c in data.columns if c != 'left']].values
    X = torch.from_numpy(X_data.astype(np.float32))
    return X, Y


def train(epochs=100, batch=64):
    """Train the MLP on HR.csv, printing the full-dataset loss per epoch."""
    X, Y = load_data()
    model, optim = get_model(X.shape[1])
    loss_fn = nn.BCELoss()
    # DataLoader handles batching; shuffle=True reshuffles every epoch.
    HR_ds = TensorDataset(X, Y)
    HR_dl = DataLoader(HR_ds, batch_size=batch, shuffle=True)
    for epoch in range(epochs):
        for x, y in HR_dl:  # yields one batch of inputs and targets
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            optim.zero_grad()
            loss.backward()
            optim.step()
        with torch.no_grad():
            print('epoch:', epoch, 'loss:', loss_fn(model(X), Y).item())


if __name__ == "__main__":
    train()
11-03
3019
07-19
312
08-07
6422
04-09
7485
08-21
814