导入基本库
import numpy as np
import torch
import torch.nn as nn
from sklearn import datasets
from sklearn.model_selection import train_test_split  # split data into train and test sets
from sklearn.preprocessing import StandardScaler  # feature scaling
导入乳腺癌数据集
打印出数据集的样本数和特征数
# Load the breast-cancer dataset bundled with scikit-learn.
bc = datasets.load_breast_cancer()

# X holds the feature matrix, y the known labels.
X = bc.data
y = bc.target

# X.shape unpacks into (number of samples, number of features).
n_samples, n_features = X.shape
print(n_samples, n_features)
输出结果
569 30
即数据集共有569个样本,每个样本有30个特征
对导入的数据进行处理
# Hold out 20% of the samples for testing. A fixed random_state makes the
# split reproducible: the same value yields the same partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Standardise the features to zero mean and unit variance. The scaler is
# fitted on the training split only and then reused, unchanged, on the test
# split so that no test-set statistics leak into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert the NumPy arrays to float32 tensors.
# `np` comes from the file-level `import numpy as np`.
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

# Reshape the labels from a flat vector into a single column, (n,) -> (n, 1),
# so they line up with the model's one-value-per-sample output.
y_train = y_train.view(y_train.shape[0], 1)
y_test = y_test.view(y_test.shape[0], 1)
搭建模型
这里只需要一个线性层
class LogisticRegression(nn.Module):
    """Logistic-regression classifier: one linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features: int) -> None:
        super().__init__()
        # A single output unit is enough: one value per sample.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid of the linear layer's output for ``x``.

        The sigmoid squashes each output into the interval (0, 1).
        """
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred
搭建优化器和损失函数
# Build the model: n_features inputs mapped to a single output.
model = LogisticRegression(n_input_features=n_features)

# Binary cross-entropy loss, computed on the model's sigmoid output.
criterian = nn.BCELoss()

# SGD optimiser over all model parameters with a learning rate of 0.01.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
对每一轮计算Loss
# Train for a fixed number of epochs; every epoch runs one forward/backward
# pass over the whole of X_train.
epochs = 100
for epoch in range(epochs):
    # Forward pass and loss.
    y_pred = model(X_train)
    loss = criterian(y_pred, y_train)

    # Backward pass, then one parameter update.
    loss.backward()
    optimizer.step()

    # Clear the gradients so they do not accumulate into the next epoch.
    optimizer.zero_grad()

    # Report the loss every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f'epoch:{epoch+1},loss={loss.item():.4f}')
评估模型准确度
# Evaluate on the held-out test split. Gradient tracking is switched off
# because no parameters are updated here.
with torch.no_grad():
    y_pred = model(X_test)
    # The sigmoid output lies in (0, 1); rounding turns it into a hard 0/1
    # class label, i.e. a decision threshold of 0.5.
    y_pred_cls = y_pred.round()
    # Accuracy = number of predictions equal to the true label, divided by
    # the number of test samples.
    acc = y_pred_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'accuracy={acc:.4f}')
输出结果
epoch:10,loss=0.5711
epoch:20,loss=0.4670
epoch:30,loss=0.4019
epoch:40,loss=0.3575
epoch:50,loss=0.3251
epoch:60,loss=0.3003
epoch:70,loss=0.2806
epoch:80,loss=0.2645
epoch:90,loss=0.2511
epoch:100,loss=0.2396
accuracy=0.9123
可以通过调整训练轮数(epochs)和学习率(lr)来进一步提高模型准确度