note2

4 Logistic Regression and the Multilayer Perceptron

For classification problems we use the cross-entropy loss, which amplifies the loss when the predicted probability is far from the true label.

Cross-entropy loss is also a way of measuring the distance between two probability distributions.
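A small sketch (added here for illustration, not part of the original note): nn.BCELoss assigns a much larger loss to a confidently wrong prediction than to a mildly wrong one, which is what "amplifying the loss" means above.

# Minimal sketch: binary cross-entropy punishes confident mistakes heavily.
import torch
from torch import nn

loss_fn = nn.BCELoss()
target = torch.tensor([1.0])                                # the true label is 1

mildly_wrong = loss_fn(torch.tensor([0.4]), target)        # predicted 0.4 for a positive sample
confidently_wrong = loss_fn(torch.tensor([0.01]), target)  # predicted 0.01 for a positive sample
print(mildly_wrong.item(), confidently_wrong.item())       # roughly 0.92 vs 4.6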

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
data = pd.read_csv('daatset/credit-a.csv', header=None)  # read the csv dataset; it has no header row
print(data)
# data.info()
X = data.iloc[:, :-1]   # iloc selects by position; ':' takes every row, ':-1' takes every column except the last
Y = data.iloc[:, -1].replace(-1, 0)    # '-1' takes the last column; replace the -1 labels with 0
print(Y.unique())
# convert X and Y to tensors (data preprocessing)
X = torch.from_numpy(X.values).type(torch.float32)
print(X)
Y = torch.from_numpy(Y.values.reshape(-1,1)).type(torch.float32)
# build the model
model = nn.Sequential(
    nn.Linear(15, 1),  # 15 input features, 1 output
    nn.Sigmoid()       # activation layer
)  # Sequential chains several layers together in order
print(model)
# Sequential(
#   (0): Linear(in_features=15, out_features=1, bias=True)
#   (1): Sigmoid()
# )
# loss function
loss_fn = nn.BCELoss()
# optimizer
opt = torch.optim.Adam(model.parameters(), lr=0.0001)  # Adam is an optimization algorithm; parameters() returns the model's trainable parameters
# mini-batch training
batches = 16
no_of_batch = 653//16    # number of batches per epoch (653 samples // batch size)
epoches = 1000

for epoch in range(epoches):
    for i in range(no_of_batch):
        start = i*batches
        end = start + batches    # 16 samples per batch
        x = X[start: end]
        y = Y[start: end]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)  # compute the loss
        opt.zero_grad()    # zero the gradients
        loss.backward()    # compute the gradients
        opt.step()         # take an optimization step

a = model.state_dict()  # sigmoid(w1*x1 + w2*x2 + ... + w15*x15 + b)
print(a)
b = ((model(X).data.numpy() > 0.5).astype('int') == Y.numpy()).mean()  # compare with Y to get the accuracy
print(b)

Multilayer perceptron (neural network)
Commonly used activation functions
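As a quick reference (a small sketch added here, not from the original note), the common activations can be evaluated directly through torch and torch.nn.functional:

# Minimal sketch of common activation functions applied to a sample tensor.
import torch
import torch.nn.functional as F

z = torch.linspace(-3, 3, 7)
print(torch.relu(z))      # max(0, z): zero for negative inputs, identity for positive
print(torch.sigmoid(z))   # 1 / (1 + exp(-z)): squashes values into (0, 1)
print(torch.tanh(z))      # squashes values into (-1, 1)
print(F.leaky_relu(z, negative_slope=0.01))  # small slope for negative inputs instead of zero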
# Multilayer perceptron (neural network)
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F   # lets us call F.relu() and similar functions directly, without instantiating layer objects

data = pd.read_csv('daatset/HR.csv')
# data.info()
print(data.part.unique())   # unique values in the 'part' column
# ['sales' 'accounting' 'hr' 'technical' 'support' 'management' 'IT'
#  'product_mng' 'marketing' 'RandD']

# print(data.groupby(['salary','part']).size())   # grouped counts by salary and part
'''
salary  part       
high    IT               83
        RandD            51
        accounting       74
        hr               45
        management      225
        marketing        80
        product_mng      68
        sales           269
        support         141
        technical       201
low     IT              609
        RandD           364
        accounting      358
        hr              335
        management      180
        marketing       402
        product_mng     451
        sales          2099
        support        1146
        technical      1372
medium  IT              535
        RandD           372
        accounting      335
        hr              359
        management      225
        marketing       376
        product_mng     383
        sales          1772
        support         942
        technical      1147
dtype: int64

'''
data = data.join(pd.get_dummies(data.salary))  # one-hot encode the salary column and join it back into data
del data['salary']
data = data.join(pd.get_dummies(data.part))
del data['part']
# print(data)

data.left.value_counts()

Y_data = data.left.values.reshape(-1, 1)
Y = torch.from_numpy(Y_data)
X_data = data[[c for c in data.columns if c != 'left']].values     # list comprehension: keep every column except 'left'
X = torch.from_numpy(X_data).type(torch.FloatTensor)
'''
Defining a custom model:
nn.Module: the base class to inherit from
__init__: initialize all the layers
forward: define the model's forward computation
'''
# define a model class
class Model(nn.Module):
    # a class that inherits from nn.Module
    def __init__(self):  # initialization
        super().__init__()    # inherit all attributes of the parent class
        self.liner_1 = nn.Linear(20, 64)  # 20 input features into a hidden layer of 64 units
        self.liner_2 = nn.Linear(64, 64)
        self.liner_3 = nn.Linear(64, 1)   # output layer with 1 unit
        self.relu = nn.ReLU()   # activation layer
        self.sigmoid = nn.Sigmoid()
    def forward(self, input):
        # use the layers defined above to process the input
        x = self.liner_1(input)  # first layer
        x = self.relu(x)    # ReLU activation
        x = self.liner_2(x)
        x = self.relu(x)
        x = self.liner_3(x)
        x = self.sigmoid(x)   # the output is a logistic-regression probability, so finish with sigmoid
        return x
'''A rewrite of the model using torch.nn.functional
import torch.nn.functional as F   # lets us call F.relu() and similar functions directly, without instantiating layer objects
class Model(nn.Module):
    # a class that inherits from nn.Module
    def __init__(self):  # initialization
        super().__init__()    # inherit all attributes of the parent class
        self.liner_1 = nn.Linear(20, 64)  # 20 input features into a hidden layer of 64 units
        self.liner_2 = nn.Linear(64, 64)
        self.liner_3 = nn.Linear(64, 1)   # output layer with 1 unit

    def forward(self, input):
        # use the layers defined above to process the input
        x = F.relu(self.liner_1(input))  # first layer, with relu called through F
        x = F.relu(self.liner_2(x))
        x = torch.sigmoid(self.liner_3(x))  # torch.sigmoid is preferred; F.sigmoid is deprecated

        return x
'''
model = Model()
print(model)
'''
Model(
  (liner_1): Linear(in_features=20, out_features=64, bias=True)
  (liner_2): Linear(in_features=64, out_features=64, bias=True)
  (liner_3): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)
'''

lr = 0.0001
def get_model():
    model = Model()
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    return model, opt
model, optim = get_model()

# define the loss function
loss_fn = nn.BCELoss()
batch = 64
no_of_batches = len(data) //batch
epochs = 300

for epoch in range(epochs):
    for i in range(no_of_batches):
        start = i * batch
        end = start + batch
        x = X[start: end]
        y = Y[start: end]
        y = y.to(torch.float32)
        y_pred = model(x)
        y_pred = y_pred.to(torch.float32)
        loss = loss_fn(y_pred, y)
        optim.zero_grad()
        loss.backward()
        optim.step()  # optimization step
    with torch.no_grad():
        print('epoch', epoch, 'loss: ', loss_fn(model(X).to(torch.float32), Y.to(torch.float32)).data.item())   # .item() converts a tensor to a Python number

# Refactor using the Dataset class

from torch.utils.data import TensorDataset  # wraps tensors as a dataset

# HRdataset = TensorDataset(X, Y)
# model, optim = get_model()
# for epoch in range(epochs):
#     for i in range(no_of_batches):
#         x,y = HRdataset[i*batch: i*batch+batch]
#         y = y.to(torch.float32)
#         y_pred = model(x)
#         y_pred = y_pred.to(torch.float32)
#         loss = loss_fn(y_pred, y)
#         optim.zero_grad()
#         loss.backward()
#         optim.step()  # optimization step
#     with torch.no_grad():
#         print('epoch', epoch, 'loss: ', loss_fn(model(X).to(torch.float32), Y.to(torch.float32)).data.item())   # .item() converts a tensor to a Python number


 

# Refactor using the DataLoader class
from torch.utils.data import DataLoader

HR_ds = TensorDataset(X, Y)
HR_dl = DataLoader(HR_ds, batch_size=batch, shuffle=True)  # yields one batch from HR_ds at a time; shuffle=True randomizes the order
model, optim = get_model()
for epoch in range(epochs):
    for x, y in HR_dl:
        y_pred = model(x)
        loss = loss_fn(y_pred.to(torch.float32), y.to(torch.float32))
        optim.zero_grad()
        loss.backward()
        optim.step()  # optimization step
    with torch.no_grad():
        print('epoch', epoch, 'loss: ', loss_fn(model(X).to(torch.float32), Y.to(torch.float32)).data.item())

Add a validation set; understand overfitting and underfitting.

sklearn (scikit-learn) is the classic machine-learning library.
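A small sketch (added here, not from the original note) of holding out a validation split with sklearn's train_test_split. The accuracy helper and the train_x/test_x names are illustrative; X_data, Y_data, and get_model come from the code above.

# Minimal sketch: keep part of the data aside so overfitting becomes visible
# (training accuracy high while test/validation accuracy is low).
import torch
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(X_data, Y_data)  # default split: 75% train / 25% test
train_x = torch.from_numpy(train_x).type(torch.float32)
test_x = torch.from_numpy(test_x).type(torch.float32)
train_y = torch.from_numpy(train_y).type(torch.float32)
test_y = torch.from_numpy(test_y).type(torch.float32)

def accuracy(y_pred, y_true):
    # illustrative helper: threshold the sigmoid output at 0.5 and compare with the labels
    return ((y_pred > 0.5).type(torch.int32) == y_true).float().mean().item()

model, optim = get_model()
# ... train on (train_x, train_y) with the loop above, then compare the two splits:
with torch.no_grad():
    print('train acc:', accuracy(model(train_x), train_y),
          'test acc:', accuracy(model(test_x), test_y))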
