二分类问题。
数据集
变量名 解释
CaseId 案例编号,没有实际意义
Q1 理赔员现场勘察采集的信息,Q1代表第一个问题的信息。信息被编码成数字,数字的大小不代表真实的关系。
Qk 同上,Qk代表第k个问题的信息。一共36个问题。
Evaluation 表示最终审核结果。0表示授予理赔,1表示未通过理赔审核。在test.csv中,这是需要被预测的标签。
http://sofasofa.io/competition.php?id=2
代码
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import KFold
import pandas as pd
class my_module(nn.Module):
    """Small feed-forward binary classifier: Linear -> ReLU -> Linear -> Sigmoid.

    The forward pass ends in a sigmoid, so the network emits probabilities in
    [0, 1] rather than raw logits. Pair it with a probability-based loss such
    as ``nn.BCELoss``; ``nn.BCEWithLogitsLoss`` would apply a second sigmoid.

    Args:
        input_dim: Number of input features per sample (defaults to 36).
    """

    def __init__(self, input_dim=36):
        super().__init__()
        hidden_dim = 16
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` probabilities."""
        hidden = self.relu1(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))
# 上一篇中我们手写了 k_fold,这里改用 sklearn 库提供的 KFold 来划分数据。
def k_fold(model, x, y, k, num_epochs, batch_size, learning_rate):
    """Run k-fold cross-validation and return the mean validation loss per fold.

    The model is expected to output probabilities in [0, 1] (i.e. its forward
    pass already ends in a sigmoid), so the loss is ``nn.BCELoss``. Using
    ``nn.BCEWithLogitsLoss`` on such a model would apply the sigmoid twice and
    pin the loss for negative samples at ln(2) or above.

    Every fold starts from the weights ``model`` had when this function was
    called; otherwise later folds would be validated on samples the model was
    already trained on in earlier folds. On return, ``model`` holds the
    weights trained on the last fold.

    Args:
        model: ``nn.Module`` mapping a feature batch to ``(batch, 1)``
            probabilities.
        x: Feature tensor, indexed along dimension 0 by sample.
        y: Float target tensor with the same shape as the model output.
        k: Number of folds.
        num_epochs: Training epochs per fold.
        batch_size: Mini-batch size used for training.
        learning_rate: SGD learning rate.

    Returns:
        A list of ``k`` floats: the per-sample mean binary cross-entropy on
        each fold's validation split.
    """
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    # state_dict() tensors share storage with the live parameters, so clone
    # them to get a snapshot that training cannot overwrite.
    initial_state = {
        name: tensor.detach().clone()
        for name, tensor in model.state_dict().items()
    }
    val_losses = []
    for fold, (train_index, val_index) in enumerate(kf.split(x, y)):
        print(f'fold {fold + 1}')
        # Reset to the untrained weights so folds are independent.
        model.load_state_dict(initial_state)

        # Build the training and validation sets for this fold.
        train_data = TensorDataset(x[train_index], y[train_index])
        val_data = TensorDataset(x[val_index], y[val_index])
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=len(val_data), shuffle=False)

        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
        criterion = nn.BCELoss()

        # Train.
        model.train()
        for _ in range(num_epochs):
            for inputs, targets in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

        # Validate.
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                # criterion returns the batch mean; weight it by the batch
                # size so the division below yields a true per-sample mean.
                val_loss += loss.item() * inputs.size(0)
        val_loss /= len(val_loader.dataset)
        val_losses.append(val_loss)
        print(f'val loss : {val_loss:.4f}')
    return val_losses
# Load the training data. The slice below assumes the first column is the
# case identifier and the last column is the label; everything in between is
# used as a feature.
train_data = pd.read_csv("E:\\李宏毅深度学习\\sofa_data\\交通事故理赔审核\\train.csv")
train_x = train_data.iloc[:, 1:-1]
train_y = train_data['Evaluation']

# Convert to float32 tensors; the labels become an (N, 1) column so that they
# line up with the model's single-unit output.
x_tensor = torch.tensor(train_x.values, dtype=torch.float32)
y_tensor = torch.tensor(train_y.values.reshape(-1, 1), dtype=torch.float32)

# Cross-validation hyper-parameters.
k = 5
num_epochs = 5
batch_size = 64
learning_rate = 0.02

model = my_module()
val_losses = k_fold(
    model,
    x_tensor,
    y_tensor,
    k=k,
    num_epochs=num_epochs,
    batch_size=batch_size,
    learning_rate=learning_rate,
)
# 训练结果好像是翻车了,唉!!!哭了
"""
fold 1
val loss : 0.0000
fold 2
val loss : 0.0000
fold 3
val loss : 0.0000
fold 4
val loss : 0.0000
fold 5
val loss : 0.0000
[1.73370361328125e-05, 1.7333531379699707e-05, 1.7332372069358826e-05, 1.7331457138061523e-05, 1.733108460903168e-05]
"""