这是一个 Kaggle 入门级比赛。程序基于 PyTorch,目的是利用训练数据集 train_data 训练一个神经网络,用来预测 test_data 的结果。提交到 Kaggle 后最终得分为 0.76(我很菜)。数据链接如下:
https://www.kaggle.com/c/titanic/data
代码如下:
import torch
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
import matplotlib as mlp
class TatanicDataset(Dataset):
def __init__(self,filepath):
# 从原始数据集中取五个特征
features = ["Pclass", "Sex", "SibSp", "Parch", "Fare"]
data = pd.read_csv(filepath)
self.len = data.shape[0] # shape(多少行,多少列)
# data[features]的类型是DataFrame,进行onehot表示
self.x_data = torch.from_numpy(np.array(pd.get_dummies(data[features])))