DL学习5--房价预测

该代码示例展示了如何利用PyTorch构建一个简单的线性模型来预测房价。首先,它从指定URL下载并预处理数据,包括标准化数值特征和处理缺失值。然后,定义了一个MSELoss函数用于损失计算,构建了一个线性网络,并使用Adam优化器进行训练。在训练过程中,监测了训练损失和对数均方根误差(logRMSE)作为性能指标。
摘要由CSDN通过智能技术生成

房价预测

import hashlib
import os
import tarfile
import zipfile
import requests

DATA_HUB = dict()
DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/'

def download(name, cache_dir=os.path.join('.', 'data')):
    """Download a file registered in DATA_HUB and return its local path.

    Args:
        name: Key into DATA_HUB; the entry is a ``(url, sha1_hash)`` pair.
        cache_dir: Directory for the downloaded file (created if missing).

    Returns:
        Path of the local file, named after the last URL path component.

    Raises:
        AssertionError: If ``name`` is not registered in DATA_HUB.
        requests.HTTPError: If the server answers with an error status.
    """
    # Raise explicitly instead of using `assert`, so the check survives
    # `python -O` while callers still see the same exception type.
    if name not in DATA_HUB:
        raise AssertionError(f"{name} 不存在于 {DATA_HUB}")
    url, sha1_hash = DATA_HUB[name]
    os.makedirs(cache_dir, exist_ok=True)
    fname = os.path.join(cache_dir, url.split('/')[-1])
    if os.path.exists(fname):
        # Hash the cached copy in 1 MiB chunks; reuse it only on a match.
        sha1 = hashlib.sha1()
        with open(fname, 'rb') as f:
            for chunk in iter(lambda: f.read(1048576), b''):
                sha1.update(chunk)
        if sha1.hexdigest() == sha1_hash:
            return fname
    print(f'正在从{url}下载{fname}...')
    with requests.get(url, stream=True, verify=True) as r:
        # Fail before touching the cache file; otherwise an HTTP error page
        # would be saved to disk as if it were the dataset.
        r.raise_for_status()
        with open(fname, 'wb') as f:
            # Write chunk by chunk so stream=True actually bounds memory use.
            for chunk in r.iter_content(chunk_size=1048576):
                f.write(chunk)
    return fname

def download_extract(name, folder=None):
    """Download a zip/tar archive from DATA_HUB and extract it beside itself.

    Args:
        name: Key into DATA_HUB, forwarded to ``download``.
        folder: Optional sub-directory name to return instead of the default.

    Returns:
        ``<archive dir>/<folder>`` when ``folder`` is given, otherwise the
        archive path with its final extension removed.

    Raises:
        AssertionError: If the file extension is not .zip, .tar or .gz.
    """
    fname = download(name)
    base_dir = os.path.dirname(fname)
    data_dir, ext = os.path.splitext(fname)
    if ext == '.zip':
        open_archive = zipfile.ZipFile
    elif ext in ('.tar', '.gz'):
        open_archive = tarfile.open
    else:
        # Explicit raise (not `assert False`) so the guard survives `-O`.
        raise AssertionError('只有zip/tar文件可以被解压缩')
    # The context manager closes the archive handle even if extraction fails.
    with open_archive(fname, 'r') as fp:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only use this with archives from a trusted source.
        fp.extractall(base_dir)
    return os.path.join(base_dir, folder) if folder else data_dir

def download_all():
    """Fetch every file that is currently registered in DATA_HUB."""
    for dataset_name in DATA_HUB:
        download(dataset_name)
%matplotlib inline
import numpy as np
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l

# Register the two Kaggle house-price CSV files as (url, sha1) pairs.
DATA_HUB.update({
    'kaggle_house_train': (
        DATA_URL + 'kaggle_house_pred_train.csv',
        '585e9cc93e70b39160e7921475f9bcd7d31219ce',
    ),
    'kaggle_house_test': (
        DATA_URL + 'kaggle_house_pred_test.csv',
        'fa19780a7b011d9b009e8bff8e99922a8ee2eb90',
    ),
})

# Download each file (or reuse the cached copy) and load it as a DataFrame.
train_data = pd.read_csv(download('kaggle_house_train'))
test_data = pd.read_csv(download('kaggle_house_test'))

# Stack the train and test features so both receive identical preprocessing.
# The first column is dropped from both frames (presumably a row Id -- confirm
# against the CSV) and the last train column is excluded (assumed to be the
# SalePrice label that is read separately below).
all_feature = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))
numeric_features = all_feature.dtypes[all_feature.dtypes != 'object'].index
# Standardize every numeric column to zero mean / unit variance. The
# parentheses matter: (x - mean) / std, not x - (mean / std).
all_feature[numeric_features] = all_feature[numeric_features].apply(
    lambda x: (x - x.mean()) / x.std()
)
# After standardization the column mean is 0, so filling NaN with 0 imputes
# missing values with the mean.
all_feature[numeric_features] = all_feature[numeric_features].fillna(0)
# One-hot encode the remaining columns; dummy_na=True gives NaN its own
# indicator column.
all_feature = pd.get_dummies(all_feature, dummy_na=True)
train_data_split = train_data.shape[0]
# Cast to float32 explicitly: recent pandas emits boolean dummy columns, which
# would make `.values` an object array that torch.tensor cannot convert.
train_dataset = torch.tensor(
    all_feature[:train_data_split].values.astype(np.float32),
    dtype=torch.float32)
test_dataset = torch.tensor(
    all_feature[train_data_split:].values.astype(np.float32),
    dtype=torch.float32)
train_labels = torch.tensor(
    train_data.SalePrice.values.reshape(-1, 1), dtype=torch.float32)

dataloader

import torch.utils.data as data
import torch
# Append the label column to the feature matrix so that every row served by
# the loader carries its features followed by its label.
train_d = torch.cat((train_dataset, train_labels), dim=1)
train_dataloader = data.DataLoader(
    dataset=train_d,
    batch_size=126,
    shuffle=True,
)

定义loss和net

# Mean-squared-error criterion used for training.
loss = nn.MSELoss()
# Width of the model input = number of feature columns after preprocessing.
in_features = train_dataset.size(1)


def get_net():
    """Return a fresh single-layer linear model mapping in_features -> 1."""
    linear_layer = nn.Linear(in_features, 1)
    return nn.Sequential(linear_layer)


net = get_net()
optimizer = torch.optim.Adam(params=net.parameters(), lr=0.01)

定义对数均方根误差(log RMSE)评价指标

# Log-RMSE: sqrt(mean((log(pred) - log(y)) ** 2)). Comparing logarithms
# measures the error relative to the magnitude of the target.
def log_rmse(net, features, labels):
    """Return the root-mean-squared error between log-predictions and log-labels.

    Args:
        net: Callable model; ``net(features)`` must return a tensor
            broadcastable against ``labels``.
        features: Input tensor passed to ``net``.
        labels: Target tensor; values must be positive for the log to be finite.

    Returns:
        The metric as a Python float.
    """
    # Evaluation only: skip building an autograd graph.
    with torch.no_grad():
        # Clamp predictions to >= 1 so the logarithm stays finite and >= 0.
        clipped_preds = torch.clamp(net(features), min=1)
        # Use the functional MSE directly so the metric stays an RMSE even if
        # the global training criterion is swapped for another loss.
        squared_log_error = torch.nn.functional.mse_loss(
            torch.log(clipped_preds), torch.log(labels))
    return torch.sqrt(squared_log_error).item()

模型训练

epochs = 100
# Per-epoch history: summed batch MSE and log-RMSE on the full training set.
train_ls, log_mse = [], []
for epoch in range(epochs):
    # Put the model in training mode. The former `net == nn.Module()` check
    # compared against a brand-new module and was therefore never true.
    if isinstance(net, nn.Module):
        net.train()
    ls_train = 0
    for X in train_dataloader:
        # Each row is [features..., label]; split the last column off again.
        # Named `features` (not `data`) so the `torch.utils.data as data`
        # module alias is not overwritten.
        features = X[:, :-1]
        label = X[:, -1].reshape(-1, 1)
        optimizer.zero_grad()
        preb = net(features)
        l = loss(preb, label)
        l.backward()
        optimizer.step()

        # Accumulates the SUM of per-batch mean losses, not their average.
        ls_train += l.item()
    train_ls.append(ls_train)

    preb_mse = log_rmse(net, train_dataset, train_labels)
    log_mse.append(preb_mse)
    print("epoch is ",epoch," Mse loss is ",ls_train, " log_mse is ",preb_mse)

训练结果

epoch is  0  Mse loss is  462676588544.0  log_mse is  4.277717590332031
epoch is  1  Mse loss is  450499411968.0  log_mse is  3.5420501232147217
epoch is  2  Mse loss is  440024502272.0  log_mse is  3.127312183380127
epoch is  3  Mse loss is  425981331456.0  log_mse is  2.835536241531372
epoch is  4  Mse loss is  414730248192.0  log_mse is  2.611187696456909
epoch is  5  Mse loss is  403009087488.0  log_mse is  2.431793689727783
epoch is  6  Mse loss is  394215604224.0  log_mse is  2.2810003757476807
epoch is  7  Mse loss is  380868847616.0  log_mse is  2.1509878635406494
epoch is  8  Mse loss is  374684563456.0  log_mse is  2.0380849838256836
epoch is  9  Mse loss is  364873803776.0  log_mse is  1.937126874923706
...........................

绘制结果

%matplotlib inline
import matplotlib.pyplot as plt
# Draw the values collected in train_ls (one entry appended per epoch) as a
# line plot in a new figure.
plt.figure()
plt.plot(train_ls)
plt.show()

在这里插入图片描述

%matplotlib inline
import matplotlib.pyplot as plt
# Draw the values collected in log_mse (one log-RMSE entry appended per epoch)
# as a line plot in a new figure.
plt.figure()
plt.plot(log_mse)
plt.show()

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值