房屋价格预测-数据分析(python)

房屋价格预测-数据分析(python)

Dataset download: https://c.d2l.ai/stanford-cs329p/assignments.html#assignment-1

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython import display
# Request 'svg' as the matplotlib output format through IPython's display module.
# NOTE(review): `display.set_matplotlib_formats` appears to be deprecated in
# newer IPython releases (in favour of `matplotlib_inline.backend_inline`) —
# confirm against the installed IPython version.
display.set_matplotlib_formats('svg')

# Load the house sales table from the local Feather file.
data = pd.read_feather('house_sales.ftr')
# Alternative source: data = pd.read_csv('house_sales.zip')
# Quick inspection helpers: print(data.shape); print(data.head)

# Count missing values per column and compare them with 30% of the row count:
# columns below the threshold are listed, columns above it are dropped in place.
null_sum = data.isna().sum()
max_missing = len(data) * 0.3
print(data.columns[null_sum < max_missing])
too_sparse = data.columns[null_sum > max_missing]
data.drop(columns=too_sparse, inplace=True)

print(data.dtypes)

# Monetary columns to convert to float (presumably stored as text such as
# "$1,234" — verify against the dataset).
currency = ['Sold Price', 'Listed Price', 'Tax assessed value', 'Annual tax amount']
for column in currency:
    # Remove '$', ',' and '-' characters, map empty or whitespace-only strings
    # to NaN, then cast the column to float.
    without_symbols = data[column].replace(r'[$,-]', '', regex=True)
    without_blanks = without_symbols.replace(r'^\s*$', np.nan, regex=True)
    data[column] = without_blanks.astype(float)

# Area columns whose text values carry a ' sqft' or ' Acres' unit suffix.
areas = ['Total interior livable area', 'Lot size']
for column in areas:
    raw = data[column]
    # Rows expressed in acres; missing values count as "not acres".
    in_acres = raw.str.contains('Acres', na=False)
    # Drop the unit suffixes and thousands separators, then cast to float.
    values = raw.replace(r'\b sqft\b|\b Acres\b|\b,\b', '', regex=True).astype(float)
    # Scale acre values by 43560 so every row uses the same unit (sqft).
    values.loc[in_acres] *= 43560
    data[column] = values

print(data.describe())

# Simple outlier handling: `areas[1]` is 'Lot size'; rows whose value is below
# 10 or above 1e4 are treated as abnormal and removed. Rows with a missing
# value compare False on both sides and are therefore kept.
abnormal = (data[areas[1]] < 10) | (data[areas[1]] > 1e4)
# Take an explicit copy so that columns added to `data` later are written to an
# independent frame rather than to a filtered slice of the original, which
# would raise pandas' SettingWithCopyWarning.
data = data[~abnormal].copy()
# Vectorized count of flagged rows (same value as the builtin sum, but without
# iterating over the Series element by element).
print('abnormal: ', abnormal.sum())

# ax = sns.histplot(np.log10(data['Sold Price']))
# ax.set_xlim([3, 8])
# ax.set_xticks(range(3, 9))
# ax.set_xticklabels(['%.0e'%a for a in 10**ax.get_xticks()])
# plt.show()

# Show the 20 most frequent values of the 'Type' column.
print(data['Type'].value_counts().head(20))

# Boolean mask selecting rows that belong to the four property types compared below.
selected_types = ['SingleFamily', 'Condo', 'MultiFamily', 'Townhouse']
types = data['Type'].isin(selected_types)
# Optional: KDE of log10 sold price per type.
# sns.displot(pd.DataFrame({
#     'Sold Price': np.log10(data[types]['Sold Price']),
#     'Type': data[types]['Type']
# }), x='Sold Price', hue='Type', kind='kde')
# plt.show()

# Sold price divided by interior livable area, then its distribution per type
# (outlier markers hidden, y-axis limited to 0-2000).
data['Price per living sqft'] = data['Sold Price'].div(data['Total interior livable area'])
ax = sns.boxplot(x='Type', y='Price per living sqft', data=data.loc[types], fliersize=0)
ax.set_ylim(0, 2000)
# plt.show()

# Compare price per living sqft across the 20 most frequent zip codes.
top_zips = data['Zip'].value_counts().head(20).index
d = data[data['Zip'].isin(top_zips)]
# Draw on a dedicated figure: without an explicit Axes, seaborn draws on the
# current Axes, which still holds the per-type boxplot created earlier in the
# script, so the two plots would be overlaid on each other.
_, ax = plt.subplots()
sns.boxplot(x='Zip', y='Price per living sqft', data=d, fliersize=0, ax=ax)
ax.set_ylim([0, 2000])
# Rotate the zip-code labels so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)
# plt.show()

# Pairwise correlation between the price-related columns, drawn as an
# annotated heatmap on a 6x6 figure; plt.show() then displays the open figures.
columns = ['Sold Price', 'Listed Price', 'Annual tax amount', 'Price per living sqft']
correlations = data[columns].corr()
_, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(correlations, annot=True, cmap='RdYlGn', ax=ax)
plt.show()

请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述

  • 0
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值