0 数据
广州市二手房价数据
大概有500条广州市二手房价数据
Python 数据导入
# Imports and global plotting configuration for the analysis script.
# pandas and seaborn are bound to `pd` and `sns`, which are the aliases the
# rest of the script uses (pd.read_csv, pd.to_numeric, sns.set, sns.distplot).
import numpy as np
import pandas as pd
# Plotting packages
import matplotlib.pyplot as plt
import missingno as msno
import seaborn as sns

plt.style.use(style="ggplot")
plt.rcParams['font.sans-serif'] = ['SimHei'] # Chinese font setup: SimHei
plt.rcParams['axes.unicode_minus'] = False # fix the minus sign '-' rendering as a box when saving figures
sns.set(font='SimHei',style="whitegrid",palette="binary") # fix Chinese text display in Seaborn
# Load the data
# Column labels handed to read_csv via `names=`, in the order listed here.
train_names = [
    "总价(万元)",
    "均价(元/平方米)",
    "房间数",
    "大厅数",
    "所在楼层",
    "总楼层",
    "朝向",
    "房屋结构",
    "装修",
    "面积(平方米)",
    "建成时间",
    "楼龄",
    "所在区域",
]
train = pd.read_csv(
    "data_guangzhou.csv",
    names=train_names,
    encoding='gb2312',
)
# Cleanup steps left disabled, exactly as in the original script:
#train = train.drop(0)
#train = train.dropna()
# The values come back as text when read directly, so convert the numeric
# columns to a numeric dtype, one column at a time.
numeric_columns = (
    '总价(万元)',
    '均价(元/平方米)',
    '面积(平方米)',
    '房间数',
    '大厅数',
    '总楼层',
    '楼龄',
)
for column in numeric_columns:
    train[column] = pd.to_numeric(train[column])
1 数据分析
房价分布
plt.figure(figsize = (10,5))
print("skew: ",train["总价(万元)"].skew())
sns.distplot(train["总价(万元)"],