# 直接上代码
import pandas as pd
from pyecharts.charts import Map, Bar, Line, Grid, Pie, Scatter
from pyecharts import options as opts
def fill_missing_lift(frame):
    """Return a copy of *frame* whose missing '电梯' (lift) values read '未知'.

    The filled column is assigned back instead of calling
    ``fillna(..., inplace=True)`` on the selected column: that chained
    in-place form is deprecated and leaves the DataFrame untouched once
    pandas Copy-on-Write is active.
    """
    cleaned = frame.copy()
    cleaned['电梯'] = cleaned['电梯'].fillna('未知')
    return cleaned


def region_stats(frame):
    """Summarise the listings per district ('市区').

    Prints the per-district count Series and the list of mean prices, and
    returns ``(region, count, mean_price)`` as three parallel lists:
    district names, number of non-null '小区' entries, and the mean of
    '价格(万元)' rounded to two decimals.
    """
    grouped = frame.groupby('市区')
    df_region = grouped['小区'].count()
    print(df_region)
    region = df_region.index.tolist()
    count = df_region.tolist()
    # Average the price column only: a frame-wide groupby mean raises
    # TypeError on the text columns since pandas 2.0.
    mean_price = grouped['价格(万元)'].mean().round(2).tolist()
    print(mean_price)
    return region, count, mean_price


def top_priced_listings(frame, limit=15):
    """Return ``(housing, prices)`` for the *limit* most expensive listings.

    Both lists are ordered by '价格(万元)' descending; ``housing`` holds the
    matching '小区' names.
    """
    top_price = frame.sort_values(by='价格(万元)', ascending=False).head(limit)
    return top_price['小区'].tolist(), top_price['价格(万元)'].tolist()


def render_region_map(region, count):
    """Render the per-district listing count on a Beijing map (HTML file)."""
    # The district names get a '区' suffix before being paired with counts.
    list_region = [name + '区' for name in region]
    # Named region_map so the builtin ``map`` is not shadowed.
    region_map = (
        Map()
        .add(
            series_name='',
            data_pair=[list(pair) for pair in zip(list_region, count)],
            maptype='北京',
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title='北京市二手房各区分布'),
            visualmap_opts=opts.VisualMapOpts(is_show=True, min_=0, max_=3000),
        )
    )
    region_map.render('北京各城区二手房数量地图分布.html')


def render_count_price_chart(region, count, mean_price):
    """Render listing counts (bars) with mean prices (line) per district."""
    bar = Bar().add_xaxis(xaxis_data=region)
    bar.add_yaxis(series_name='数量', y_axis=count)
    # Second y-axis (index 1) is the scale used by the price line below.
    bar.extend_axis(
        yaxis=opts.AxisOpts(name='价格(万元)', max_=900, min_=200, interval=100)
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='各城区二手房数量-平均价格柱状图'),
        yaxis_opts=opts.AxisOpts(name='数量', max_=3000, min_=0),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        xaxis_opts=opts.AxisOpts(
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_='shadow')
        ),
    )

    line = Line().add_xaxis(xaxis_data=region)
    line.add_yaxis(series_name='价格', y_axis=mean_price, yaxis_index=1)

    # Overlay the line on the bar chart, then render through a Grid.
    bar.overlap(line)
    grid = Grid()
    grid.add(bar, grid_opts=opts.GridOpts(), is_control_axis_index=True)
    grid.render('北京各城区二手房数量-平均价格柱状图.html')


def render_top15(housing, price_top15):
    """Render a bar chart of the most expensive listings (HTML file)."""
    bar1 = Bar().add_xaxis(xaxis_data=housing)
    # The series holds prices, so it is labelled as such (it used to be
    # labelled '数量', i.e. "count", by copy-paste).
    bar1.add_yaxis(series_name='价格', y_axis=price_top15, category_gap='50%')
    bar1.set_global_opts(
        title_opts=opts.TitleOpts(title='二手房价格最高的TOP15'),
        yaxis_opts=opts.AxisOpts(name='价格(万元)', min_=0, max_=6000, interval=1000),
        xaxis_opts=opts.AxisOpts(
            name='小区',
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_='shadow'),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    # No extend_axis here: this chart has a single series, so a secondary
    # 200-900 axis would not correspond to any data.
    bar1.render('北京二手房价格最高的TOP15.html')


def render_decoration_lift_chart(frame):
    """Render decoration counts (horizontal bars) beside a lift rose chart."""
    renovate_series = frame.groupby('装修情况')['小区'].count()
    renovate = renovate_series.index.tolist()
    renovate_count = renovate_series.tolist()

    lift_series = frame.groupby('电梯')['小区'].count()
    lift_index = lift_series.index.tolist()
    lift_count = lift_series.tolist()

    bar2 = Bar().add_xaxis(xaxis_data=renovate)
    bar2.add_yaxis(
        series_name='',
        y_axis=renovate_count,
        category_gap='50%',
        label_opts=opts.LabelOpts(position='right'),
    )
    bar2.set_global_opts(
        title_opts=opts.TitleOpts(title='装修情况-有无电梯(玫瑰图)'),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        xaxis_opts=opts.AxisOpts(name='数量'),
        yaxis_opts=opts.AxisOpts(
            name='装修情况',
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_='shadow'),
        ),
    )
    # Swap the axes so the bars run horizontally.
    bar2.reversal_axis()

    pie = Pie()
    pie.add(
        series_name='',
        data_pair=[list(pair) for pair in zip(lift_index, lift_count)],
        radius=['30%', '65%'],
        center=['50%', '50%'],
        rosetype='radius',
    )
    pie.set_series_opts(
        label_opts=opts.LabelOpts(is_show=True, formatter='{b}: {c}({d}%)')
    )

    # Combine the bar chart and the rose chart in one page.
    grid = Grid()
    grid.add(bar2, grid_opts=opts.GridOpts())
    grid.add(pie, grid_opts=opts.GridOpts(pos_left="60%", pos_right="75%"))
    grid.render('装修情况-有无电梯(玫瑰图).html')


def render_price_area_scatter(frame):
    """Render a scatter plot of total price against floor area (HTML file)."""
    total_price = frame['价格(万元)'].tolist()
    total_area = frame['面积(㎡)'].tolist()

    scatter = Scatter()
    scatter.add_xaxis(xaxis_data=total_area)
    scatter.add_yaxis(series_name='', y_axis=total_price)
    scatter.set_global_opts(
        title_opts=opts.TitleOpts(title='二手房总价与面积(散点图)'),
        xaxis_opts=opts.AxisOpts(name='面积(㎡)', type_='value'),
        yaxis_opts=opts.AxisOpts(name='价格(万元)'),
    )
    scatter.render('二手房总价与面积(散点图).html')


def main():
    """Analyse the second-hand housing data and visualise it with pyecharts."""
    # 1. Load the data and preview the first rows.
    dataDF = pd.read_csv('二手房数据.csv', encoding='GB18030')
    print(dataDF.head().to_string())
    # 2. Descriptive statistics (the original author reports 23677 rows).
    print(dataDF.describe())
    # 3. Missing values per column.
    print(dataDF.isnull().sum())
    # 4. The original author reports 8257 missing '电梯' rows; fill them
    #    with '未知' and print the counts again to confirm.
    dataDF = fill_missing_lift(dataDF)
    print(dataDF.isnull().sum())

    # 5. Listing count per district, shown on a map.
    region, count, mean_price = region_stats(dataDF)
    render_region_map(region, count)
    # 6. Listing count and mean price per district.
    render_count_price_chart(region, count, mean_price)
    # 7. The 15 most expensive listings.
    housing, price_top15 = top_priced_listings(dataDF, 15)
    render_top15(housing, price_top15)
    # 8. Decoration status and lift availability (rose chart).
    render_decoration_lift_chart(dataDF)
    # 9. Total price versus floor area (scatter plot).
    render_price_area_scatter(dataDF)


if __name__ == '__main__':
    main()
# 效果展示