第一步:数据的获取
从某瓣复制粘贴科幻片类型的top100电影信息
电影信息存入sqlite
啥也不能发出来,无语嘞,具体复制粘贴是怎么来的你们懂的
开始可视化工作
数据包括(排名、电影名、评分、评论数、演员)。使用 pyecharts 制作评分的分布饼图、演员的词云以及评论数的柱状图,最终呈现如下:
import os
import sqlite3
import webbrowser
from collections import Counter
from contextlib import closing
from pathlib import Path

from pyecharts import options as opts # options配置项
from pyecharts.charts import *
from pyecharts.globals import SymbolType
from pyecharts.globals import ThemeType
def get_data(db_path='douban_movie.db', limit=100):
    """Load movie rows from SQLite and pass them to ``visualize``.

    Reads up to ``limit`` rows from the ``movies_rank`` table. The first four
    columns of every row are forwarded unchanged as the chart data, and the
    fifth column is treated as a comma-separated actor list that is tallied
    for the word cloud.

    Args:
        db_path: Path of the SQLite database file to read.
        limit: Maximum number of rows to fetch.
    """
    # closing() releases the connection even when the query raises
    # (for example when the table does not exist).
    with closing(sqlite3.connect(db_path)) as conn:
        result = conn.execute(
            'select * from movies_rank limit ?', (limit,)
        ).fetchall()

    datas = [list(row[:4]) for row in result]

    # A NULL actor column is skipped instead of crashing on ``None.split``;
    # blank fragments (e.g. from a trailing comma) are dropped so they do not
    # show up as empty words.
    actor_names = (
        name.strip()
        for row in result
        if row[4]
        for name in row[4].split(',')
    )
    actor_count = Counter(name for name in actor_names if name).most_common()

    visualize(datas, actor_count)
def visualize(datas, words, output_path='movies.html'):
    """Render the word cloud, bar chart and pie chart into one HTML page.

    Args:
        datas: Rows whose index 1 is used as the movie title, index 2 as the
            rating and index 3 as the comment count.
        words: ``(actor, count)`` pairs fed to the word cloud.
        output_path: HTML file the page is written to and then opened from.
    """
    page = Page(layout=Page.SimplePageLayout)
    page.add(_actor_wordcloud(words))
    page.add(_comment_count_bar(datas))
    page.add(_rating_pie(_rating_distribution(datas)))
    page.render(output_path)
    # Opening through webbrowser works on every platform; handing the bare
    # file name to a shell only works where the shell launches files by
    # association.
    webbrowser.open(Path(output_path).resolve().as_uri())


def _actor_wordcloud(words):
    """Build the actor word cloud from ``(name, count)`` pairs."""
    return (
        WordCloud()
        .add("", words, word_size_range=[20, 100], shape=SymbolType.DIAMOND)
        .set_global_opts(title_opts=opts.TitleOpts(title="top100科幻电影演员"))
    )


def _comment_count_bar(rows):
    """Build the horizontal bar chart of comment counts per movie."""
    titles = [row[1] for row in rows]
    comment_counts = [row[3] for row in rows]
    zoom_opts = [
        # Zoom by scrolling/dragging inside the plot area, no visible control.
        opts.DataZoomOpts(
            orient="vertical",
            is_show=False,
            type_="inside",
            yaxis_index=[0, 1],
            range_start=0,
            range_end=100,
        ),
        # Visible slider that controls the same y-axis indices.
        opts.DataZoomOpts(
            orient="vertical",
            is_show=True,
            type_="slider",
            yaxis_index=[0, 1],
            range_start=0,
            range_end=100,
        ),
    ]
    return (
        Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        .add_xaxis(titles)
        .add_yaxis("影名", comment_counts)
        .reversal_axis()  # swap the X and Y axis data so the bars run horizontally
        .set_series_opts(label_opts=opts.LabelOpts(position="right"))
        .set_global_opts(
            title_opts=opts.TitleOpts(title="电影评论数"),
            datazoom_opts=zoom_opts,
        )
    )


def _rating_distribution(rows):
    """Return ``[label, count]`` pairs per one-decimal rating, rarest first.

    The labels '8.0' through '9.4' are always present (with a count of zero
    when unused). Any other rating found in ``rows`` gets its own label
    instead of raising ``KeyError``. Rows with a ``None`` rating are ignored.
    """
    counts = Counter(
        f"{float(row[2]):.1f}" for row in rows if row[2] is not None
    )
    labels = {f"{tenths / 10:.1f}" for tenths in range(80, 95)} | set(counts)
    pairs = [[label, counts[label]] for label in sorted(labels, key=float)]
    # Stable sort: equal counts stay in ascending rating order.
    pairs.sort(key=lambda pair: pair[1])
    return pairs


def _rating_pie(data_pair):
    """Build the rose-type pie chart of the rating distribution."""
    return (
        Pie(init_opts=opts.InitOpts(width="800px", height="400px", bg_color="#2c343c"))
        .add(
            series_name="Top100评分分布情况",
            data_pair=data_pair,
            rosetype="radius",
            radius="55%",
            center=["50%", "50%"],
            label_opts=opts.LabelOpts(is_show=False, position="center"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="Top100评分分布情况",
                pos_left="center",
                pos_top="20",
                title_textstyle_opts=opts.TextStyleOpts(color="#fff"),
            ),
            legend_opts=opts.LegendOpts(is_show=False),
        )
        .set_series_opts(
            tooltip_opts=opts.TooltipOpts(
                trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"
            ),
            label_opts=opts.LabelOpts(color="rgba(255, 255, 255, 0.3)"),
        )
    )