# 01 统计电影类别数据 (tally movie genre data)
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Count how many movies carry each genre label in the spreadsheet's "类型"
# column and show the totals as a bar chart, most frequent genre first.

# Font setup so the Chinese axis and tick labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Raw strings keep the backslashes in these Windows paths literal instead of
# relying on Python passing unknown escape sequences through unchanged.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\msyh.ttf")
file_path = r"E:\python数据源\豆瓣电影.xlsx"
mv_list = pd.read_excel(file_path)

# Split every "类型" cell on commas: one list of genre labels per movie.
mv_type_a = mv_list["类型"].str.split(",").tolist()
# Distinct genre labels; sorted so the column order is the same on every run
# (iteration order of a set of strings is not).
mv_type = sorted({genre for genres in mv_type_a for genre in genres})

# Indicator matrix: one row per movie, one column per genre, 1.0 where the
# movie has that genre. The row count comes from the data rather than a
# fixed 250, so sheets of any length are handled.
zeros_df = pd.DataFrame(
    np.zeros((len(mv_type_a), len(mv_type))),
    columns=mv_type,
)
for row, genres in enumerate(mv_type_a):
    zeros_df.loc[row, genres] = 1

# Column sums give the number of movies per genre; largest first.
type_count = zeros_df.sum(axis=0)
type_count = type_count.sort_values(ascending=False)

_x = type_count.index
_y = type_count.values
plt.figure(figsize=(30, 10), dpi=120)
plt.bar(range(len(_x)), _y)
plt.xlabel("类型", fontproperties=my_font)
plt.ylabel("数量", fontproperties=my_font)
# Use the genre names as tick labels (the original referenced an undefined
# name `myfont` here, which raised NameError).
plt.xticks(range(len(_x)), _x, fontproperties=my_font)
plt.show()