# Data source: ./IMDB-Movie-Data.csv
import pandas as pd
from matplotlib import pyplot as plt
# Load the movie table; every statistic below is derived from this frame.
p = pd.read_csv('./IMDB-Movie-Data.csv')

# Average rating over all rows.
print(p['Rating'].mean())

# Number of distinct directors, computed two equivalent ways
# (Python set vs. pandas unique) and printed once for each.
print(len(set(p['Director'].tolist())))
print(len(p['Director'].unique()))

# Number of distinct actors.
# Each 'Actors' cell holds several comma-separated names. Names are stripped
# after splitting so that "A" and " A" (presumably produced by a ", "
# separator -- verify against the CSV) are counted as the same person.
# Missing cells are dropped so a NaN cannot break the flattening step.
temp_actors_list = p['Actors'].dropna().str.split(',').tolist()
actors_list = [name.strip() for names in temp_actors_list for name in names]
actors_num = len(set(actors_list))
print(actors_num)

# Runtime distribution.
runtime_data = p['Runtime (Minutes)'].values
print(runtime_data)
max_runtime_data = runtime_data.max()  # longest runtime
min_runtime_data = runtime_data.min()  # shortest runtime
print(max_runtime_data)
jicha = max_runtime_data - min_runtime_data  # range (max - min)
print(jicha)

# Use explicit bin edges that are exactly `bin_width` minutes apart. Passing
# only a bin *count* (range // 10) gives bins that are not 10 wide whenever
# the range is not a multiple of 10, so bars would drift off the x-ticks.
# NOTE(review): assumes runtimes are whole minutes (integer column) -- confirm.
bin_width = 10
bin_edges = list(
    range(int(min_runtime_data), int(max_runtime_data) + bin_width, bin_width)
)
num_bin = len(bin_edges) - 1  # number of bins actually drawn

plt.figure(figsize=(20, 8), dpi=80)
# `density=True` replaces the `normed=True` keyword removed in Matplotlib 3.1.
plt.hist(runtime_data, bin_edges, density=True)
# Put a tick on every bin edge so each bar sits between two labelled values.
plt.xticks(bin_edges)
plt.show()