#%%
import pandas as pd
import matplotlib.pyplot as plt
# Columns kept for the analysis: the film title plus the four rating columns
# that the cells below count and plot.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
# Read the full score table, then narrow it down to the selected columns.
reviews = pd.read_csv('fandango_scores.csv')
norm_reviews = reviews[cols]
# Preview the first five rows.
norm_reviews.head(5)
#%% md
# value_counts()解析
#%% md
![title](value_sort().png)
#%%
# Tally how many films share each distinct Fandango rating value
# (value_counts is the pandas counting function).
fandango_distribution = norm_reviews.loc[:, 'Fandango_Ratingvalue'].value_counts()
fandango_distribution
#%%
# Reorder the counts by their index (the rating value) in ascending order.
fandango_distribution = fandango_distribution.sort_index(ascending=True)
fandango_distribution
#%%
# Count the films per IMDB rating value and order the result by rating value,
# done here as a single chained expression.
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()
imdb_distribution
#%% md
## Matplotlib中统计直方图的画法-hist()
#%%
# Histogram of the Fandango ratings with matplotlib's default binning,
# drawn in blue on a single-axes figure.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.hist(norm_reviews['Fandango_Ratingvalue'], color="blue")
plt.show()
#%%
# Histogram of the Fandango ratings where `bins` sets the number of bins
# the data is divided into (20 here).
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)
plt.show()
#%%
# Histogram of the Fandango ratings restricted to the interval given by
# `range` (4 to 5 here), divided into 20 bins.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)
plt.show()
#%%
# Stack four histograms in one tall figure, one per rating column. Every
# panel uses the same settings (20 bins over 0-5, y-axis limited to 0-50)
# so the distributions can be compared directly.
fig = plt.figure(figsize=(5, 20))
ax1, ax2, ax3, ax4 = (fig.add_subplot(4, 1, row) for row in range(1, 5))

# (axes, column to plot, panel title) for each of the four panels.
panels = (
    (ax1, 'Fandango_Ratingvalue', 'Distribution of Fandango Ratings'),
    (ax2, 'RT_user_norm', 'Distribution of Rotten Tomatoes Ratings'),
    (ax3, 'Metacritic_user_nom', 'Distribution of Metacritic Ratings'),
    (ax4, 'IMDB_norm', 'Distribution of IMDB Ratings'),
)
for panel_ax, column, title in panels:
    panel_ax.hist(norm_reviews[column], bins=20, range=(0, 5))
    panel_ax.set_title(title)
    panel_ax.set_ylim(0, 50)  # fix the y-axis range
plt.show()
#%% md
![title](hist().png)
#%% md
## Matplotlib中箱线图画法
#%% md
![title](matplatlib_box.jpg)
![title](box_pic.png)
#%%
# Box plot of the 'RT_user_norm' column, labelled 'Rotten Tomatoes' on the
# x-axis, with the y-axis fixed to 0-5.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.boxplot(norm_reviews['RT_user_norm'])
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(bottom=0, top=5)
plt.show()
#%%
# Side-by-side box plots, one per rating column, on a shared 0-5 y-axis.
num_cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Pass the selected columns as a 2-D array so each column gets its own box.
ax.boxplot(norm_reviews[num_cols].values)
# Label each box with its column name, rotated 45 degrees.
ax.set_xticklabels(num_cols, rotation=45)
ax.set_ylim(bottom=0, top=5)
plt.show()
#%%