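"""
goodbooks-10k: ten thousand books and one million ratings, plus books marked
"to read" and tags.

Data source: https://www.kaggle.com/zygmunt/goodbooks-10k

This script analyses how the original publication year of a book relates to
the number of books published and to their average rating.

Columns used:
    book_id
    original_publication_year
    average_rating
"""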
import pandas as pd
from matplotlib import pyplot as plt
# Path to the books table of the goodbooks-10k dataset.
file_path = './books_data/books.csv'

# 'ansi' is a codec alias that only exists on Windows builds of Python; on any
# other platform the codec lookup fails with LookupError before a single row
# is read. Keep the original behaviour where the alias is available, and
# otherwise fall back to pandas' default encoding (UTF-8) instead of crashing.
# An unrelated LookupError would simply be raised again by the retry.
try:
    df = pd.read_csv(file_path, encoding='ansi')
except LookupError:
    df = pd.read_csv(file_path)

# Drop the rows whose publication year is missing (NaN).
data = df[pd.notnull(df['original_publication_year'])]

# Both statistics are grouped by publication year, so build the grouping once.
by_year = data.groupby('original_publication_year')

# Mean of the per-book average rating for each publication year.
grouped = by_year['average_rating'].mean()
print(grouped)

# Number of books per publication year. Selecting 'book_id' before counting
# avoids counting every column of the frame only to discard all but one.
grouped1 = by_year['book_id'].count()
print(grouped1)

# Split each result into x values (years) and y values for plotting.
year = grouped.index
rating = grouped.values
year1 = grouped1.index
books_num = grouped1.values

# Use the SimHei font for sans-serif text
# (presumably so that Chinese labels in the figure render correctly -- confirm
# against the plotting code that follows).
plt.rcParams['font.sans-serif'] = ['SimHei']
fig = plt.figure(figsize=(15,