# Plot a bar chart of the number of movies in each genre
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Count how many movies belong to each genre and draw the counts as a bar
# chart, smallest count first.
file_path = "DataAnalysis/IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)

# Each "Genre" cell is split on "," into a list of genre names.
# NOTE(review): assumes every row has a non-null "Genre" value; a missing
# value would make the comprehension below fail -- confirm against the data.
temp_list = df["Genre"].str.split(",").tolist()

# Unique genre names; sorted so the column order is the same on every run
# (plain set iteration order is not stable between interpreter runs).
genre_list = sorted({genre for genres in temp_list for genre in genres})

# Indicator table: one row per movie, one column per genre, initially all 0.
zero_df = pd.DataFrame(
    np.zeros((df.shape[0], len(genre_list))),
    columns=genre_list,
)
# Mark with 1 every genre that the movie in this row belongs to.
for row, genres in enumerate(temp_list):
    zero_df.loc[row, genres] = 1

# Column sums give the number of movies per genre; sort ascending for plotting.
genre_count = zero_df.sum(axis=0)
genre_count = genre_count.sort_values()

_x = genre_count.index
_y = genre_count.values
plt.bar(range(len(_x)), _y)
# Label each bar position with its genre name.
plt.xticks(range(len(_x)), _x)
plt.show()
# Count Starbucks stores
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Load the worldwide Starbucks store list, print the store counts for the
# country codes "CN" and "US", then count the stores per state/province
# within "CN".
file_path = "DataAnalysis/starbucks_store_worldwide.csv"
df = pd.read_csv(file_path)

# Non-null "Brand" entries per country code, computed once and reused.
country_counts = df.groupby("Country")["Brand"].count()
for country_code in ("CN", "US"):
    print(country_counts[country_code])

# Keep only the rows whose country code is "CN".
is_china = df["Country"] == "CN"
china_data = df[is_china]

# Non-null "Brand" entries per state/province among the "CN" rows.
grouped = china_data.groupby("State/Province")["Brand"].count()
# Number of stores in each city in China
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Draw a bar chart of the 50 cities with the most stores among the rows whose
# country code is "CN".

# Font loaded from a fixed path and applied to the x tick labels below.
# NOTE(review): presumably needed so Chinese city names render -- confirm.
font = font_manager.FontProperties(
    fname="/usr/share/fonts/truetype/arphic-gbsn00lp/gbsn00lp.ttf"
)

file_path = "DataAnalysis/starbucks_store_worldwide.csv"
df = pd.read_csv(file_path)

# Restrict the table to the "CN" rows.
in_china = df["Country"] == "CN"
df = df[in_china]

# Non-null "Brand" entries per city, largest first, keeping the first 50.
city_counts = df.groupby(by="City")["Brand"].count()
data1 = city_counts.sort_values(ascending=False).head(50)

_x = data1.index
_y = data1.values
bar_positions = range(len(_x))
plt.bar(bar_positions, _y)
# City names as tick labels, rotated 45 degrees and drawn with the loaded font.
plt.xticks(bar_positions, _x, fontproperties=font, rotation=45)
plt.show()
# Count the number of books for each publication year
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import font_manager
from pandas.core import groupby
# Load the books table and count the titles per original publication year.
file_path = "DataAnalysis/books.csv"
df = pd.read_csv(file_path)

# Drop the rows that have no original publication year.
has_year = df["original_publication_year"].notna()
df = df[has_year]

# Non-null "title" entries for each publication year.
books_by_year = df.groupby(by="original_publication_year")
grouped = books_by_year["title"].count()
# Compute the average book rating for each publication year
# Mean of "average_rating" for each original publication year, drawn as a
# line chart with one point per year.
ratings = df["average_rating"]
years = df["original_publication_year"]
grouped = ratings.groupby(by=years).mean()

_x = grouped.index
_y = grouped.values

# Points are plotted at consecutive integer positions, not at the year values.
positions = list(range(len(_x)))
plt.plot(positions, _y)
# Label every 10th position with its year, cast to int and rotated 45 degrees.
plt.xticks(positions[::10], _x[::10].astype(int), rotation=45)
plt.show()