import pandas as pd
# Load the unemployment table and swap the DATE column for parsed datetime
# values (pd.to_datetime performs the conversion).
unrate = pd.read_csv("UNRATE.csv")
unrate = unrate.assign(DATE=pd.to_datetime(unrate["DATE"]))
# Bare expression: only produces visible output in an interactive session.
unrate.head()
# DATE is the date; VALUE is, for that year and month, the number of unemployed people as a percentage of the total.
# Drawing a line chart.
import matplotlib.pyplot as plt  # import the matplotlib plotting interface

# plt.plot(...) takes the data to draw; plt.show() displays the line chart.
# Plot the first 12 rows: DATE on the x axis, VALUE on the y axis.
plt.plot(unrate["DATE"].head(12), unrate["VALUE"].head(12))
plt.show()
# Same 12-row line chart, with the x tick labels tilted.
first_rows = unrate.head(12)
plt.plot(first_rows["DATE"], first_rows["VALUE"])
plt.xticks(rotation=45)  # rotate the x-axis tick labels by 45 degrees
plt.show()
# 12-row line chart again, this time with axis labels and a title.
leading_rows = unrate.head(12)
plt.plot(leading_rows["DATE"], leading_rows["VALUE"])
plt.title("Monthly Unemployment Trends,1948")  # name the chart
plt.xlabel("Month")  # name the x axis
plt.ylabel("Unemployment Rate")  # name the y axis
plt.show()
# Working with subplots.
import matplotlib.pyplot as plt

fig = plt.figure()  # create the figure that will hold the subplots
ax1 = fig.add_subplot(2, 2, 1)  # 2-row by 2-column grid, first position
ax2 = fig.add_subplot(2, 2, 2)  # same grid, second position
ax3 = fig.add_subplot(2, 2, 4)  # same grid, fourth position (third is left empty)
plt.show()
import matplotlib.pyplot as plt

# Draw rows 0-11 in red and rows 12-23 in blue on the same axes.
fig = plt.figure()
for (first_row, stop_row), line_color in (((0, 12), "red"), ((12, 24), "blue")):
    row_block = unrate[first_row:stop_row]
    plt.plot(row_block["DATE"], row_block["VALUE"], c=line_color)
plt.show()
import matplotlib.pyplot as plt

# Overlay five consecutive 12-row slices of the table, one colour per slice.
fig = plt.figure()
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    # Slice i covers rows [12*i, 12*(i+1)).
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    plt.plot(subset['DATE'], subset['VALUE'], c=color)
# Show once, after every line has been added to the figure.
plt.show()
import matplotlib.pyplot as plt

# Overlay five consecutive 12-row slices, labelling each line "1948".."1952"
# so it can be identified in the legend.
fig = plt.figure()
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    # Slice i covers rows [12*i, 12*(i+1)).
    start_index = i * 12
    end_index = (i + 1) * 12
    label = str(1948 + i)
    subset = unrate[start_index:end_index]
    plt.plot(subset['DATE'], subset['VALUE'], c=color, label=label)
# Legend, axis names and title are set once, after all lines are drawn.
plt.legend(loc='best')  # let matplotlib choose the legend location
plt.xlabel("Year")
plt.ylabel("Rate")
plt.title("Year Rate,1948-1952")
plt.show()
# 3. Bar charts and scatter plots
import pandas as pd  # import the pandas library

# Load the fandango_score_comparison.csv data.
reviews = pd.read_csv("fandango_score_comparison.csv")
# Columns to keep: the film name plus the rating columns used below.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]  # select only the columns listed in cols
# Bare expression: only produces visible output in an interactive session.
norm_reviews.head()
import matplotlib.pyplot as plt
from numpy import arange

# The values in these columns (taken from row 0) are the bar heights.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.loc[0, num_cols].values
# print(bar_heights)  # sample output from the original notes: [4.3 3.55 3.9 4.5 5.0]
# x positions of the five bars: 0.75, 1.75, ..., 4.75.
bar_positions = arange(5) + 0.75
# print(bar_positions)  # [0.75 1.75 2.75 3.75 4.75]
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)  # third argument is the bar width
plt.show()
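# The bar() method has two required arguments: left and height.
# The left argument specifies the x coordinate of the left side of each bar.
# The height argument specifies the height of each bar.
# (In current matplotlib the first argument is named x and, with the default
# align='center', it gives the bar centres; pass align='edge' for left edges.)
# fig, ax = plt.subplots() is equivalent to: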
# Create a figure and add a single axes to it in two explicit steps.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Vertical bars: ax.bar(positions, heights, width).
ax.bar(bar_positions, bar_heights, 0.5)
# Horizontal bars: use barh instead of bar.
# ax.barh(bar_positions, bar_heights, 0.5)
# Bar chart of the row-0 ratings with one labelled tick per rating column.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)
fig, ax = plt.subplots()
# The positions are left edges: a bar of width 0.5 starting at k - 0.25 is
# centred on tick k. align="edge" makes that explicit, because newer
# matplotlib otherwise treats the positions as bar centres and the tick
# labels would sit off-centre.
ax.bar(bar_positions, bar_heights, 0.5, align="edge")
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=45)  # column names as tick labels, tilted 45 degrees
ax.set_xlabel("Rating Source")
ax.set_ylabel("Average Rating")
ax.set_title("Average User Rating For Avengers: Age of Ultron (2015)")
plt.show()
# Scatter plot: Fandango_Ratingvalue on the x axis against RT_user_norm on the y axis.
fig, ax = plt.subplots()
x_scores = norm_reviews['Fandango_Ratingvalue']
y_scores = norm_reviews['RT_user_norm']
ax.scatter(x_scores, y_scores)
ax.set_ylabel("Rotten")
ax.set_xlabel("Fandango")
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Reload the review data and keep the film name plus four rating columns.
reviews = pd.read_csv("fandango_score_comparison.csv")
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
# Bare expression: only produces visible output in an interactive session.
norm_reviews

# Count how many times each distinct rating value occurs, ordered by rating
# value (sort_index) rather than by frequency.
fandango_distribution = norm_reviews["Fandango_Ratingvalue"].value_counts().sort_index()
imdb_distribution = norm_reviews["IMDB_norm"].value_counts().sort_index()
print(fandango_distribution)
print(imdb_distribution)
# Histogram of the Fandango_Ratingvalue column: 20 bins over the interval 4 to 5.
fig, ax = plt.subplots()
fandango_values = norm_reviews["Fandango_Ratingvalue"]
# Alternatives kept from the original notes:
# ax.hist(fandango_values)            # default binning
# ax.hist(fandango_values, bins=20)   # 20 bins over the full data range
ax.hist(fandango_values, bins=20, range=(4, 5))
plt.show()
# Four stacked histograms, one per rating column, with identical bins
# (20 over 0-5) and an identical y range (0-50) so they can be compared.
fig = plt.figure(figsize=(5, 20))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)

# (axes, column to plot, title). The last two titles were previously
# "M Ratings" and "IMB Ratings"; they now name the plotted columns.
panels = [
    (ax1, "Fandango_Ratingvalue", "Distribution of Fandango Ratings"),
    (ax2, "RT_user_norm", "Distribution of Rotten Tomatoes Ratings"),
    (ax3, "Metacritic_user_nom", "Distribution of Metacritic Ratings"),
    (ax4, "IMDB_norm", "Distribution of IMDB Ratings"),
]
for panel_ax, column, title in panels:
    panel_ax.hist(norm_reviews[column], bins=20, range=(0, 5))
    panel_ax.set_title(title)
    panel_ax.set_ylim(0, 50)
plt.show()
# Box plot of the RT_user_norm column on a fixed 0-5 y range.
fig, ax = plt.subplots()
rt_values = norm_reviews["RT_user_norm"]
ax.boxplot(rt_values)
ax.set_ylim(0, 5)
ax.set_xticklabels(["Rotten Tomatoes"])
plt.show()
# Side-by-side box plots, one per rating column, on a fixed 0-5 y range.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
# .values passes a 2-D array; boxplot draws one box per column of it.
ax.boxplot(norm_reviews[num_cols].values)
ax.set_xticklabels(num_cols, rotation=90)  # column names as labels, rotated 90 degrees
ax.set_ylim(0, 5)
plt.show()
# Author: 龙神~