一、pandas导入数据&matplotlib基本绘图
# _*_ coding: utf-8 _*_
import pandas as pd
import matplotlib.pyplot as plt
#
# Author: yz
# Date: 2017-12-3
#
'''
pandas导入数据和date格式转换
matplotlib基本的绘图:横纵坐标标签,标题,坐标值旋转等
'''
# Load the unemployment-rate table and turn the DATE strings into real
# datetimes (e.g. 1948/1/1 -> 1948-01-01).
unrate = pd.read_csv("data/UNRATE.csv")
unrate = unrate.assign(DATE=pd.to_datetime(unrate["DATE"]))
# print(unrate.head(10))
'''
DATE VALUE
0 1948-01-01 3.4
1 1948-02-01 3.8
2 1948-03-01 4.0
3 1948-04-01 3.9
4 1948-05-01 3.5
5 1948-06-01 3.6
6 1948-07-01 3.6
7 1948-08-01 3.9
8 1948-09-01 3.8
9 1948-10-01 3.7
'''
# plt.plot()
# plt.show()
'''
While the y-axis looks fine, the x-axis tick labels are too close together and are unreadable
We can rotate the x-axis tick labels by 90 degrees so they don't overlap
We can specify degrees of rotation using a float or integer value.
'''
# first_twelve = unrate[0:12]
# plt.plot(first_twelve["DATE"], first_twelve["VALUE"])
# plt.xticks(rotation=45) # x坐标值太长时,可以让其旋转再显示
# # print(help(plt.xticks))
# plt.show()
'''
xlabel(): accepts a string value, which gets set as the x-axis label.
ylabel(): accepts a string value, which is set as the y-axis label.
title(): accepts a string value, which is set as the plot title.
'''
# Line chart of the first twelve rows, with a title, axis labels and
# x tick labels rotated 45 degrees so the dates do not overlap.
first_twelve = unrate.iloc[:12]
month_starts = first_twelve['DATE']
rates = first_twelve['VALUE']
plt.plot(month_starts, rates)
plt.title('Monthly Unemployment Trends, 1948')
plt.ylabel('Unemployment Rate')
plt.xlabel('Month')
plt.xticks(rotation=45)
plt.show()
二、增加子图&指定图像大小和线条颜色&添加标签并指定位置
# _*_ coding: utf-8 _*_
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
#
# Author: yz
# Date: 2017-12-3
#
'''
增加子图 add_subplot(first,second,index)
指定图像大小 plt.figure(figsize=(12, 6))
指定线条的颜色 plt.plot(unrate[0:12]["MONTH"], unrate[0:12]["VALUE"], c="red")
添加标签并指定位置 plt.plot(subset["MONTH"], subset["VALUE"], c=colors[i], label=label) plt.legend(loc='upper left')
'''
'''
add_subplot(first,second,index) first means number of Row,second means number of Column.
'''
# fig = plt.figure()
# ax1 = fig.add_subplot(3, 2, 1)
# ax2 = fig.add_subplot(3, 2, 2)
# ax3 = fig.add_subplot(3, 2, 3)
# ax6 = fig.add_subplot(3, 2, 6)
# ax1.plot(np.random.randint(1, 5, 5), np.arange(5))
# ax2.plot(np.arange(10) * 3, np.arange(10))
# plt.show()
'''
指定颜色和大小
'''
# Load the table, parse DATE once, and derive the integer month (1-12)
# from the parsed dates as a new MONTH column.
unrate = pd.read_csv("data/UNRATE.csv")
parsed_dates = pd.to_datetime(unrate["DATE"])
unrate = unrate.assign(DATE=parsed_dates, MONTH=parsed_dates.dt.month)
# fig = plt.figure(figsize=(12, 6)) # figsize 图像大小
# plt.plot(unrate[0:12]["MONTH"], unrate[0:12]["VALUE"], c="red") # c 指定线条颜色
# plt.plot(unrate[12:24]["MONTH"], unrate[12:24]["VALUE"], c="green")
# plt.show()
'''
label
'''
# Overlay five consecutive 12-row slices of the table on one figure, one
# coloured line per slice, labelled 1948..1952 for the legend.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    # Rows [12*i, 12*(i+1)) hold the twelve months of year 1948 + i.
    start_index = i * 12
    end_index = (i + 1) * 12
    label = str(1948 + i)
    subset = unrate[start_index:end_index]
    plt.plot(subset["MONTH"], subset["VALUE"], c=color, label=label)  # label feeds the legend
# Legend position; accepted values: 'best', 'upper right/left', 'lower right/left',
# 'right', 'center right/left', 'upper/lower center', 'center'.
plt.legend(loc='upper left')
# print(help(plt.legend))
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')
plt.show()
三、柱状图&散点图
# _*_ coding: utf-8 _*_
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
#
# Author: yz
# Date: 2017-12-3
#
'''
柱状图 fig, ax = plt.subplots() ax.bar(bar_positions, bar_heights, 0.5)
散点图
'''
#
# Load the data and keep only the film title plus the rating columns
#
# NOTE(review): 'Metacritic_user_nom' presumably mirrors the CSV header
# spelling -- confirm against the data file before "correcting" it.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]
reviews = pd.read_csv("data/fandango_scores.csv")
norm_reviews = reviews.loc[:, cols]
# print(norm_reviews[:1])
#
# 柱状图
#
# num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# bar_heights = norm_reviews.ix[0, num_cols].values
# # print(bar_heights) # [4.2999999999999998 3.5499999999999998 3.8999999999999999 4.5 5.0]
# bar_positions = arange(5) + 0.75
# # print(bar_positions) # [0.75 1.75 2.75 3.75 4.75]
# fig, ax = plt.subplots()
# ax.bar(bar_positions, bar_heights, 0.5)
# plt.show()
#
# 将坐标值改为标签 ax.set_xticklabels(num_cols, rotation=45)
#
# num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
# bar_heights = norm_reviews.ix[0, num_cols].values
# # print(bar_heights) # [4.2999999999999998 3.5499999999999998 3.8999999999999999 4.5 5.0]
# bar_positions = arange(5) + 0.75
# # print(bar_positions) # [0.75 1.75 2.75 3.75 4.75]
# fig, ax = plt.subplots()
# ax.bar(bar_positions, bar_heights, 0.5)
#
# tick_positions = range(1,6)
# ax.set_xticks(tick_positions)
# ax.set_xticklabels(num_cols, rotation=45)
#
# ax.set_xlabel('Rating Source')
# ax.set_ylabel('Average Rating')
# ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
# plt.show()
# fig, ax = plt.subplots()
# # ax.hist(norm_reviews['Fandango_Ratingvalue'])
# # ax.hist(norm_reviews['Fandango_Ratingvalue'],bins=20)
# ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(3, 5),bins=20)
# plt.show()
#
# Scatter plot: Fandango rating (x) against Rotten Tomatoes user score (y)
#
fig, ax = plt.subplots()
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']
ax.scatter(fandango_scores, rt_scores)
# ax.scatter([4.5, 3], [4.3, 4]) # plots the points (4.5, 4.3) and (3, 4)
ax.set(xlabel='Fandango', ylabel='Rotten Tomatoes')
plt.show()
四、曲线图&坐标的设置
# _*_ coding: utf-8 _*_
import pandas as pd
import matplotlib.pyplot as plt
#
# Author: yz
# Date: 2017-12-3
#
'''
曲线图
'''
# Table with a 'Year' column and one percentage column per major.
degrees_csv = 'data/percent-bachelors-degrees-women-usa.csv'
women_degrees = pd.read_csv(degrees_csv)
# plt.plot(women_degrees['Year'], women_degrees['Biology'])
# plt.show()
# plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# plt.plot(women_degrees['Year'], 100-women_degrees['Biology'], c='green', label='Men')
# plt.legend(loc='upper right')
# plt.title('Percentage of Biology Degrees Awarded By Gender')
# plt.show()
# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], label='Women')
# ax.plot(women_degrees['Year'], 100-women_degrees['Biology'], label='Men')
#
# ax.tick_params(bottom="off", top="off", left="off", right="off")
# ax.set_title('Percentage of Biology Degrees Awarded By Gender')
# ax.legend(loc="upper right")
#
# plt.show()
# fig, ax = plt.subplots()
# ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
# ax.plot(women_degrees['Year'], 100-women_degrees['Biology'], c='green', label='Men')
# ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# for key,spine in ax.spines.items():
# spine.set_visible(False)
# # End solution code.
# ax.legend(loc='upper right')
# plt.show()
# 2x2 grid, one subplot per major: the women's percentage by year and its
# complement (100 - women) plotted as the men's line.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)  # subplot indices are 1-based
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()
# 2x2 grid, one subplot per major, with the chart clutter removed: spines
# hidden, identical axis limits on every panel, a per-panel title, and the
# tick marks switched off.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)  # subplot indices are 1-based
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # tick_params expects booleans; the string "off" is truthy, so on current
    # matplotlib releases it would leave the tick marks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()
五、线条的种类
# _*_ coding: utf-8 _*_
import pandas as pd
import matplotlib.pyplot as plt
#
# Author: yz
# Date: 2017-12-3
#
# The majors that each get their own subplot, and the table they are read from.
major_cats = [
    'Biology',
    'Computer Science',
    'Engineering',
    'Math and Statistics',
]
women_degrees = pd.read_csv('data/percent-bachelors-degrees-women-usa.csv')
# cb_dark_blue = (0/255, 107/255, 164/255)
# cb_orange = (255/255, 128/255, 14/255)
#
# fig = plt.figure(figsize=(12, 12))
#
# for sp in range(0,4):
# ax = fig.add_subplot(2,2,sp+1)
# # The color for each line is assigned here.
# ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women')
# ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men')
# for key,spine in ax.spines.items():
# spine.set_visible(False)
# ax.set_xlim(1968, 2011)
# ax.set_ylim(0,100)
# ax.set_title(major_cats[sp])
# ax.tick_params(bottom="off", top="off", left="off", right="off")
#
# plt.legend(loc='upper right')
# plt.show()
# Setting Line Width
# 2x2 grid, one subplot per major, drawn with custom RGB colours and thick
# lines; spines and tick marks are hidden and all panels share axis limits.
# RGB channels scaled to 0..1. The float divisor keeps the values
# fractional even where int / int floors (Python 2).
cb_dark_blue = (0 / 255.0, 107 / 255.0, 164 / 255.0)
cb_orange = (255 / 255.0, 128 / 255.0, 14 / 255.0)
fig = plt.figure(figsize=(12, 12))
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)  # subplot indices are 1-based
    # Set the line width when specifying how each line should look.
    ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women', linewidth=10)
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men', linewidth=10)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major)
    # tick_params expects booleans; the string "off" is truthy, so on current
    # matplotlib releases it would leave the tick marks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
# pyplot.legend() attaches the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()