一、matplotlib
作用:将数据可视化
示例1:
假设一天中每隔两个小时(range(2, 26, 2),共12个时间点)的气温分别是[15, 13, 14.5, 17, 20, 25, 26, 26, 27, 22, 18, 15]
pyplot用来画图。
import matplotlib.pyplot as plt

# Sample data: one temperature reading every two hours (hours 2, 4, ..., 24).
hours = list(range(2, 26, 2))
temperatures = [15, 13, 14.5, 17, 20, 25, 26, 26, 24, 22, 18, 15]

# 20 x 8 inch canvas rendered at 80 dots per inch.
fig = plt.figure(figsize=(20, 8), dpi=80)
plt.plot(hours, temperatures)

# X-axis ticks: put one tick on every sampled hour.
plt.xticks(hours)
# Alternative tick layouts that were tried (left disabled):
# _xtick_label3 = [i/2 for i in range(2, 49)]  # half-step values 1, 1.5, 2, ... 24
# plt.xticks(range(25, 50))  # integer ticks from 25 up to 49
# plt.xticks(hours[::2])  # keep only every second tick
# plt.yticks(range(min(temperatures), max(temperatures) + 1))  # y-axis ticks

# Write the image to disk first, then open the interactive window.
plt.savefig("./sig_size.png")
plt.show()
结果:
作业:
如果列表a表示从10点到12点的每一分钟的气温,如何绘制折线图观察每分钟气温的变化情况呢?
import matplotlib.pyplot as plt
import random
import matplotlib

# Select a font that can display the Chinese text used in the labels below.
matplotlib.rc("font", family="MicroSoft YaHei", weight="bold")

# Figure size in inches plus dpi (number of dots per inch).
fig = plt.figure(figsize=(20, 8), dpi=80)

# 120 data points: one random integer temperature in [20, 35] per minute.
temperatures = [random.randint(20, 35) for _ in range(120)]
minutes = range(120)
plt.plot(minutes, temperatures)

# Tick labels are assembled hour by hour: the sixty "10点…" labels are built
# completely first, then the sixty "11点…" labels are appended after them.
ten_oclock_labels = ["10点{}分".format(minute) for minute in range(60)]
eleven_oclock_labels = ["11点{}分".format(minute) for minute in range(60)]
tick_labels = ten_oclock_labels + eleven_oclock_labels

# The first two arguments are paired element by element (position -> label),
# so both are thinned with the same step of 3. Labels are rotated 270 degrees.
plt.xticks(minutes[::3], tick_labels[::3], rotation=270)
plt.yticks(range(20, 36))

# Axis captions and chart title.
plt.xlabel("时间")
plt.ylabel("温度")
plt.title("10点到12点的温度变化")

plt.show()
Q:为什么这里的 += 是先把前60个"10点"的标签全部生成完,再生成并追加后60个"11点"的标签?
# Statements execute top to bottom. The first line builds the complete list of
# sixty "10点…" labels and binds it to the name. Only afterwards is the second
# comprehension evaluated in full, and `+=` then extends the existing list in
# place with its sixty "11点…" labels, giving 120 labels in chronological order.
_xtick_labels = ["10点{}分".format(i) for i in range(60)]
_xtick_labels += ["11点{}分".format(i) for i in range(60)]
作业:
假设大家在30岁的时候,根据自己的实际情况,统计出来了从11岁到30岁每年交的女朋友数量,如列表a,请绘制出该数据的折线图,以便分析自己每年交女朋友的数量走势。
a = [1,0,1,1,2,4,3,2,3,4,4,5,6,5,4,3,3,1,1,1]
要求:
y轴表示个数
x轴表示年龄
import matplotlib.pyplot as plt
import matplotlib

# Font able to display the Chinese legend entries and tick labels.
matplotlib.rc("font", family="MicroSoft YaHei", weight="bold")
plt.figure(figsize=(20, 8), dpi=80)

# One value per age from 11 through 30 for each of the two series.
ages = range(11, 31)
own_counts = [1, 0, 1, 1, 2, 4, 3, 2, 3, 4, 4, 5, 6, 5, 4, 3, 3, 1, 1, 1]
deskmate_counts = [1, 0, 3, 1, 2, 2, 3, 3, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]
age_labels = ["{}岁".format(age) for age in ages]

# First series with a custom look: red, dashed, thick, half transparent.
plt.plot(
    ages,
    own_counts,
    label="自己",
    color="r",
    linestyle="--",
    linewidth=5,
    alpha=0.5,
)
# Second series with the default line style.
plt.plot(ages, deskmate_counts, label="同桌")

plt.xticks(ages, age_labels)

# Grid lines follow the tick positions, so changing xticks/yticks is the way
# to make the grid denser or sparser; alpha controls its transparency.
plt.grid(alpha=0.4)

# Show the `label` of every series in a legend; `loc` picks its position.
plt.legend(loc="upper left")
plt.show()
day1总结:
绘制散点图:
from matplotlib import pyplot as plt
from matplotlib import font_manager

plt.figure(figsize=(20, 8), dpi=80)

# Two series of 31 values each, plotted against the same x positions 1..31.
# NOTE(review): the names suggest readings for month 3 and month 10 — confirm.
days = range(1, 32)
y_3 = [11, 17, 16, 11, 12, 11, 12, 6, 6, 7, 8, 9, 12, 15, 14, 17, 18, 21,
       16, 17, 20, 14, 15, 15, 15, 19, 21, 22, 22, 22, 23]
y_10 = [26, 26, 28, 19, 21, 17, 16, 19, 18, 20, 20, 19, 22, 23, 17, 20, 21,
        20, 22, 15, 11, 15, 5, 13, 17, 10, 11, 13, 12, 13, 6]

# Each scatter call adds one set of markers to the current axes.
for series in (y_3, y_10):
    plt.scatter(days, series)

plt.show()
绘制条形图(plt.bar,竖向):
from matplotlib import pyplot as plt
from matplotlib import font_manager
import matplotlib

# Font able to display the Chinese movie titles on the x axis.
matplotlib.rc("font", family="MicroSoft YaHei", weight="bold")
plt.figure(figsize=(20, 8), dpi=80)

# Movie titles and the value drawn for each one (same order, 20 entries).
a = [
    "战狼2", "速度与激情8", "功夫瑜伽", "西游伏妖篇", "变形金刚5:最后的骑士",
    "摔跤吧!爸爸", "加勒比海盗5:死无对证", "金刚:骷髅岛", "极限特工:终极回归",
    "生化危机6:终章", "乘风破浪", "神偷奶爸3", "智取威虎山", "大闹天竺",
    "金刚狼3:殊死一战", "蜘蛛侠:英雄归来", "悟空传", "银河护卫队2", "情圣",
    "新木乃伊",
]
b = [56.01, 26.94, 17.53, 16.49, 15.45, 12.96, 11.8, 11.61, 11.28, 11.12,
     10.49, 10.3, 8.75, 7.55, 7.32, 6.99, 6.88, 6.86, 6.58, 6.23]

# Bars sit at integer positions 0..len(a)-1; the same positions then receive
# the titles as tick labels, rotated 90 degrees.
positions = range(len(a))
plt.bar(positions, b)
plt.xticks(positions, a, rotation=90)
plt.show()
条形图:
from matplotlib import pyplot as plt
import matplotlib

# Font able to display the Chinese movie titles used as tick labels.
matplotlib.rc("font", family="MicroSoft YaHei", weight="bold")
plt.figure(figsize=(20, 8), dpi=80)

# One group of three bars per movie; the three value lists share a's order.
# NOTE(review): the _14/_15/_16 suffixes presumably identify three dates or
# periods — confirm before adding a legend.
a = ["猩球崛起3:终极之战", "敦刻尔克", "蜘蛛侠:英雄归来", "战狼2"]
b_16 = [15746, 312, 4497, 319]
b_15 = [12357, 156, 2045, 168]
b_14 = [2358, 399, 2358, 362]

# Each series is shifted right by one bar width so the three bars of a group
# sit side by side instead of being drawn on top of each other.
bar_width = 0.2
x_14 = list(range(len(a)))
x_15 = [i + bar_width for i in x_14]
x_16 = [i + bar_width * 2 for i in x_14]

plt.bar(x_14, b_14, width=bar_width)
plt.bar(x_15, b_15, width=bar_width)
plt.bar(x_16, b_16, width=bar_width)

# Put each movie title under its group, centred on the middle bar. Without
# this the axis only shows bare numeric positions and `a` is never displayed.
plt.xticks(x_15, a)
plt.show()
直方图:
# Draws 12 bars at x = 0..11. With width=1 each bar is as wide as the spacing
# between positions, so neighbouring bars touch like in a histogram.
# NOTE(review): `quantity` is not defined in this snippet — presumably a
# 12-element list of already-counted bin frequencies; confirm against the
# full example.
plt.bar(range(12), quantity, width = 1)
二、numpy
创建数组:
import numpy as np

# From an explicit Python list.
a = np.array([12, 32])
# Integers 0 through 9; arange starts at 0 when only the stop value is given.
b = np.arange(10)
# From a Python sequence: the 15 values produced by range(15).
c = np.array(list(range(15)))
读取数据:
三、pandas
import pandas as pd
import numpy as np

# Row labels: a two-level index built from two parallel lists, with the
# levels named 'class' and 'country'.
array = [
    ['A', 'B', 'B', 'B', 'C', 'A', 'B', 'A', 'C', 'C'],
    ['JP', 'CN', 'US', 'US', 'US', 'CN', 'CN', 'CA', 'JP', 'CA'],
]
index = pd.MultiIndex.from_arrays(array, names=['class', 'country'])

# Column data, ten rows each.
data_dict = {
    # Alternates black / white for all ten rows.
    'color': ['black', 'white'] * 5,
    'size': ['S', 'M', 'L', 'M', 'L', 'S', 'S', 'XL', 'XL', 'M'],
    # Ten dates starting from 2019-01-01; freq is the offset between dates
    # ('W' = weekly).
    'date': pd.date_range('1/1/2019', periods=10, freq='W'),
    # Ten samples from the standard normal distribution; the argument gives
    # the shape of the output.
    'feature_1': np.random.randn(10),
    # Ten samples from a normal (Gaussian) distribution with mean 0.5 and
    # standard deviation 2.
    'feature_2': np.random.normal(0.5, 2, 10),
}

# DataFrame whose rows are labelled by the compound (multi-level) index.
data_df = pd.DataFrame(data_dict, index=index)
print(data_df)
3.2 分组聚合
import pandas as pd
import numpy as np

# Row labels: a two-level index built from two parallel lists, with the
# levels named 'class' and 'country'.
array = [
    ['A', 'B', 'B', 'B', 'C', 'A', 'B', 'A', 'C', 'C'],
    ['JP', 'CN', 'US', 'US', 'US', 'CN', 'CN', 'CA', 'JP', 'CA'],
]
index = pd.MultiIndex.from_arrays(array, names=['class', 'country'])

# Column data, ten rows each.
data_dict = {
    # Alternates black / white for all ten rows.
    'color': ['black', 'white'] * 5,
    'size': ['S', 'M', 'L', 'M', 'L', 'S', 'S', 'XL', 'XL', 'M'],
    # Ten dates starting from 2019-01-01; freq is the offset between dates
    # ('W' = weekly).
    'date': pd.date_range('1/1/2019', periods=10, freq='W'),
    # Ten samples from the standard normal distribution; the argument gives
    # the shape of the output.
    'feature_1': np.random.randn(10),
    # Ten samples from a normal (Gaussian) distribution with mean 0.5 and
    # standard deviation 2.
    'feature_2': np.random.normal(0.5, 2, 10),
}

# DataFrame whose rows are labelled by the compound (multi-level) index.
data_df = pd.DataFrame(data_dict, index=index)
print(data_df)

# Group by a single column, and by several columns at once.
group_1 = data_df.groupby('size')
group_2 = data_df.groupby(['size', 'color'])

# ---- Further experiments, left disabled ----
#
# Walk over the (key, sub-frame) pairs, or fetch a single group by key:
# for i in list(group_1):
#     print(i)
# print(group_1.get_group('M'))
# for i in list(group_2):
#     print(i)
#
# Row count of every group:
# print(group_1.size())
# print(group_2.size())
#
# Group by a function applied to the column names: columns whose name
# contains 'feature' form one group, all remaining columns the other.
# def get_letter_type(letter):
#     if 'feature' in letter:
#         return 'feature'
#     else:
#         return 'other'
# for i in list(data_df.groupby(get_letter_type, axis = 1)):
#     print(i)
#
# Group by both levels of the row index:
# for i in list(data_df.groupby(level = [0, 1])):
#     print(i)
#
# A different aggregation per column:
# print(group_2.agg({'feature_1': np.max, 'feature_2':np.mean}))
#
# transform returns a result aligned with the original rows; here every row
# receives the (max - min) range of its own group:
# data_range = lambda x : x.max() - x.min()
# print(data_df.groupby('size').transform(data_range))
#
# Blank out two cells, then fill missing values with the mean of the group:
# data_df.iloc[1, 3:5] = np.nan
# f = lambda x : x.fillna(x.mean())
# df_trans = group_1.transform(f)
# print(df_trans)
#
# rolling (originally marked "not understood"): inside each color group a
# window of 3 consecutive rows slides down the rows and the mean of feature_1
# is taken over each window, so the first two rows of every group are NaN
# because their window is not yet full.
# print(data_df.groupby('color').rolling(3).feature_1.mean())
3.3 时间序列
暂不看