# Matplotlib学习_散点图&折线图_ax.scatter() 标记图例-CSDN博客
#
# 本文链接：https://blog.csdn.net/sinat_15355869/article/details/79692889
# coding: utf-8

# ## 1.1：什么是matplotlib

# In[54]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Select the inline backend so figures render in the notebook without an
# explicit plt.show() in every cell.
# The magic's argument must be just 'inline': explanatory text placed inside
# the argument string is handed to the magic as extra arguments rather than
# being treated as a comment.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[55]:


# Sample data for the bar chart: x is the integer sequence 0..9 and y holds
# ten random floats drawn uniformly from the half-open interval [0, 1).
x = np.arange(0, 10)
y = np.random.random(size=10)


# In[56]:


# Echo x (displayed as the cell output when run in a notebook).
x


# In[57]:


# Echo y.
y


# In[58]:


# One single-letter tick label per bar: 'a' through 'j'.
labels = [*"abcdefghij"]


# In[59]:


# Bar chart of y at positions x, with the letters in `labels` as tick labels.
plt.bar(x, height=y, tick_label=labels)


# ## 1.2、两种作图方式
# 

# In[60]:


# 1) Function-style (pyplot) plotting: every call acts on pyplot's implicit
#    current figure/axes.
x = np.arange(0, 5)                          # x-axis data: 0, 1, 2, 3, 4
y = np.linspace(start=10, stop=20, num=5)    # y-axis data: 5 evenly spaced values from 10 to 20

# Draw the line from the x/y data with a single function call.
plt.plot(x, y)

# Axis labels first, title last.
plt.ylabel("这是Y轴")
plt.xlabel("这是X轴")
plt.title("X-Y 图表")


# In[61]:


# Echo x.
x


# In[62]:


# Echo y.
y


# In[63]:


# 2) Object-oriented plotting: create the figure and axes explicitly, then
#    configure everything through methods on the axes object.
x = np.arange(0, 5)
y = np.linspace(start=10, stop=20, num=5)
# numpy.linspace reference: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linspace.html

# plt.subplots() returns a (figure, axes) pair, unpacked here into two names.
fig, ax = plt.subplots()

# Add a line chart of the x/y data to the axes.
ax.plot(x, y)

# Axis labels in one call, then the title.
ax.set(xlabel=" i am X label", ylabel=" i am Y label")
ax.set_title(" XY_Plot")


# In[64]:


# matplotlib 组成结构
# + 第一层：canvas 类似画板
# + 第二层：figure 类似画布（或理解为画图区域）
# + 第三层：axes 子图（或理解为坐标系）
# + 第四层：各类图表信息，包括：xaxis（x轴），yaxis（y轴），title（标题），legend（图例），grid（网格线），spines（边框线）,data（数据）等等


# ## 2.1 散点图学习

# In[65]:


import os


# In[66]:


# Change the working directory to the folder with the example data sets.
# NOTE(review): this absolute path is specific to the original author's
# machine; presumably the CSV files read later by bare file name live here.
dataset_dir = '/Users/a1/Desktop/算法实战/Matplotlib_学习/参考资料_数据集'
os.chdir(dataset_dir)


# In[67]:


# Load the example data set (mtcars.csv in the current working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="mtcars.csv")
df.head(n=5)


# In[68]:


# 1) Function-style scatter plot: mpg on the x axis, wt on the y axis.
plt.scatter(x=df["mpg"], y=df["wt"])


# In[69]:


# 2) Object-oriented scatter plot, configured step by step on the axes:
#    figure/axes -> data points -> labels and title (with font sizes) -> axis ranges.

# Step 1: figure + axes; the figure is 8 x 5 inches at 80 dots per inch.
fig, ax = plt.subplots(figsize=(8, 5), dpi=80)
# Step 2: add the data to the axes.
ax.scatter(x=df["mpg"], y=df["wt"])
# Step 3: main title and axis labels.
ax.set_title("The relationship of mpg&wt", fontsize=16)
ax.set_xlabel("mpg", fontsize=13)
ax.set_ylabel("wt", fontsize=13)
# Step 4: visible value range of each axis.
ax.set_xlim(left=6, right=35)
ax.set_ylim(bottom=1, top=7)


# In[70]:


# 上面数据的意思： 每加仑英里数 与 车重量 的关系， 车越重，开的越短


# ### 散点图的参数
# 
# ax.scatter(x, y, s=None, c=None, marker=None, cmap=None, norm=None, vmin=None, vmax=None, alpha=None, linewidths=None, verts=None, edgecolors=None, *, data=None, **kwargs)

# #### S代表点的大小

# In[71]:


# The `s` argument sets the marker size.
# Here each bubble's size comes from the "disp" column; `s` may also be a
# single constant applied to every point.
fig, ax = plt.subplots(figsize=(8, 5), dpi=80)

bubble_sizes = df["disp"]
ax.scatter(df["mpg"], df["wt"], s=bubble_sizes, alpha=0.5)

# Axis labels and title. No axis limits are set here, so matplotlib picks them.
ax.set(xlabel="mpg", ylabel="wt")
ax.set_title("The relationship of mpg and wt", fontsize=15)

# Annotate the chart with a text note placed at data coordinates (25, 5.4).
ax.text(x=25, y=5.4, s="buble size is controlled by 'disp'")


# #### C代表点的颜色

# In[72]:


# The `c` argument sets the marker fill colour.
fig, ax = plt.subplots(figsize=(8, 6), dpi=80)

# Every marker has the fixed size s=100; the colour of each point is driven by
# the "vs" column (a two-level variable holding 0 and 1).
vs_codes = df["vs"]
ax.scatter(df["mpg"], df["wt"], s=100, c=vs_codes)

# Title and axis labels.
ax.set_xlabel("mpg", fontsize=13)
ax.set_ylabel("wt", fontsize=13)
ax.set_title("The relationship of mpg&wt", fontsize=16)


# In[73]:


# The chart above still lacks a legend explaining what the yellow and the
# purple points stand for. Inspect the "vs" column first.
df["vs"]


# ### 对比Pandas中loc 与 iloc用法
#     df = pd.read_csv(filepath_or_buffer="D://movie.csv")
#     df_new = df.set_index(["country"])
#     df_new.loc[list(["Canada"])] # 1
#     df_new.loc[df_new["duration"]>160] # 2
#     df_new.loc[((df_new["duration"] > 200) & (df_new["director_facebook_likes"] > 300 )),"flage"] =1 # 3
#     df_new.loc[df_new["duration"].isin([100])] # 4
#     df_new.query("duration > 100 & index == 'UK'") # 5
# 
# #1：根据列中的元素，选取对应元素的数据集 
# #2：根据元素的选取条件来选取对应的数据集 
# #3：根据元素的选取条件来来选取对应的数据集，并在符合条件的数据行添加flage标签 
# #4：isin函数是Series的方法，用来判断Series中的每个值是否在给定的目标值列表中
# #5：query函数中用来判断条件符合的数据集并返回
# 
#     df_new.iloc[0:4]
# #iloc比较简单，它是基于索引位来选取数据集，0:4就是选取 0，1，2，3这四行，需要注意的是这里是前闭后开集合

# In[74]:


# Approach 1: split the data by hand, one scatter call per group.
# Build a boolean row mask per "vs" value, then use it to pick the matching
# rows of the mpg (x) and wt (y) columns.
is_vs0 = df["vs"] == 0
is_vs1 = df["vs"] == 1

x_vs0, y_vs0 = df.loc[is_vs0, "mpg"], df.loc[is_vs0, "wt"]
x_vs1, y_vs1 = df.loc[is_vs1, "mpg"], df.loc[is_vs1, "wt"]

# Figure and axes.
fig, ax = plt.subplots()
# One scatter call per group, each with its own colour and legend label.
ax.scatter(x_vs0, y_vs0, c="red", label="vs=0")
ax.scatter(x_vs1, y_vs1, c="blue", label="vs=1")
# Axis labels, title, and the legend built from the labels above.
ax.set(xlabel="mpg", ylabel="wt")
ax.set_title("The relationship of mpg and wt")
ax.legend()


# In[77]:


# Approach 2: scales to many groups by looping over the distinct "vs" values
# and drawing one labelled scatter series per value.
fig, ax = plt.subplots()

# Sort the distinct values so the legend order is the same on every run
# (a bare set has no guaranteed order).
vs_values = sorted(set(df["vs"]))
vs_max = max(vs_values)

for v in vs_values:
    # Spread the groups over the first 80% of the "jet" colormap. When the
    # largest value is 0 there is nothing to scale by, so fall back to the
    # start of the colormap instead of dividing by zero.
    shade = plt.cm.jet(v / vs_max * 0.8) if vs_max else plt.cm.jet(0.0)

    # A boolean mask selects this group's rows whatever the index labels are.
    group = df.loc[df["vs"] == v]

    # `color=` (not `c=`) for a single RGBA tuple: `c=` would treat the tuple
    # as per-point values to colormap if its length matched the data.
    ax.scatter(group["mpg"], group["wt"], color=shade, label="vs = " + str(v))

ax.set_xlabel("mpg")
ax.set_ylabel("wt")
ax.set_title("The relationship of mpg and wt")
ax.legend()


# In[78]:


# The examples above used `c` to colour points per group. When `c` is a single
# fixed value it merely recolours every point and carries no extra dimension
# of information.


# In[79]:


fig, ax = plt.subplots()

# Every point drawn in the same named colour.
ax.scatter(x=df["mpg"], y=df["wt"], c="red")

ax.set(xlabel="mpg", ylabel="wt")
ax.set_title("The relationship of mpg and wt")


# In[80]:


# A custom colour can be given as an (r, g, b) tuple with components in [0, 1].
fig, ax = plt.subplots()

# Pass the tuple through `color=` rather than `c=`: a bare numeric sequence
# given to `c=` is indistinguishable from per-point values to be colormapped,
# and would be treated as such if its length matched the number of points.
custom_blue = (29/255, 108/255, 199/255)
ax.scatter(df["mpg"], df["wt"], color=custom_blue)

ax.set_xlabel("mpg")
ax.set_ylabel("wt")
ax.set_title("The relationship of mpg and wt")


# #### marker代表点的形状

# In[81]:


# The `marker` argument sets the shape of the points; "+" draws plus signs.
fig, ax = plt.subplots()

ax.scatter(x=df["mpg"], y=df["wt"], marker="+")

ax.set(xlabel="mpg", ylabel="wt")
ax.set_title("The relationship of mpg and wt")


# #### 点的边界颜色

# In[82]:


# White-filled markers outlined in a custom (r, g, b) edge colour.
fig, ax = plt.subplots(figsize=(8, 5), dpi=80)

outline_rgb = (244/255, 119/255, 38/255)
ax.scatter(df["mpg"], df["wt"], c="white", edgecolors=outline_rgb)

ax.set(xlabel="mpg", ylabel="wt")
ax.set_title("The relationship of mpg and wt")


# ### 绘制多个散点图

# In[87]:


# Several scatter plots on one figure: two axes side by side.
fig = plt.figure(figsize = (10,6), dpi = 80)
ax1 = fig.add_subplot(121) # 1 row, 2 columns, first chart
ax2 = fig.add_subplot(122) # 1 row, 2 columns, second chart

ax1.scatter(df["mpg"], df["wt"])
ax1.set_xlabel("mpg")
ax1.set_ylabel("wt")
ax1.set_title("The relationship of mpg and wt")

ax2.scatter(df["mpg"], df["wt"])
# Label the second axes itself; setting the x label on ax1 again would leave
# the right-hand chart without one.
ax2.set_xlabel("mpg")
ax2.set_ylabel("wt")
ax2.set_title("The relationship of mpg and wt")


# In[88]:


# Scatter plots on separate figures: one figure (and one axes) per chart.
# Figure 1: mpg against wt.
fig1 = plt.figure()
ax1 = fig1.add_subplot(111) # 1 row, 1 column, single chart
ax1.scatter(df["mpg"], df["wt"])
ax1.set_xlabel("mpg")
ax1.set_ylabel("wt")
ax1.set_title("The relationship of mpg and wt")
# Figure 2: mpg against qsec.
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.scatter(df["mpg"],df["qsec"])
# The labels belong on ax2 and must name the column actually plotted (qsec);
# relabelling ax1 here would leave figure 2 unlabelled.
ax2.set_xlabel("mpg")
ax2.set_ylabel("qsec")
ax2.set_title("The relationship of mpg and qsec")


# In[89]:


# Preview the first five rows again before building the matrix.
df.head(n=5)


# In[94]:


# Scatter-plot matrix via pandas' plotting helper: a quick way to see how the
# columns from "mpg" through "carb" relate to each other pairwise. The
# diagonal cells show a kernel density estimate of each column.
df_matrix = pd.plotting.scatter_matrix(
    frame=df.loc[:, "mpg":"carb"],
    figsize=(16, 10),
    diagonal="kde",
)


# ## 3.1 折线图学习

# In[96]:


# Load the sales data into its own frame, df2, and preview it.
df2 = pd.read_csv("sale.csv")
df2.head()


# In[97]:


# Convert the "date" column to datetime values.
# The conversion must target df2 (the sales frame just loaded); `df` is the
# mtcars frame read earlier from mtcars.csv.
df2["date"] = pd.to_datetime(df2["date"])
df2.head()


# ### 3.2 什么是折线图

# In[109]:


# Create the figure and axes: 8 x 5 inches at 80 dots per inch.
fig, ax = plt.subplots(figsize = (8, 5), dpi = 80)
# Plot sale against date. The sales data was loaded from sale.csv into df2;
# `df` is the mtcars frame read earlier.
ax.plot(df2["date"], df2["sale"])
# Axis labels and title in a single call.
ax.set(xlabel="date",ylabel="sale",title="sale_plot")


# #### 2）折线图参数介绍
# 
# + 线条类型
# >参数：linestyle或者ls，可取值：
# （1）"-" 
# （2）"--"
# （3）"-."
# （4）":"
# 
# + 线条粗细
# >参数：linewidth或lw
# + 线条颜色
# >+ 参数：color或c
# >+ 颜色名称或简写 
# blue/b 
# green/g 
# red/r 
# cyan/c 
# magenta/m 
# yellow/y 
# black/k 
# white/w
# >+ (r, g, b),取值为[0, 1]之间

# In[111]:


# 2) Line style, line width and line colour.
fig, ax = plt.subplots()
# Dash-dot line, 2 points wide, in a custom (r, g, b) colour. The sales data
# was loaded from sale.csv into df2; `df` is the mtcars frame read earlier.
ax.plot(df2["date"], df2["sale"], linestyle="-.", linewidth = 2, color=(222/255, 89/255, 155/255))
ax.set(xlabel = "date", ylabel="sale", title="The relationship of date & sale")


# #### 3）针对数据标记的处理
#     marker 标记类型
#     markeredgecolor或者mec 标记边界颜色
#     markeredgewidth或者mew 标记边界宽度
#     markerfacecolor或者mfc 标记填充色

# In[116]:


fig,ax = plt.subplots(figsize=(8,5),dpi=80)

# Circle markers ("o") with a black edge (mec="k"), white fill (mfc="w") and a
# 0.5-point edge width (mew). The sales data was loaded from sale.csv into
# df2; `df` is the mtcars frame read earlier.
ax.plot(df2["date"],df2["sale"],marker="o",mec="k",mfc="w",mew=0.5)
ax.set(xlabel="date",ylabel="value",title="plot")


# In[117]:


fig,ax = plt.subplots(figsize=(8,5),dpi=80)

# Green line labelled "sale"; the label is what the legend displays. The
# sales data was loaded from sale.csv into df2; `df` is the mtcars frame
# read earlier.
ax.plot(df2["date"],df2["sale"],color="g",label="sale")

ax.set(xlabel="date",ylabel="sale",title="plot")
# loc="best" lets matplotlib choose where to place the legend.
ax.legend(loc="best")


# 参数loc的可取值：
# + 0: 'best'
# + 1: 'upper right'
# + 2: 'upper left'
# + 3: 'lower left'
# + 4: 'lower right'
# + 5: 'right'
# + 6: 'center left'
# + 7: 'center right'
# + 8: 'lower center'
# + 9: 'upper center'
# + 10: 'center'

# 4）绘制网格线

# In[119]:


fig, ax=plt.subplots(figsize=(8,6), dpi = 80)
# Grey line given as an (r, g, b) tuple. The sales data was loaded from
# sale.csv into df2; `df` is the mtcars frame read earlier.
ax.plot(df2["date"], df2["sale"], color=(89/255, 89/255, 89/255), label= "sale")
ax.set(xlabel="date", ylabel="value", title="plot")
ax.legend(loc="best")
ax.grid(True) # turn the grid lines on


# ### 3.3 在一个图中绘制多条线

# In[123]:


# Two lines on the same axes. The sales data was loaded from sale.csv into
# df2; `df` is the mtcars frame read earlier.
fig, ax = plt.subplots()

ax.plot(df2["date"], df2["sale"], label="sale")
# Second series: the same values shifted up by one random integer drawn from
# [1000, 5000). It is a single scalar offset, so the two lines are parallel.
ax.plot(df2["date"], df2["sale"] + np.random.randint(1000,5000), label="more sale")

ax.set(xlabel="date", ylabel="sale", title="plot")
ax.legend()