机器学习基础知识笔记
- 绘图是数据分析工作中最重要的任务之一,将各种数据以图形的方式表现出来更加直观。Matplotlib模块是Python数据分析、数据探索中数据可视化的重要模块。可以生成点线图plot、柱状图bar、饼状图pie、散点图scatter、直方图hist等等各种数据图表。
- 可以参考Matplotlib的官网进行各种图形的绘制:官网链接(https://matplotlib.org/)
1. 绘制点线图
import matplotlib.pyplot as plt
import numpy as np

# Line plot of sin over [0, 2*pi], sampled with a fixed step of 0.01.
step = 0.01
angles = np.arange(0, 2*np.pi + step, step)
print(angles)
sine_values = np.sin(angles)

plt.plot(angles, sine_values)
plt.show()
- 改变线的形状和颜色,会有不同的图效果:
import matplotlib.pyplot as plt
import numpy as np

# Same sine curve as a line plot, sampled with a fixed step of 0.01.
step = 0.01
angles = np.arange(0, 2*np.pi + step, step)
print(angles)
sine_values = np.sin(angles)

# The format string 'b--' selects a blue dashed line.
line_format = 'b--'
plt.plot(angles, sine_values, line_format)
plt.show()
2. 柱状图
- 绘制一个简单柱状图:
import matplotlib.pyplot as plt
import numpy as np

# Simple bar chart: one bar per (position, height) pair.
positions = [1, 2, 3, 4, 5, 6]
heights = [500, 200, 100, 800, 700, 200]

plt.bar(positions, heights)
plt.grid(linestyle="--")  # draw the grid with dashed lines
plt.show()
- 需要添加下面两行代码,才能正常显示中文和负号。
# Select the SimHei font and turn off the Unicode minus glyph so that
# Chinese text and negative signs both display correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
- 为了能够为x轴、y轴以及图表设置名称(标题),需要先获取坐标系(Axes)对象,具体的代码如下:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]

# An Axes object is needed to set the chart title and the axis labels.
ax = plt.subplot()
ax.set_title("月度销售额(万元)")
ax.set_xlabel("月份")
ax.set_ylabel("万元")

plt.bar(x, y)
plt.grid(linestyle="--")  # dashed grid lines
plt.show()
- 设置x的刻度和label
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]

# Build one tick label per x value ("1月", "2月", ...) with a comprehension.
xlabel = ["{0}月".format(t) for t in x]

ax = plt.subplot()
ax.set_title("月度销售额(万元)")
ax.set_xlabel("月份")
ax.set_ylabel("万元")
ax.set_xticks(x)            # tick positions along the x axis
ax.set_xticklabels(xlabel)  # text displayed at each tick position

plt.bar(x, y)
plt.grid(linestyle="--")  # dashed grid lines
plt.show()
- 可以通过每个柱子对象的set_color()方法(即bar.set_color())改变每个柱子的颜色,具体实现代码如下:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]

# Build one tick label per x value ("1月", "2月", ...) with a comprehension.
xlabel = ["{0}月".format(t) for t in x]

ax = plt.subplot()
ax.set_title("月度销售额(万元)")
ax.set_xlabel("月份")
ax.set_ylabel("万元")
ax.set_xticks(x)            # tick positions along the x axis
ax.set_xticklabels(xlabel)  # text displayed at each tick position

# Colour each bar individually; colours are "#RRGGBB" hex strings.
bars = plt.bar(x, y)
colors = ["#FF0000", "#00FF00", "#0000FF", "#001122", "#ee00ee", "#221100"]
for bar, color in zip(bars, colors):
    bar.set_color(color)

plt.grid(linestyle="--")  # dashed grid lines
plt.show()
- 可以使用plt.text()函数在图片上添加文字。其内部参数为text(x横坐标,y纵坐标,显示数据,水平对齐,垂直对齐)
- 在每个柱子上也可以添加文字,代码如下:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]

# Build one tick label per x value ("1月", "2月", ...) with a comprehension.
xlabel = ["{0}月".format(t) for t in x]

ax = plt.subplot()
ax.set_title("月度销售额(万元)")
ax.set_xlabel("月份")
ax.set_ylabel("万元")
ax.set_xticks(x)            # tick positions along the x axis
ax.set_xticklabels(xlabel)  # text displayed at each tick position

# Colour each bar individually; colours are "#RRGGBB" hex strings.
bars = plt.bar(x, y)
colors = ["#FF0000", "#00FF00", "#0000FF", "#001122", "#ee00ee", "#221100"]
for bar, color in zip(bars, colors):
    bar.set_color(color)

# Label every bar with its value: centred on the bar, with the bottom of the
# text anchored at the bar's height.
for x1, y1 in zip(x, y):
    plt.text(x1, y1, "%d万元" % (y1), ha="center", va="bottom")

plt.grid(linestyle="--")  # dashed grid lines
plt.show()
- 还可以在这个图上继续显示折线图:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]

# Build one tick label per x value ("1月", "2月", ...) with a comprehension.
xlabel = ["{0}月".format(t) for t in x]

ax = plt.subplot()
ax.set_title("月度销售额(万元)")
ax.set_xlabel("月份")
ax.set_ylabel("万元")
ax.set_xticks(x)            # tick positions along the x axis
ax.set_xticklabels(xlabel)  # text displayed at each tick position

# Colour each bar individually; colours are "#RRGGBB" hex strings.
bars = plt.bar(x, y)
colors = ["#FF0000", "#00FF00", "#0000FF", "#001122", "#ee00ee", "#221100"]
for bar, color in zip(bars, colors):
    bar.set_color(color)

# Label every bar with its value: centred on the bar, with the bottom of the
# text anchored at the bar's height.
for x1, y1 in zip(x, y):
    plt.text(x1, y1, "%d万元" % (y1), ha="center", va="bottom")

# Overlay a line plot of the same data on top of the bars.
plt.plot(x, y)
plt.grid(linestyle="--")  # dashed grid lines
plt.show()
- 还可以在图上继续画出点:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]

# Build one tick label per x value ("1月", "2月", ...) with a comprehension.
xlabel = ["{0}月".format(t) for t in x]

ax = plt.subplot()
ax.set_title("月度销售额(万元)")
ax.set_xlabel("月份")
ax.set_ylabel("万元")
ax.set_xticks(x)            # tick positions along the x axis
ax.set_xticklabels(xlabel)  # text displayed at each tick position

# Colour each bar individually; colours are "#RRGGBB" hex strings.
bars = plt.bar(x, y)
colors = ["#FF0000", "#00FF00", "#0000FF", "#001122", "#ee00ee", "#221100"]
for bar, color in zip(bars, colors):
    bar.set_color(color)

# Label every bar with its value: centred on the bar, with the bottom of the
# text anchored at the bar's height.
for x1, y1 in zip(x, y):
    plt.text(x1, y1, "%d万元" % (y1), ha="center", va="bottom")

# Overlay a blue dashed line and a scatter marker at every data point.
plt.plot(x, y, 'b--')
plt.scatter(x, y)
plt.grid(linestyle="--")  # dashed grid lines
plt.show()
3. 绘制饼状图
- explode参数的意思是,使某一个扇区从饼图中凸出(分离)出来。
import matplotlib.pyplot as plt

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

job_types = ['Java', '大数据', '人工智能']
job_counts = [50000, 10000, 15000]

# A non-zero explode entry pushes that wedge outwards; only the first wedge
# is offset here.
plt.pie(
    job_counts,
    explode=[0.1, 0, 0],
    labels=job_types,
    colors=['#FFFF00', '#00FF00', '#0000FF'],
)
plt.show()
- 还可以通过自行编写label来重新显示饼状图:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

job_types = ['Java', '大数据', '人工智能']
job_counts = np.array([50000, 10000, 15000])

# Share of each job type in percent, used to build "name:xx.xx%" labels.
percentages = job_counts/np.sum(job_counts) * 100
labels = [
    "%s:%.2f%%" % (name, share)
    for name, share in zip(job_types, percentages)
]

plt.pie(
    job_counts,
    explode=[0.1, 0, 0],
    labels=labels,
    colors=['#FFFF00', '#00FF00', '#0000FF'],
)
plt.title("全国职位数量对比饼状图")
plt.show()
4. 绘制散点图
import matplotlib.pyplot as plt
import numpy as np

# Scatter plot of 100 random points with integer coordinates drawn by
# np.random.randint(0, 100).
shape = (100,)
xs = np.random.randint(0, 100, size=shape)
ys = np.random.randint(0, 100, size=shape)

# alpha controls the transparency, marker the shape of each point.
plt.scatter(xs, ys, c="#FF0000", alpha=0.5, marker='o')
plt.show()
5. 绘制子图
- 使用add_subplot函数,完成子图绘制:
import matplotlib.pyplot as plt

# Draw a line plot in the first cell of a 2x2 subplot grid.
fig = plt.figure()
first_axes = fig.add_subplot(2, 2, 1)

x_values = [1, 2, 3, 4, 5]
y_values = [100, 200, 300, 50, 100]
first_axes.plot(x_values, y_values)
plt.show()
这里的(2, 2, 1)指将画布分为两行两列,而1意为第1个位置(即第1行第1列)的子图。同理,下面代码中的(2, 1, 2)指将画布分为两行一列并取第2个位置,即占据整个下半部分。继续增加子图,可以得到如下的效果:
import matplotlib.pyplot as plt
import numpy as np

# Font setup so that Chinese labels and the minus sign render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Layout: two subplots side by side in the top row of a 2x2 grid, and one
# subplot at position 2 of a 2x1 grid (the bottom row).
figure = plt.figure()
ax1 = figure.add_subplot(2, 2, 1)
ax2 = figure.add_subplot(2, 2, 2)
ax3 = figure.add_subplot(2, 1, 2)

# =================== ax1: pie chart ========================
jobType = ['Java', '大数据', '人工智能']
jobs = np.array([50000, 10000, 15000])
# Label each wedge as "name:xx.xx%" using its share of the total.
labels = ["%s:%.2f%%" % (j, t) for t, j in zip(jobs/np.sum(jobs) * 100, jobType)]
ax1.pie(jobs, explode=[0.1, 0, 0], labels=labels, colors=['#FFFF00', '#00FF00', '#0000FF'])
ax1.set_title("全国职位数量对比饼状图", loc='center')

# =================== ax2: scatter plot =====================
scatter_x = np.random.randint(0, 100, size=(100,))
scatter_y = np.random.randint(0, 100, size=(100,))
# alpha controls the transparency, marker the shape of each point.
ax2.scatter(scatter_x, scatter_y, c="#FF0000", alpha=0.5, marker='o')

# =================== ax3: bar chart ========================
x = [1, 2, 3, 4, 5, 6]
y = [500, 200, 100, 800, 700, 200]
# Build one tick label per x value ("1月", "2月", ...) with a comprehension.
xlabel = ["{0}月".format(t) for t in x]
ax3.set_title("月度销售额(万元)", loc='left')
ax3.set_xlabel("月份")
ax3.set_ylabel("万元")
ax3.set_xticks(x)            # tick positions along the x axis
ax3.set_xticklabels(xlabel)  # text displayed at each tick position

# Colour each bar individually; colours are "#RRGGBB" hex strings.
bars = ax3.bar(x, y)
colors = ["#FF0000", "#00FF00", "#0000FF", "#001122", "#ee00ee", "#221100"]
for bar, color in zip(bars, colors):
    bar.set_color(color)

# Annotate on ax3 explicitly instead of relying on pyplot's "current axes",
# so the labels stay on the bar chart regardless of subplot creation order.
for x1, y1 in zip(x, y):
    ax3.text(x1, y1, "%d万元" % (y1), ha="center", va="bottom")

# Overlay a blue dashed line and a scatter marker at every data point.
ax3.plot(x, y, 'b--')
ax3.scatter(x, y)
ax3.grid(linestyle="--")  # dashed grid lines
plt.show()