2分钟学会python数据分析与机器学习知识点(三)

最新推荐文章于 2023-07-10 15:00:00 发布

刘阳洋

最新推荐文章于 2023-07-10 15:00:00 发布

阅读量580

点赞数

分类专栏：数据分析

本文链接：https://blog.csdn.net/weixin_36550048/article/details/108297288

版权

数据分析专栏收录该内容

5 篇文章 0 订阅

订阅专栏

2分钟学会python数据分析与机器学习知识点（三）

- 第四节、Matplotlib

第四节、Matplotlib

1、matplotlib基本操作

1.1 pycharm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


#首先画一个简单的图
#%matplotlib inline  在notebook里面需要执行这样一个东西不用调用show方法
#常用颜色
#蓝色b 绿色g 红色r 青色c 品红m 黄色y 黑色k 白色w
#常用折线图形状
#- 实线 --虚线 -.虚点线 .点线 x乘号线 d菱形线 D实心菱形 +加号线 p五角线
#2上叉线 s正方点 h六边形点1 H六边形点2 o圆点 *星形 ,像素点 :点线
# plt.plot([1,2,3,4,5],[1,16,81,256,512],'r-.')
# plt.xlabel('x')
# plt.ylabel('y')
# plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
# plt.title('最基本的一个图')
# plt.xlabel('round',fontsize=24)
# plt.ylabel('value',fontsize=24)
#
# plt.show()

#
# numpy=np.arange(0,10,0.5)
# plt.plot(numpy,numpy,'r-')
# plt.plot(numpy,numpy**2,'c-')
# plt.plot(numpy,numpy**3,'gD')
# plt.show()


# Sample one sine curve on [-10, 10]; 50 points is np.linspace's default count.
x = np.linspace(-10, 10, num=50)
y = np.sin(x)

# plt.plot(x,y,'g-',linewidth=3.0,marker='o',markerfacecolor='r',markersize=10,alpha=1)
# plt.show()

#实线子图
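#subplot(211) reads as rows=2, columns=1, index=1: a grid of two rows and one column, drawing in the first cell (other codes of the same form: 121, 322)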
# plt.subplot(211)
# plt.plot(x,y,'r')
# plt.subplot(212)
# plt.plot(x,y,'b')
# plt.show()
#
# Font setup so the Chinese title/labels and the minus signs on the ticks
# render correctly: use the SimHei font and turn off the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Red solid line, with a title and a label on each axis.
label_style = {'fontsize': 14}
plt.plot(x, y, 'r-')
plt.title('绘图')
plt.xlabel('x:横坐标', **label_style)
plt.ylabel('y:纵坐标', **label_style)

# Turn the grid on, then display the figure.
plt.grid(True)
plt.show()

2、风格设置和条形图

2.1 pycharm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# List the names of the style sheets matplotlib has available.
style_names = plt.style.available
print(style_names)
#构造
# x=np.linspace(-10,10)
# y=np.sin(x)
# plt.style.use('ggplot')
# plt.plot(x,y,'g-',linewidth=3.0,marker='o',markerfacecolor='r',markersize=10,alpha=1)
# plt.show()

3、条形图

3.1 pycharm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Fix the random seed so the five bar heights are the same on every run.
np.random.seed(0)
x = np.arange(5)
y = np.random.randint(low=-5, high=5, size=5)  # integers drawn from [-5, 5)
print(y)

#ncols=2: two subplots side by side
#第一个图
# fig,axes = plt.subplots(ncols = 2)
# v_bars = axes[0].bar(x,y,color='red')
# h_bars = axes[1].barh(x,y,color='red')
#
# axes[0].axhline(0,color='grey',linewidth=2)
# axes[1].axvline(0,color='grey',linewidth=2)
# plt.show()

#第二个图
# fig,ax = plt.subplots()
# v_bars = ax.bar(x,y,color='lightblue')
# for bar,height in zip(v_bars,y):
#     if height < 0:
#         bar.set(edgecolor = 'darkred',color = 'green',linewidth = 3)
# plt.show()


#第三个图
# x = np.random.randn(100).cumsum()
# y = np.linspace(0,10,100)
#
# fig,ax = plt.subplots()
# ax.fill_between(x,y,color='lightblue')
# plt.show()

#第四个图
# x = np.linspace(0,10,200)
# y1 = 2*x +1
# y2 = 3*x +1.2
# y_mean = 0.5*x*np.cos(2*x) + 2.5*x +1.1
# fig,ax = plt.subplots()
# ax.fill_between(x,y1,y2,color='red')
# ax.plot(x,y_mean,color='black')
# plt.show()


#第五个图
# mean_values = [1,2,3]
# variance = [0.2,0.4,0.5]
# bar_label = ['bar1','bar2','bar3']
#
# x_pos = list(range(len(bar_label)))
# plt.bar(x_pos,mean_values,yerr=variance,alpha=0.3)
# max_y = max(zip(mean_values,variance))
# plt.ylim([0,(max_y[0]+max_y[1])*1.2])
# plt.ylabel('variable y')
# plt.xticks(x_pos,bar_label)
# plt.show()

#第六个图
# x1 = np.array([1,2,3])
# x2 = np.array([2,2,3])
#
# bar_labels = ['bat1','bar2','bar3']
# fig = plt.figure(figsize = (8,6))
# y_pos = np.arange(len(x1))
# y_pos = [x for x in y_pos]
#
# plt.barh(y_pos,x1,color='g',alpha = 0.5)
# plt.barh(y_pos,-x2,color='b',alpha = 0.5)
#
# plt.xlim(-max(x2)-1,max(x1)+1)
# plt.ylim(-1,len(x1)+1)
# plt.show()
#第七个图
# green_data = [1, 2, 3]
# blue_data = [3, 2, 1]
# red_data = [2, 3, 3]
# labels = ['group 1', 'group 2', 'group 3']
#
# pos = list(range(len(green_data)))
# width = 0.2
# fig, ax = plt.subplots(figsize=(8,6))
#
# plt.bar(pos,green_data,width,alpha=0.5,color='g',label=labels[0])
# plt.bar([p+width for p in pos],blue_data,width,alpha=0.5,color='b',label=labels[1])
# plt.bar([p+width*2 for p in pos],red_data,width,alpha=0.5,color='r',label=labels[2])
# plt.show()

#第八个图
# data = range(200, 225, 5)
#
# bar_labels = ['a', 'b', 'c', 'd', 'e']
#
# fig = plt.figure(figsize=(10,8))
#
# y_pos = np.arange(len(data))
#
# plt.yticks(y_pos, bar_labels, fontsize=16)
#
# bars = plt.barh(y_pos,data,alpha = 0.5,color='g')
#
# plt.vlines(min(data),-1,len(data)+0.5,linestyle = 'dashed')
# for b,d in zip(bars,data):
#     plt.text(b.get_width()+b.get_width()*0.05,b.get_y()+b.get_height()/2,'{0:.2%}'.format(d/min(data)))
# plt.show()


#第九个图
# mean_values = range(10,18)
# x_pos = range(len(mean_values))
#
# import matplotlib.colors as col
# import matplotlib.cm as cm
#
# cmap1 = cm.ScalarMappable(col.Normalize(min(mean_values),max(mean_values),cm.hot))
# cmap2 = cm.ScalarMappable(col.Normalize(0,20,cm.hot))
#
# plt.subplot(121)
# plt.bar(x_pos,mean_values,color = cmap1.to_rgba(mean_values))
#
# plt.subplot(122)
# plt.bar(x_pos,mean_values,color = cmap2.to_rgba(mean_values))
#
# plt.show()

# Bar chart #10: give every bar its own hatch (fill texture).
# Each entry is a matplotlib hatch code; '\\' is a single backslash.
patterns = ('-', '+', 'x', '\\', '*', 'o', 'O', '.')

# plt.gca() returns the current Axes (not a Figure), so name it accordingly
# and draw through it.
ax = plt.gca()

# Bar heights 1..len(patterns), one bar per hatch pattern.
mean_value = range(1, len(patterns) + 1)
x_pos = list(range(len(mean_value)))

# White faces keep the hatching readable; the explicit black edge outlines
# each bar, which would otherwise blend into the white background.
bars = ax.bar(x_pos, mean_value, color='white', edgecolor='black')

for bar, pattern in zip(bars, patterns):
    bar.set_hatch(pattern)
plt.show()

4、盒图

4.1 pycharm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#经常会用到 箱线图

#关心的是 中位数在哪里  均值在哪里

# tang_data = [np.random.normal(0,std,100) for std in range(1,4)]
# fig = plt.figure(figsize = (8,6))
# #第一个参数数据  vert是横竖显示
# bplot = plt.boxplot(tang_data,notch=False,sym='rs',vert=True,patch_artist=True)
# #对x轴进行操作
# plt.xticks([y+1 for y in range(len(tang_data))],['x1','x2','x3'])
# plt.xlabel('x')
# plt.title('box plot')
#
# plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
# plt.rcParams['axes.unicode_minus']=False  #纵坐标乱码更新
# plt.title('箱线图')
#
# colors = ['pink','lightblue','lightgreen']
# for pathch,color in zip(bplot['boxes'],colors):
#     pathch.set_facecolor(color)
#
# plt.show()








# Violin plot next to a box plot of the same samples.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Four samples of 100 normal draws each, with mean 0 and standard deviation
# 6, 7, 8 and 9 (the second argument of np.random.normal is the standard
# deviation, not the variance).
tang_data = [np.random.normal(0, std, 100) for std in range(6, 10)]

axes[0].violinplot(tang_data, showmeans=False, showmedians=True)
axes[0].set_title('violin plot')

axes[1].boxplot(tang_data)
axes[1].set_title('box plot')

# One tick per sample at positions 1..N, labelled x1..xN. Deriving both from
# tang_data keeps the label count in step with the number of samples.
tick_positions = list(range(1, len(tang_data) + 1))
tick_labels = ['x{}'.format(pos) for pos in tick_positions]

for ax in axes:
    ax.yaxis.grid(True)  # horizontal grid lines only
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
plt.show()

5、直方图and散点图

5.1 pycharm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
#第一个图直方图
# data = np.random.normal(0,20,1000)
# bins = np.arange(-100,100,5)
#
# plt.hist(data,bins=bins)
# plt.xlim([min(data)-5,max(data)+5])
# plt.show()

#第二个图直方图
# data1 = [random.gauss(15,10) for i in range(500)]
# data2 = [random.gauss(5,5) for i in range(500)]
# bins = np.arange(-50,50,2.5)
#
# plt.hist(data1,bins=bins,label='class 1',alpha = 0.3)
# plt.hist(data2,bins=bins,label='class 2',alpha = 0.3)
# plt.legend(loc='best')
# plt.show()


#第一个散点图
# mu_vec1 = np.array([0,0])
# cov_mat1 = np.array([[2,0],[0,2]])
#
# x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, 100)
# x2_samples = np.random.multivariate_normal(mu_vec1+0.2, cov_mat1+0.2, 100)
# x3_samples = np.random.multivariate_normal(mu_vec1+0.4, cov_mat1+0.4, 100)
#
# plt.figure(figsize = (8,6))
# plt.scatter(x1_samples[:,0],x1_samples[:,1],marker ='x',color='blue',alpha=0.6,label='x1')
# plt.scatter(x2_samples[:,0],x2_samples[:,1],marker ='o',color='red',alpha=0.6,label='x2')
# plt.scatter(x3_samples[:,0],x3_samples[:,1],marker ='^',color='green',alpha=0.6,label='x3')
# plt.legend(loc='best')
# plt.show()


#第二个散点图

# x_coords = [0.13, 0.22, 0.39, 0.59, 0.68, 0.74, 0.93]
# y_coords = [0.75, 0.34, 0.44, 0.52, 0.80, 0.25, 0.55]
#
# plt.figure(figsize = (8,6))
# plt.scatter(x_coords,y_coords,marker='s',s=50)
#
# for x,y in zip(x_coords,y_coords):
#     plt.annotate('(%s,%s)'%(x,y),xy=(x,y),xytext=(0,-15),textcoords = 'offset points',ha='center')
# plt.show()



# Third scatter plot: each marker's area is proportional to the point's
# squared distance from the origin.

# 500 points drawn from a 2-D normal with zero mean and identity covariance.
mu_vec1 = np.array([0, 0])
cov_mat1 = np.array([[1, 0], [0, 1]])
X = np.random.multivariate_normal(mu_vec1, cov_mat1, size=500)
fig = plt.figure(figsize=(8, 6))

# Row-wise sum of squares, i.e. x**2 + y**2 for every point.
R = np.square(X)
R_sum = np.sum(R, axis=1)

plt.scatter(
    X[:, 0],
    X[:, 1],
    s=20 * R_sum,
    marker='o',
    color='grey',
    alpha=0.5,
)
plt.show()

6、3D图

6.1 pycharm

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

#
# fig = plt.figure()
# ax = Axes3D(fig)
#
# x = np.arange(-4,4,0.25)
# y = np.arange(-4,4,0.25)
#
# X,Y = np.meshgrid(x,y)
#
# Z = np.sin(np.sqrt(X**2+Y**2))
# ax.plot_surface(X,Y,Z,rstride = 1,cstride = 1,cmap='rainbow')
# ax.contour(X,Y,Z,zdim='z',offset = -2 ,cmap='rainbow')
#
# ax.set_zlim(-2,2)
# plt.show()


#第二个3D

# np.random.seed(1)
# def randrange(n,vmin,vmax):
#     return (vmax-vmin)*np.random.rand(n)+vmin
#
#
# fig = plt.figure()
# ax = fig.add_subplot(111,projection = '3d')
# n = 100
# for c,m,zlow,zhigh in [('r','o',-50,-25),('b','x','-30','-5')]:
#     xs = randrange(n,23,32)
#     ys = randrange(n,0,100)
#     zs = randrange(n,int(zlow),int(zhigh))
#     ax.scatter(xs,ys,zs,c=c,marker=m)
# plt.show()


# 3D bar chart: four coloured rows of 20 random-height bars each.

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# The bar positions are the same for every row, so build them once.
xs = np.arange(20)
rows = (('r', 30), ('g', 20), ('b', 10), ('y', 0))
for c, z in rows:
    ys = np.random.rand(20)
    cs = [c] * len(xs)  # one colour entry per bar
    ax.bar(xs, ys, zs=z, zdir='y', color=cs, alpha=0.5)
plt.show()

7、pie图

7.1 pycharm

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
#自定义图案标记
from matplotlib.patches import Circle, Wedge, Polygon, Ellipse
from matplotlib.collections import PatchCollection


# Raw values behind the "Male" and "Female" pie slices.
m = 51212.
f = 40742.

# Each value's share of the combined total (the two shares sum to 1).
total = m + f
m_perc = m / total
f_perc = f / total

labels = ["Male", "Female"]
colors = ['navy', 'lightcoral']

plt.figure(figsize=(8, 8))

# With autopct set, plt.pie hands back three lists: the wedge patches, the
# label texts and the percentage texts.
wedges, texts, autotexts = plt.pie(
    [m_perc, f_perc],
    labels=labels,
    colors=colors,
    explode=[0, 0.05],
    autopct='%1.1f%%',
)

# Enlarge every piece of text; the percentage texts are additionally white.
plt.setp(texts + autotexts, fontsize=20)
plt.setp(autotexts, color='white')
plt.show()

# Sub-plot layout on a 3x3 grid: every Axes is anchored at a (row, column)
# cell and may stretch over several rows (rowspan) or columns (colspan).
grid_shape = (3, 3)
ax1 = plt.subplot2grid(grid_shape, (0, 0))
ax2 = plt.subplot2grid(grid_shape, (1, 0))
ax3 = plt.subplot2grid(grid_shape, (0, 2), rowspan=3)
ax4 = plt.subplot2grid(grid_shape, (2, 0), colspan=2)
ax5 = plt.subplot2grid(grid_shape, (0, 1), rowspan=2)
#嵌套图


# x = np.linspace(0,10,1000)
# y2 = np.sin(x**2)
# y1 = x**2
#
# fig,ax1 = plt.subplots()
#
# left,bottom,width,height = [0.22,0.45,0.3,0.35]
# ax2 = fig.add_axes([left,bottom,width,height])
#
# ax1.plot(x,y1)
# ax2.plot(x,y2)
# plt.show()


# def autolabel(rects):
#     for rect in rects:
#         height = rect.get_height()
#         ax1.text(rect.get_x() + rect.get_width() / 2., 1.02 * height,
#                  "{:,}".format(float(height)),
#                  ha='center', va='bottom', fontsize=18)
#
#
# top10_arrivals_countries = ['CANADA', 'MEXICO', 'UNITED\nKINGDOM', \
#                             'JAPAN', 'CHINA', 'GERMANY', 'SOUTH\nKOREA', \
#                             'FRANCE', 'BRAZIL', 'AUSTRALIA']
# top10_arrivals_values = [16.625687, 15.378026, 3.934508, 2.999718, \
#                          2.618737, 1.769498, 1.628563, 1.419409, \
#                          1.393710, 1.136974]
# arrivals_countries = ['WESTERN\nEUROPE', 'ASIA', 'SOUTH\nAMERICA', \
#                       'OCEANIA', 'CARIBBEAN', 'MIDDLE\nEAST', \
#                       'CENTRAL\nAMERICA', 'EASTERN\nEUROPE', 'AFRICA']
# arrivals_percent = [36.9, 30.4, 13.8, 4.4, 4.0, 3.6, 2.9, 2.6, 1.5]
#
# fig, ax1 = plt.subplots(figsize=(20, 12))
# tang = ax1.bar(range(10), top10_arrivals_values, color='blue')
# plt.xticks(range(10), top10_arrivals_countries, fontsize=18)
# ax2 = inset_axes(ax1, width=6, height=6, loc=5)
# explode = (0.08, 0.08, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05)
# patches, texts, autotexts = ax2.pie(arrivals_percent, labels=arrivals_countries, autopct='%1.1f%%', explode=explode)
#
# for text in texts + autotexts:
#     text.set_fontsize(16)
# for spine in ax1.spines.values():
#     spine.set_visible(False)
#
# autolabel(tang)
# plt.show()



# Cat drawing: build a list of matplotlib patches (wedges, circles, polygons,
# an ellipse) and render them together as one randomly coloured PatchCollection.

fig, ax = plt.subplots()

patches = []

# Full and ring sectors drawn by Wedge((x, y), r, deg1, deg2)
leftstripe = Wedge((.46, .5), .15, 90, 100)           # full sector by default
midstripe = Wedge((.5, .5), .15, 85, 95)
rightstripe = Wedge((.54, .5), .15, 80, 90)
lefteye = Wedge((.36, .46), .06, 0, 360, width=0.03)  # passing width draws a ring sector
righteye = Wedge((.63, .46), .06, 0, 360, width=0.03)
nose = Wedge((.5, .32), .08, 75, 105, width=0.03)
mouthleft = Wedge((.44, .4), .08, 240, 320, width=0.01)
mouthright = Wedge((.56, .4), .08, 220, 300, width=0.01)
patches += [leftstripe, midstripe, rightstripe, lefteye, righteye, nose, mouthleft, mouthright]

# Circles
leftiris = Circle((.36, .46), 0.04)
rightiris = Circle((.63, .46), 0.04)
patches += [leftiris, rightiris]

# Polygons drawn by passing coordinates of vertices.
# `closed` is passed by keyword: passing it positionally is deprecated since
# Matplotlib 3.6 and fails once the parameter became keyword-only.
leftear = Polygon([[.2, .6], [.3, .8], [.4, .64]], closed=True)
rightear = Polygon([[.6, .64], [.7, .8], [.8, .6]], closed=True)
topleftwhisker = Polygon([[.01, .4], [.18, .38], [.17, .42]], closed=True)
bottomleftwhisker = Polygon([[.01, .3], [.18, .32], [.2, .28]], closed=True)
toprightwhisker = Polygon([[.99, .41], [.82, .39], [.82, .43]], closed=True)
bottomrightwhisker = Polygon([[.99, .31], [.82, .33], [.81, .29]], closed=True)
patches += [leftear, rightear, topleftwhisker, bottomleftwhisker, toprightwhisker, bottomrightwhisker]

# Ellipse drawn by Ellipse((x, y), width, height)
body = Ellipse((0.5, -0.18), 0.6, 0.8)
patches.append(body)

# Draw the patches: one random scalar per patch, mapped to a colour by the
# collection's colormap.
colors = 100 * np.random.rand(len(patches))
p = PatchCollection(patches, alpha=0.4)
p.set_array(np.array(colors))
ax.add_collection(p)

# Show the figure
plt.show()

8、Pandas与sklearn结合实例

8.1 pycharm

import pandas as pd
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
# np.random.seed(0)
# df = pd.DataFrame({'Condition 1': np.random.rand(20),
#                    'Condition 2': np.random.rand(20)*0.9,
#                    'Condition 3': np.random.rand(20)*1.1})
# print(df.head())
#
# fig,ax = plt.subplots()
# #要不要进行堆叠
# df.plot.bar(ax=ax,stacked=True)
# plt.show()
#
#
# #占据百分比情况
#
# from matplotlib.ticker import FuncFormatter
#
# df_ratio = df.div(df.sum(axis=1),axis=0)
# fig,ax = plt.subplots()
# df_ratio.plot.bar(ax=ax,stacked=True)
# ax.yaxis.set_major_formatter(FuncFormatter(lambda y,_:'{:.0%}'.format(y)))
# plt.show()

# Load the data set from CSV.
# NOTE: this is an absolute path on the author's machine; adjust it to
# wherever the CSV file lives.
path = r'G:\nodebookPython3\lesson\data_file\risk_factors_cervical_cancer.csv'
df = pd.read_csv(path, na_values='?')  # '?' cells are read as NaN

# Show every column when printing (up to 100 columns, 200 characters wide).
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 200)
print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Fill missing values with the column mean (SimpleImputer's default strategy).
# fit_transform returns a bare array, so the original column names and index
# are restored when wrapping it back into a DataFrame.
impute = pd.DataFrame(
    SimpleImputer().fit_transform(df),
    columns=df.columns,
    index=df.index,
)
print(impute.head())


# Split the imputed table into the predictor columns and the target column.
target_col = 'Dx:Cancer'
y = impute[target_col]
features = impute.drop(columns=target_col)

# Project the predictors onto three principal components.
pca = PCA(n_components=3)
X_r = pca.fit_transform(features)

# Report the share of the total variance explained by each of the three
# principal components (these are components, not original features).
pc1_ratio, pc2_ratio, pc3_ratio = pca.explained_variance_ratio_[:3]
print("Explained variance:\nPC1 {:.2%}\nPC2 {:.2%}\nPC3 {:.2%}"
      .format(pc1_ratio, pc2_ratio, pc3_ratio))

# 3D scatter plot of the three principal components, coloured by the target y.
fig = plt.figure()
# Create the 3D axes through the figure. Axes3D(fig) no longer attaches itself
# to the figure in recent Matplotlib releases, which leaves the window blank.
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X_r[:, 0], X_r[:, 1], X_r[:, 2], c=y, cmap=plt.cm.coolwarm)

# Label the axes
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')

plt.show()

刘阳洋

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
2分钟学会python数据分析与机器学习知识点(三)

2分钟学会python数据分析与机器学习知识点（三）第四节、Matplotlib1、matplotlib基本操作1.1 pycharm2、风格设置和条形图2.1 pycharm3、条形图3.1 pycharm4、盒图4.1 pycharm5、直方图and散点图5.1 pycharm6、3D图6.1 pycharm7、pie图7.1 pycharm8、Pandas与sklearn结合实例8.1 pycharm第四节、Matplotlib1、matplotlib基本操作1.1 pycharmimport
复制链接

扫一扫