扩展库pandas

简介

  • 基于numpy的数据分析模块,提供大量的标准数据模型和高效操作大型数据集所需要的工具
  • 主要提供了3种数据结构:1.Series,带标签的一维数组。2.DataFrame,带标签且大小可变的二维表格结构。3.Panel,带标签且大小可变的三维数组(注意:Panel自pandas 0.20起被弃用,并已在0.25.0版本中移除,新版本中可改用带多级索引MultiIndex的DataFrame来表示三维数据)

生成一维数组

>>> import pandas as pd
>>> import numpy as np
>>> x= pd.Series([1,3,5,np.nan])
>>> x
0    1.0
1    3.0
2    5.0
3    NaN
dtype: float64

生成二维数组

  • pd.DataFrame第一个参数是存放在DataFrame里的数据,第二个参数index是行名,第三个参数columns是列名
>>> datas=pd.date_range(start='20210226',end='20211231',freq='D') #间隔为天
>>> datas
DatetimeIndex(['2021-02-26', '2021-02-27', '2021-02-28', '2021-03-01',
               '2021-03-02', '2021-03-03', '2021-03-04', '2021-03-05',
               '2021-03-06', '2021-03-07',
               ...
               '2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25',
               '2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30', '2021-12-31'],
              dtype='datetime64[ns]', length=309, freq='D')
>>> datas=pd.date_range(start='20210226',end='20211231',freq='M') #间隔为月(取每个月的最后一天)
>>> datas
DatetimeIndex(['2021-02-28', '2021-03-31', '2021-04-30', '2021-05-31',
               '2021-06-30', '2021-07-31', '2021-08-31', '2021-09-30',
               '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='M')
>>> df=pd.DataFrame([[np.random.randint(1,100)for j in range(4)] for i in range(11)],index=datas,columns=list('ABCD'))
>>> df
             A   B   C   D
2021-02-28  23  26  76  71
2021-03-31  56  38  91  60
2021-04-30  84  67  75  87
2021-05-31  56  63  19  24
2021-06-30   7  55  18  16
2021-07-31  51  89  86  74
2021-08-31  77  38  22  60
2021-09-30  90  28  27  87
2021-10-31  37  37  14   5
2021-11-30  18  65  93  12
2021-12-31  88  57  60  59
>>> 
>>> df=pd.DataFrame({'A':[np.random.randint(1,100)for i in range(4)],'B':pd.date_range(start='20130101',periods=4,freq='D'),'C':pd.Series([1,2,3,4],index=['zhang','li','zhou','wang'],dtype='float32'),'D':np.array([3]*4,dtype='int32'),'E':pd.Categorical(["test","train","test","train"]),'F':'foo'})
		    
>>> df
		    
        A          B    C  D      E    F
zhang  29 2013-01-01  1.0  3   test  foo
li     72 2013-01-02  2.0  3  train  foo
zhou   80 2013-01-03  3.0  3   test  foo
wang   73 2013-01-04  4.0  3  train  foo
>>> 

二维数组数据查看

>>> df.head() #默认显示前5行
		    
        A          B    C  D      E    F
zhang  29 2013-01-01  1.0  3   test  foo
li     72 2013-01-02  2.0  3  train  foo
zhou   80 2013-01-03  3.0  3   test  foo
wang   73 2013-01-04  4.0  3  train  foo
>>> df.head(3) #查看前3行
		    
        A          B    C  D      E    F
zhang  29 2013-01-01  1.0  3   test  foo
li     72 2013-01-02  2.0  3  train  foo
zhou   80 2013-01-03  3.0  3   test  foo
>>> df.tail(2) #查看最后2行
		    
       A          B    C  D      E    F
zhou  80 2013-01-03  3.0  3   test  foo
wang  73 2013-01-04  4.0  3  train  foo

查看数据的统计信息

>>> df.describe() #返回平均值、标准差、最小值、最大值等信息
		    
               A         C    D
count   4.000000  4.000000  4.0
mean   63.500000  2.500000  3.0
std    23.273733  1.290994  0.0
min    29.000000  1.000000  3.0
25%    61.250000  1.750000  3.0
50%    72.500000  2.500000  3.0
75%    74.750000  3.250000  3.0
max    80.000000  4.000000  3.0

二维数据转置

>>> df.T
		    
                 zhang  ...                 wang
A                   29  ...                   73
B  2013-01-01 00:00:00  ...  2013-01-04 00:00:00
C                    1  ...                    4
D                    3  ...                    3
E                 test  ...                train
F                  foo  ...                  foo

[6 rows x 4 columns]

排序

>>> df.sort_index(axis=0,ascending=False) #按行索引(axis=0)降序排序
		    
        A          B    C  D      E    F
zhou   80 2013-01-03  3.0  3   test  foo
zhang  29 2013-01-01  1.0  3   test  foo
wang   73 2013-01-04  4.0  3  train  foo
li     72 2013-01-02  2.0  3  train  foo
>>> df.sort_index(axis=1,ascending=False)
		    
         F      E  D    C          B   A
zhang  foo   test  3  1.0 2013-01-01  29
li     foo  train  3  2.0 2013-01-02  72
zhou   foo   test  3  3.0 2013-01-03  80
wang   foo  train  3  4.0 2013-01-04  73
>>> df.sort_values(by='A') #对数据进行排序
		    
        A          B    C  D      E    F
zhang  29 2013-01-01  1.0  3   test  foo
li     72 2013-01-02  2.0  3  train  foo
wang   73 2013-01-04  4.0  3  train  foo
zhou   80 2013-01-03  3.0  3   test  foo
>>> df.sort_values(by='A',ascending=False) #降序排列
		    
        A          B    C  D      E    F
zhou   80 2013-01-03  3.0  3   test  foo
wang   73 2013-01-04  4.0  3  train  foo
li     72 2013-01-02  2.0  3  train  foo
zhang  29 2013-01-01  1.0  3   test  foo

数据选择

>>> import pandas as pd
>>> import numpy as np
>>> df=pd.DataFrame({'A':[np.random.randint(1,100)for i in range(4)],'B':pd.date_range(start='20130101',periods=4,freq='D'),'C':pd.Series([1,2,3,4],index=['zhang','li','zhou','wang'],dtype='float32'),'D':np.array([3]*4,dtype='int32'),'E':pd.Categorical(["test","train","test","train"]),'F':'foo'})
>>> df
        A          B    C  D      E    F
zhang  55 2013-01-01  1.0  3   test  foo
li     52 2013-01-02  2.0  3  train  foo
zhou   83 2013-01-03  3.0  3   test  foo
wang   64 2013-01-04  4.0  3  train  foo
>>> df['A'] #选择列
zhang    55
li       52
zhou     83
wang     64
Name: A, dtype: int64
>>> df[0:2] #使用切片选择多行
        A          B    C  D      E    F
zhang  55 2013-01-01  1.0  3   test  foo
li     52 2013-01-02  2.0  3  train  foo
>>> df.loc[:,['A','C']] #选择多列
        A    C
zhang  55  1.0
li     52  2.0
zhou   83  3.0
wang   64  4.0
>>> df.loc[['zhang','zhou'],['A','D','E']] #同时指定多行与多列进行选择
        A  D     E
zhang  55  3  test
zhou   83  3  test
>>> df.at['zhang','A']
55
>>> df.at['zhang','D'] #查询指定行列位置的数据值
3
>>> df.iloc[3] #查询第4行数据(位置下标从0开始,下标3对应第4行)
A                     64
B    2013-01-04 00:00:00
C                      4
D                      3
E                  train
F                    foo
Name: wang, dtype: object
>>> df.iloc[0:3,0:4] #查询前3行、前4列数据
        A          B    C  D
zhang  55 2013-01-01  1.0  3
li     52 2013-01-02  2.0  3
zhou   83 2013-01-03  3.0  3
>>> df.iloc[[0,2,3],[0,4]] #查询指定的多行、多列数据
        A      E
zhang  55   test
zhou   83   test
wang   64  train
>>> df.iloc[0,1] #查询指定行、列位置的数据值
Timestamp('2013-01-01 00:00:00')
>>> df[df.A>50] #按给定条件进行查询
        A          B    C  D      E    F
zhang  55 2013-01-01  1.0  3   test  foo
li     52 2013-01-02  2.0  3  train  foo
zhou   83 2013-01-03  3.0  3   test  foo
wang   64 2013-01-04  4.0  3  train  foo
>>> 

数据修改与设置

>>> df.iat[0,2]=3 #修改指定行、列位置的数据值
>>> df
        A          B    C  D      E    F
zhang  55 2013-01-01  3.0  3   test  foo
li     52 2013-01-02  2.0  3  train  foo
zhou   83 2013-01-03  3.0  3   test  foo
wang   64 2013-01-04  4.0  3  train  foo
>>> df.loc[:,'D']=[np.random.randint(50,60) for i in range(4)] #修改某列的值
>>> df
        A          B    C   D      E    F
zhang  55 2013-01-01  3.0  58   test  foo
li     52 2013-01-02  2.0  52  train  foo
zhou   83 2013-01-03  3.0  56   test  foo
wang   64 2013-01-04  4.0  57  train  foo
>>> df['C']=-df['C'] #对指定列数据取反
>>> df
        A          B    C   D      E    F
zhang  55 2013-01-01 -3.0  58   test  foo
li     52 2013-01-02 -2.0  52  train  foo
zhou   83 2013-01-03 -3.0  56   test  foo
wang   64 2013-01-04 -4.0  57  train  foo
>>> 

缺失值处理

  • 缺失值和异常值处理是大数据预处理环节中很重要的一个步骤
>>> df1=df.reindex(index={'zhang','li','zhou','wang'},columns=list(df.columns)+['G'])
>>> df1.iat[0,6]=3 #修改指定位置的元素值,该列其他元素为缺失值NaN
>>> df1
        A          B    C   D      E    F    G
wang   64 2013-01-04 -4.0  57  train  foo  3.0
zhou   83 2013-01-03 -3.0  56   test  foo  NaN
zhang  55 2013-01-01 -3.0  58   test  foo  NaN
li     52 2013-01-02 -2.0  52  train  foo  NaN
>>> pd.isnull(df1) #测试缺失值,返回值为True/False阵列
           A      B      C      D      E      F      G
wang   False  False  False  False  False  False  False
zhou   False  False  False  False  False  False   True
zhang  False  False  False  False  False  False   True
li     False  False  False  False  False  False   True
>>> df1.dropna() #返回不包含缺失值的行
       A          B    C   D      E    F    G
wang  64 2013-01-04 -4.0  57  train  foo  3.0
>>> df1['G'].fillna(5,inplace=True) #使用指定值填充缺失值
>>> df1
        A          B    C   D      E    F    G
wang   64 2013-01-04 -4.0  57  train  foo  3.0
zhou   83 2013-01-03 -3.0  56   test  foo  5.0
zhang  55 2013-01-01 -3.0  58   test  foo  5.0
li     52 2013-01-02 -2.0  52  train  foo  5.0
>>> 

数据操作

>>> df1.mean() #平均值,自动忽略缺失值
A    63.50
C    -3.00
D    55.75
G     4.50
dtype: float64
>>> df.mean(1) #横向计算平均值
zhang    36.666667
li       34.000000
zhou     45.333333
wang     39.000000
dtype: float64
>>> df1.shift(1) #数据移位
          A          B    C     D      E    F    G
wang    NaN        NaT  NaN   NaN    NaN  NaN  NaN
zhou   64.0 2013-01-04 -4.0  57.0  train  foo  3.0
zhang  83.0 2013-01-03 -3.0  56.0   test  foo  5.0
li     55.0 2013-01-01 -3.0  58.0   test  foo  5.0
>>> df1['D'].value_counts() #直方图统计,即统计该列中每个值出现的次数
52    1
58    1
57    1
56    1
Name: D, dtype: int64
>>> df2=pd.DataFrame(np.random.randn(10,4))
>>> df2
          0         1         2         3
0 -0.247972 -0.764663  1.239467 -1.528499
1 -0.223106  1.004240 -1.043497  0.202809
2  0.950814 -1.006138 -0.184229 -0.083002
3  1.146779  1.361694 -0.841514 -0.300036
4 -1.719960  0.372381  0.609789  0.604840
5  0.398698 -0.970149 -1.904028 -0.064732
6 -1.206135 -1.365520  0.759790  0.331760
7 -2.112486 -1.878848 -1.827505  1.041786
8  0.190555  0.895478 -0.082711 -1.325795
9  1.194263 -0.642249  1.307098 -0.257796
>>> p1=df2[:3] #数据行拆分
>>> p1
          0         1         2         3
0 -0.247972 -0.764663  1.239467 -1.528499
1 -0.223106  1.004240 -1.043497  0.202809
2  0.950814 -1.006138 -0.184229 -0.083002
>>> p2=df2[3:7]
>>> p2
          0         1         2         3
3  1.146779  1.361694 -0.841514 -0.300036
4 -1.719960  0.372381  0.609789  0.604840
5  0.398698 -0.970149 -1.904028 -0.064732
6 -1.206135 -1.365520  0.759790  0.331760
>>> p3=df2[7:]
>>> p3
          0         1         2         3
7 -2.112486 -1.878848 -1.827505  1.041786
8  0.190555  0.895478 -0.082711 -1.325795
9  1.194263 -0.642249  1.307098 -0.257796
>>> df3=pd.concat([p1,p2,p3]) #数据行合并
>>> df3
          0         1         2         3
0 -0.247972 -0.764663  1.239467 -1.528499
1 -0.223106  1.004240 -1.043497  0.202809
2  0.950814 -1.006138 -0.184229 -0.083002
3  1.146779  1.361694 -0.841514 -0.300036
4 -1.719960  0.372381  0.609789  0.604840
5  0.398698 -0.970149 -1.904028 -0.064732
6 -1.206135 -1.365520  0.759790  0.331760
7 -2.112486 -1.878848 -1.827505  1.041786
8  0.190555  0.895478 -0.082711 -1.325795
9  1.194263 -0.642249  1.307098 -0.257796
>>> df2 == df3 #测试两个二维数据是否相等,返回True/False阵列
      0     1     2     3
0  True  True  True  True
1  True  True  True  True
2  True  True  True  True
3  True  True  True  True
4  True  True  True  True
5  True  True  True  True
6  True  True  True  True
7  True  True  True  True
8  True  True  True  True
9  True  True  True  True
>>> df4=pd.DataFrame({'A':[np.random.randint(1,5) for i in range(8)],'B':[np.random.randint(10,15) for i in range(8)],'C':[np.random.randint(20,30)for i in range(8)],'D':[np.random.randint(80,100)for i in range(8)]})
>>> df4
   A   B   C   D
0  4  10  21  85
1  4  10  27  93
2  3  14  25  95
3  2  10  20  82
4  2  12  22  81
5  4  14  20  90
6  3  12  27  96
7  1  14  25  99
>>> df4.groupby('A').sum() #数据分组计算
    B   C    D
A             
1  14  25   99
2  22  42  163
3  26  52  191
4  34  68  268
>>> df4.groupby(['A','B']).mean()
       C   D
A B         
1 14  25  99
2 10  20  82
  12  22  81
3 12  27  96
  14  25  95
4 10  24  89
  14  20  90
>>> 

matplotlib绘图

>>> import pandas as pd
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> df=pd.DataFrame(np.random.randn(1000,2),columns=['B','C']).cumsum()
>>> df['A']=pd.Series(list(range(len(df))))
>>> plt.figure()
<Figure size 640x480 with 0 Axes>
>>> df.plot(x='A')
<AxesSubplot:xlabel='A'>
>>> plt.show()

在这里插入图片描述

>>> df=pd.DataFrame(np.random.rand(10,4),columns=['a','b','c','d'])
>>> df.plot(kind='bar')
<AxesSubplot:>
>>> plt.show()

在这里插入图片描述

>>> df.plot(kind='barh',stacked=True)
<AxesSubplot:>
>>> plt.show()

在这里插入图片描述

绘制正弦曲线

>>> import numpy as np
>>> import pylab as pl
>>> t=np.arange(0.0,2.0*np.pi,0.01) #生成数组,0~2π之间,以0.01为步长
>>> s=np.sin(t) #对数组中的所有元素求正弦值,得到新数组
>>> pl.plot(t,s) #画图,以t为横坐标,s为纵坐标
[<matplotlib.lines.Line2D object at 0x0000001DE0454B70>]
>>> pl.xlabel('x') #设置横坐标轴标签
Text(0.5, 0, 'x')
>>> pl.ylabel('y')
Text(0, 0.5, 'y')
>>> pl.title('sin') #设置图形标题
Text(0.5, 1.0, 'sin')
>>> pl.show()

在这里插入图片描述

>>> import numpy as np
>>> import pylab as pl
>>> a=np.arange(0,2.0*np.pi,0.1)
>>> b=np.cos(a)
>>> pl.scatter(a,b)
<matplotlib.collections.PathCollection object at 0x0000001DE094D7B8>
>>> pl.show()
>>> 

在这里插入图片描述

>>> import matplotlib.pylab as pl
>>> import numpy as np
>>> x=np.random.random(100)
>>> y=np.random.random(100)
>>> pl.scatter(x,y,s=x*500,c=u'r',marker=u'*') #s指大小,c指颜色,marker指符号形状
<matplotlib.collections.PathCollection object at 0x0000001DE3018438>
>>> pl.show()
>>> 

在这里插入图片描述

>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> labels='frogs','hogs','dogs','logs'
>>> sizes=[15,30,45,10]
>>> colors=['yellowgreen','gold','#FF0000','lightcoral']
>>> explode=(0,0.1,0,0.1) #使饼状图中第2片和第4片裂开
>>> fig=plt.figure()
>>> ax=fig.gca()
>>> ax.pie(np.random.random(4),explode=explode,labels=labels,colors=colors,autopct='%1.1f%%',shadow=True,startangle=90,radius=0.25,center=(0,0),frame=True)
>>> ax.pie(np.random.random(4),explode=explode,labels=labels,colors=colors,autopct='%1.1f%%',shadow=True,startangle=90,radius=0.25,center=(1,1),frame=True)
>>> ax.pie(np.random.random(4),explode=explode,labels=labels,colors=colors,autopct='%1.1f%%',shadow=True,startangle=90,radius=0.25,center=(0,1),frame=True)
>>> ax.pie(np.random.random(4),explode=explode,labels=labels,colors=colors,autopct='%1.1f%%',shadow=True,startangle=90,radius=0.25,center=(1,0),frame=True)
>>> ax.set_xticks([0,1]) #设置坐标轴刻度
>>> ax.set_yticks([0,1])
>>> ax.set_xticklabels(["Sunny","Cloudy"]) #设置坐标轴刻度上显示的标签
>>> ax.set_yticklabels(["Dry","Rainy"])
>>> ax.set_xlim((-0.5,1.5)) #设置坐标轴跨度
>>> ax.set_ylim((-0.5,1.5))
>>> ax.set_aspect('equal')
>>> plt.show()

在这里插入图片描述

>>> import numpy as np
>>> import pylab as pl
>>> import matplotlib.font_manager as fm
>>> myfont=fm.FontProperties(fname=r'C:\Windows\Fonts\STKAITI.TTF')
>>> t=np.arange(0.0,2.0*np.pi,0.01) #自变量的取值范围
>>> s=np.sin(t) #计算正弦函数值
>>> z=np.cos(t) #计算余弦函数值
>>> pl.plot(t,s,label='正弦')
[<matplotlib.lines.Line2D object at 0x0000001DE3290C88>]
>>> pl.plot(t,z,label='余弦')
[<matplotlib.lines.Line2D object at 0x0000001DE3186588>]
>>> pl.xlabel('x-变量',fontproperties='STKAITI',fontsize=24) #设置x标签
Text(0.5, 0, 'x-变量')
>>> pl.ylabel('y-正弦余弦函数值',fontproperties='STKAITI',fontsize=24) #设置y标签
Text(0, 0.5, 'y-正弦余弦函数值')
>>> pl.title('sin-cos函数图像',fontproperties='STKAITI',fontsize=32) #图形标题
Text(0.5, 1.0, 'sin-cos函数图像')
>>> pl.legend(prop=myfont) #设置图例
<matplotlib.legend.Legend object at 0x0000001DE3290128>
>>> pl.show()

在这里插入图片描述

>>> import matplotlib.pyplot as plt
>>> x=np.linspace(0,2*np.pi,500)
>>> y=np.sin(x)
>>> z=np.cos(x*x)
>>> plt.figure(figsize=(8,5)) #创建图形,大小为8×5英寸
<Figure size 800x500 with 0 Axes>
>>> plt.plot(x,y,label='$sin(x)$',color='red',linewidth=2) #红色,线宽为2磅;标签前后加$将使用内嵌的latex引擎将其显示为公式
[<matplotlib.lines.Line2D object at 0x0000001DE4473B00>]
>>> plt.plot(x,z,'b--',label='$cos(x^2)$') #蓝色,虚线
[<matplotlib.lines.Line2D object at 0x0000001DE4473588>]
>>> plt.xlabel('Time(s)')
Text(0.5, 0, 'Time(s)')
>>> plt.ylabel('Volt')
Text(0, 0.5, 'Volt')
>>> plt.title('Sin and Cos figure using pyplot')
Text(0.5, 1.0, 'Sin and Cos figure using pyplot')
>>> plt.ylim(-1.2,1.2)
(-1.2, 1.2)
>>> plt.legend() #显示图例
<matplotlib.legend.Legend object at 0x0000001DE31989E8>
>>> plt.show()

在这里插入图片描述

>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> x=np.linspace(0,2*np.pi,500) #创建自变量数组
>>> y1=np.sin(x) #创建函数值数组
>>> y2=np.cos(x)
>>> y3=np.sin(x*x)
>>> plt.figure(1) #创建图形
<Figure size 640x480 with 0 Axes>
>>> ax1=plt.subplot(2,2,1) #第一行第一列图形
>>> ax2=plt.subplot(2,2,2) #第一行第二列图形
>>> ax3=plt.subplot(2,1,2) #第二行
>>> plt.sca(ax1) #选择ax1
>>> plt.plot(x,y1,color='red') #绘制红色曲线
[<matplotlib.lines.Line2D object at 0x0000001DE3170358>]
>>> plt.ylim(-1.2,1.2) #限制y坐标轴的范围
(-1.2, 1.2)
>>> plt.sca(ax2) #选择ax2
>>> plt.plot(x,y2,'b--') #绘制蓝色曲线
[<matplotlib.lines.Line2D object at 0x0000001DE0ACF860>]
>>> plt.ylim(-1.2,1.2)
(-1.2, 1.2)
>>> plt.sca(ax3) #选择ax3
>>> plt.plot(x,y3,'g--')
[<matplotlib.lines.Line2D object at 0x0000001DE4333550>]
>>> plt.ylim(-1.2,1.2)
(-1.2, 1.2)
>>> plt.show()
>>> 

在这里插入图片描述

绘制三维参数曲线

>>> import matplotlib as mp1
>>> from mpl_toolkits.mplot3d import Axes3D
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> mp1.rcParams['legend.fontsize']=10 #图例字号
>>> fig=plt.figure()
>>> ax=fig.gca(projection='3d') #三维图形
>>> theta=np.linspace(-4*np.pi,4*np.pi,100)
>>> z=np.linspace(-4,4,100)*0.3 #测试数据
>>> r=z**3+1
>>> x=r*np.sin(theta)
>>> y=r*np.cos(theta)
>>> ax.plot(x,y,z,label='parametric curve')
[<mpl_toolkits.mplot3d.art3d.Line3D object at 0x0000001DE0454F98>]
>>> ax.legend()
<matplotlib.legend.Legend object at 0x0000001DE0454BA8>
>>> plt.show()

在这里插入图片描述

>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> import mpl_toolkits.mplot3d
>>> x,y=np.mgrid[-2:2:20j,-2:2:20j]
>>> z=50*np.sin(x+y) #测试数据
>>> ax=plt.subplot(111,projection='3d') #三维图形
>>> ax.plot_surface(x,y,z,rstride=2,cstride=1,cmap=plt.cm.Blues_r)
<mpl_toolkits.mplot3d.art3d.Poly3DCollection object at 0x0000001DE0AB3B38>
>>> ax.set_xlabel('X')
Text(0.5, 0, 'X')
>>> ax.set_ylabel('Y')
Text(0.5, 0, 'Y')
>>> ax.set_zlabel('Z')
Text(0.5, 0, 'Z')
>>> plt.show()
>>> 

在这里插入图片描述

>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> import mpl_toolkits.mplot3d
>>> rho,theta=np.mgrid[0:1:40j,0:2*np.pi:40j]
>>> z=rho**2
>>> x=rho*np.cos(theta)
>>> y=rho*np.sin(theta)
>>> ax=pl.subplot(111,projection='3d')
>>> ax.plot_surface(x,y,z)
<mpl_toolkits.mplot3d.art3d.Poly3DCollection object at 0x0000001DE0AC8828>
>>> pl.show()

在这里插入图片描述

文件读写

>>> df=pd.DataFrame({'A':[np.random.randint(1,100)for i in range(4)],'B':pd.date_range(start='20130101',periods=4,freq='D'),'C':pd.Series([1,2,3,4],index=['zhang','li','zhou','wang'],dtype='float32'),'D':np.array([3]*4,dtype='int32'),'E':pd.Categorical(["test","train","test","train"]),'F':'foo'})
>>> df
        A          B    C  D      E    F
zhang   4 2013-01-01  1.0  3   test  foo
li     48 2013-01-02  2.0  3  train  foo
zhou   44 2013-01-03  3.0  3   test  foo
wang   88 2013-01-04  4.0  3  train  foo
>>> df.to_excel('C:\\Users\\***\\Desktop\\test.xlsx',sheet_name='dfg') #将数据保存为Excel文件

在这里插入图片描述

创建词云

  • 扩展库wordcloud可以用来制作词云
  • pillow库提供了图像处理功能,可以结合两者创建词云头像
>>> import random
>>> import string
>>> import wordcloud
>>> def show(s):
	#创建wordcloud对象
	wc=wordcloud.WordCloud(
		r'C:\\windows\\fonts\\simfang.ttf',width=500,height=400,background_color='white',font_step=3,random_state=False,prefer_horizontal=0.9)
	#生成词云并保存为图片文件
	t=wc.generate(s)
	t.to_image().save('C:\\Users\\***\\Desktop\\t.png')

	
>>> show('''hello world 董付国 董付国 董付国 abc fgh yhnbgfd 董付国 董付国 董付国董付国 Pyhton great Python Python''')

在这里插入图片描述

>>> import string
>>> import random
>>> from PIL import Image
>>> import wordcloud
>>> def create(imgFile,s):
	im=Image.open(imgFile)
	w,h=im.size
	#创建wordcloud对象
	wc=wordcloud.WordCloud(r'C:\\windows\\fonts\\simfang.ttf',width=w,height=h,background_color='white',font_step=3,random_state=False,prefer_horizontal=0.9)
	#生成词云,并转换为图像以便后续按像素处理
	t=wc.generate(s)
	t=t.to_image()
	for w1 in range(w):
		for h1 in range(h):
			if im.getpixel((w1,h1))[:3] == (255,255,255):
				t.putpixel((w1,h1),(255,255,255))
	t.save('C:\\Users\\***\\Desktop\\result.png')

	
>>> chs=string.ascii_letters+string.digits+string.punctuation
>>> s=[''.join((random.choice(chs) for i in range(8))) for j in range(650)]
>>> s=''.join(s)
>>> create('C:\\Users\\***\\Desktop\\1.jpg',s)

在这里插入图片描述
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值