pandas学习详细教程

最新推荐文章于 2023-02-14 22:45:14 发布

菇綡

最新推荐文章于 2023-02-14 22:45:14 发布

阅读量1k

点赞数 5

分类专栏： pandas 文章标签： pandas DataFrame Series

本文链接：https://blog.csdn.net/weixin_39667003/article/details/85803020

版权

pandas 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

pandas基础篇

我们打算从以下几个方面进行代码练习：

**  创建Series
**  Series基本操作
**  创建DataFrame
**  DataFrame基本操作
**  DataFrame文件操作
**  Series，DataFrame和多索引
**  透视表
**  数据清洗
**  数据预处理
**  可视化

创建 Series 数据类型：

1）从列表创建 Series

import pandas as pd
print(pd.__version__)
arr=[0,1,2,3,4,5]
s1=pd.Series(arr)
print(s1)
'''
结果：如果不指定索引，默认从0开始
0    0
1    1
2    2
3    3
4    4
5    5
'''

2）从 Ndarray 创建 Series

import numpy as np
n=np.random.rand(5)
index=['a','b','c','d','e']
s2=pd.Series(n,index=index)
print(s2)

3）从字典创建 Series

d={'a':1,'b':2,'c':3,'d':4,'e':5}
s3=pd.Series(d)
print(s3)

Series基本操作

#修改 Series 索引
print(s1)
s1.index=['A','B','C','D','E','F']
print(s1)

#Series 纵向拼接
s4=s3.append(s1)
print(s4)

#Series 按指定索引删除元素
L=['A','B','C','D','E']
s5=pd.Series(L)
print(s5)
s5=s5.drop(3) #删除索引为3的值
print(s5)
#结果
'''
dtype: int64
0    A
1    B
2    C
3    D
4    E
dtype: object
0    A
1    B
2    C
4    E
dtype: object
'''

#Series 修改指定索引元素
L=['A','B','C','D','E']
s5=pd.Series(L)
print(s5)
s5[3]=8
print(s5)
"""
dtype: object
0    A
1    B
2    C
3    D
4    E
dtype: object
0    A
1    B
2    C
3    8
4    E
"""

#Series 按指定索引查找元素
print(s5[4])
'''
结果：E
'''

#Series 切片操作，例如对s5的前 3 个数据访问
print(s5[:3])
'''
0    A
1    B
2    C
dtype: object
'''

#Series 加法运算,Series 的加法运算是按照索引计算，如果索引不同则填充为 NaN（空值）。
L=['A','B','C']
#L=[1,2,3,4,6]
s5=pd.Series(L)
L1=['A','C','D']
#L1=[2,3,4,5,7]
s6=pd.Series(L1)
#print(s5)
#print(s6)
#print(s5.add(s6))
#print(s5.sub(s6))
#print(s5.mul(s6))
#print(s5.div(s6))
'''
0    AA
1    BC
2    CD
dtype: object

0     3
1     5
2     7
3     9
4    13
dtype: int64
'''
#print(s5.median())#求中位数
L=[1,2,3,4,5]
s5=pd.Series(L)
print(s5)
print(s5.sum())#求和
print(s5.max())#求最大值
print(s5.min())#求最小值
'''
0    1
1    2
2    3
3    4
4    5
dtype: int64
15
5
1
'''

创建DataFrame

#创建DataFrame数据类型，与 Sereis 不同，DataFrame 可以存在多列数据。
#通过 NumPy 数组创建 DataFrame
datas=pd.date_range('today',periods=6)
num_arr=np.random.rand(6,4)
columns=['A','B','C','D']
df1=pd.DataFrame(num_arr,index=datas,columns=columns)
print(df1)
'''
                                   A         B         C         D
2018-10-23 10:01:57.288209  0.318049  0.648244  0.824381  0.991212
2018-10-24 10:01:57.288209  0.749709  0.450722  0.421013  0.640053
2018-10-25 10:01:57.288209  0.091959  0.307871  0.833770  0.567152
2018-10-26 10:01:57.288209  0.481489  0.932971  0.085914  0.689568
2018-10-27 10:01:57.288209  0.071565  0.897287  0.062263  0.627351
2018-10-28 10:01:57.288209  0.594281  0.856175  0.110247  0.172222
'''

#通过字典数组创建 DataFrame
data={'animal':['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
      'age':[2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
      'visits':[1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
      'priority':['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
labels=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df2=pd.DataFrame(data,index=labels)
print(df2)
'''
  animal  age  visits priority
a    cat  2.5       1      yes
b    cat  3.0       3      yes
c  snake  0.5       2       no
d    dog  NaN       3      yes
e    dog  5.0       2       no
f    cat  2.0       3       no
g  snake  4.5       1       no
h    cat  NaN       1      yes
i    dog  7.0       2       no
j    dog  3.0       1       no
'''
# 查看 DataFrame 的数据类型
print(df2.dtypes)
'''
animal       object
age         float64
visits        int64
priority     object
'''

DataFrame基本操作

#预览 DataFrame 的前 5 行数据,此方法对快速了解陌生数据集结构十分有用
print(df2.head(6))#默认为前五行
'''
   animal  age  visits priority
a    cat  2.5       1      yes
b    cat  3.0       3      yes
c  snake  0.5       2       no
d    dog  NaN       3      yes
e    dog  5.0       2       no
f    cat  2.0       3       no
'''
print(df2.tail(3))#查看后三行
'''
animal  age  visits priority
h    cat  NaN       1      yes
i    dog  7.0       2       no
j    dog  3.0       1       no
'''

#查看 DataFrame 的索引
print(df2.index)
#Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')

#查看 DataFrame 的列名
print(df2.columns) #Index(['animal', 'age', 'visits', 'priority'], dtype='object')

#查看 DataFrame 的数值
print(df2.values)
'''
[['cat' 2.5 1 'yes']
 ['cat' 3.0 3 'yes']
 ['snake' 0.5 2 'no']
 ['dog' nan 3 'yes']
 ['dog' 5.0 2 'no']
 ['cat' 2.0 3 'no']
 ['snake' 4.5 1 'no']
 ['cat' nan 1 'yes']
 ['dog' 7.0 2 'no']
 ['dog' 3.0 1 'no']]
'''

#查看 DataFrame 的统计数据
print(df2.describe())
'''
            age     visits
count  8.000000  10.000000
mean   3.437500   1.900000
std    2.007797   0.875595
min    0.500000   1.000000
25%    2.375000   1.000000
50%    3.000000   2.000000
75%    4.625000   2.750000
max    7.000000   3.000000
'''

#DataFrame 转置操作
print(df2.T)
'''
            a    b      c    d    e    f      g    h    i    j
animal    cat  cat  snake  dog  dog  cat  snake  cat  dog  dog
age       2.5    3    0.5  NaN    5    2    4.5  NaN    7    3
visits      1    3      2    3    2    3      1    1    2    1
priority  yes  yes     no  yes   no   no     no  yes   no   no
'''

#对 DataFrame 进行按列排序
print(df2.sort_values(by='age')) #按照age升序排列
'''
  animal  age  visits priority
c  snake  0.5       2       no
f    cat  2.0       3       no
a    cat  2.5       1      yes
b    cat  3.0       3      yes
j    dog  3.0       1       no
g  snake  4.5       1       no
e    dog  5.0       2       no
i    dog  7.0       2       no
d    dog  NaN       3      yes
h    cat  NaN       1      yes
'''

#对 DataFrame 数据切片
print(df2[1:3])
'''
  animal  age  visits priority
b    cat  3.0       3      yes
c  snake  0.5       2       no
'''

# 对 DataFrame 通过标签查询（单列）
print(df2['age'])
'''
a    2.5
b    3.0
c    0.5
d    NaN
e    5.0
f    2.0
g    4.5
h    NaN
i    7.0
j    3.0
Name: age, dtype: float64
'''

#对 DataFrame 通过标签查询（多列）
print(df2[['age','animal']])
'''
   age animal
a  2.5    cat
b  3.0    cat
c  0.5  snake
d  NaN    dog
e  5.0    dog
f  2.0    cat
g  4.5  snake
h  NaN    cat
i  7.0    dog
j  3.0    dog
'''
#对 DataFrame 通过位置查询
print(df2.iloc[1:3])
'''
  animal  age  visits priority
b    cat  3.0       3      yes
c  snake  0.5       2       no
'''

#DataFrame 副本拷贝,生成 DataFrame 副本，方便数据集被多个不同流程使用
df3=df2.copy()
print(df3)

#判断 DataFrame 元素是否为空
print(df3.isnull())
'''
   animal    age  visits  priority
a   False  False   False     False
b   False  False   False     False
c   False  False   False     False
d   False   True   False     False
e   False  False   False     False
f   False  False   False     False
g   False  False   False     False
h   False   True   False     False
i   False  False   False     False
j   False  False   False     False
'''

#添加列数据
num=pd.Series([0,1,2,3,4,5,6,7,8,9],index=df3.index)
df3['No.']=num
print(df3)
'''
  animal  age  visits priority  No.
a    cat  2.5       1      yes    0
b    cat  3.0       3      yes    1
c  snake  0.5       2       no    2
d    dog  NaN       3      yes    3
e    dog  5.0       2       no    4
f    cat  2.0       3       no    5
g  snake  4.5       1       no    6
h    cat  NaN       1      yes    7
i    dog  7.0       2       no    8
j    dog  3.0       1       no    9
'''

#根据 DataFrame 的下标值进行更改。
df3.iat[1,1]=4
print(df3)
'''
  animal  age  visits priority  No.
a    cat  2.5       1      yes    0
b    cat  4.0       3      yes    1
c  snake  0.5       2       no    2
d    dog  NaN       3      yes    3
e    dog  5.0       2       no    4
f    cat  2.0       3       no    5
g  snake  4.5       1       no    6
h    cat  NaN       1      yes    7
i    dog  7.0       2       no    8
j    dog  3.0       1       no    9
'''

#根据 DataFrame 的标签对数据进行修改
df3.loc['f','age']=1.5
print(df3)
'''
  animal  age  visits priority  No.
a    cat  2.5       1      yes    0
b    cat  4.0       3      yes    1
c  snake  0.5       2       no    2
d    dog  NaN       3      yes    3
e    dog  5.0       2       no    4
f    cat  1.5       3       no    5
g  snake  4.5       1       no    6
h    cat  NaN       1      yes    7
i    dog  7.0       2       no    8
j    dog  3.0       1       no    9
'''

#DataFrame 求平均值操作
print(df3.mean())
'''
age       3.5
visits    1.9
No.       4.5
dtype: float64
'''

#对 DataFrame 中任意列做求和操作
print(df3['visits'].sum()) #结果：19
#-----------------字符串操作----------------------------
# 将字符串转化为小写字母
string=pd.Series(['A', 'B', 'C', 'Aaba', 'Baca',
                    np.nan, 'CABA', 'dog', 'cat'])
print(string)
print(string.str.lower())
'''
0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object
0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object
'''
print(string.str.upper())#转换为大写

#---------------------DataFrame 缺失值操作-------------------
#对缺失值进行填充
df4=df3.copy()
print(df4)
print(df4.fillna(value=3))
'''
  animal  age  visits priority  No.
a    cat  2.5       1      yes    0
b    cat  4.0       3      yes    1
c  snake  0.5       2       no    2
d    dog  NaN       3      yes    3
e    dog  5.0       2       no    4
f    cat  1.5       3       no    5
g  snake  4.5       1       no    6
h    cat  NaN       1      yes    7
i    dog  7.0       2       no    8
j    dog  3.0       1       no    9
  animal  age  visits priority  No.
a    cat  2.5       1      yes    0
b    cat  4.0       3      yes    1
c  snake  0.5       2       no    2
d    dog  3.0       3      yes    3
e    dog  5.0       2       no    4
f    cat  1.5       3       no    5
g  snake  4.5       1       no    6
h    cat  3.0       1      yes    7
i    dog  7.0       2       no    8
j    dog  3.0       1       no    9
'''

#删除存在缺失值的行
df5=df3.copy()
print(df5.dropna(how='any')) ## 任何存在 NaN 的行都将被删除
'''
  animal  age  visits priority  No.
a    cat  2.5       1      yes    0
b    cat  4.0       3      yes    1
c  snake  0.5       2       no    2
e    dog  5.0       2       no    4
f    cat  1.5       3       no    5
g  snake  4.5       1       no    6
i    dog  7.0       2       no    8
j    dog  3.0       1       no    9
'''

#DataFrame 按指定列对齐
left=pd.DataFrame({'key':['foo1','foo2','foo3'],'one':[1,2,3]})
right=pd.DataFrame({'key':['foo2','foo3','foo4'],'one':[4,5,6]})
print(left)
print(right)
print(pd.merge(left,right,on='key'))
'''
    key  one
0  foo1    1
1  foo2    2
2  foo3    3
    key  one
0  foo2    4
1  foo3    5
2  foo4    6
    key  one_x  one_y
0  foo2      2      4
1  foo3      3      5
'''

DataFrame 文件操作

#CSV文件写入
print(df3.to_csv('animal.csv'))
print("写入成功")
#CSV 文件读取
df_animal=pd.read_csv('animal.csv')
print(df_animal)
'''
写入成功
  Unnamed: 0 animal  age  visits priority  No.
0          a    cat  2.5       1      yes    0
1          b    cat  4.0       3      yes    1
2          c  snake  0.5       2       no    2
3          d    dog  NaN       3      yes    3
4          e    dog  5.0       2       no    4
5          f    cat  1.5       3       no    5
6          g  snake  4.5       1       no    6
7          h    cat  NaN       1      yes    7
8          i    dog  7.0       2       no    8
9          j    dog  3.0       1       no    9

'''

# Excel 写入操作
df3.to_excel('animal.xlsx', sheet_name='Sheet1')
print("写入成功.")
#Excel 读取操作
print(pd.read_excel('animal.xlsx', 'Sheet1', index_col=None, na_values=['NA']))

pandas进阶篇

时间序列索引

dti=pd.date_range(start='2018-01-01',end='2018-12-31',freq='D')
s=pd.Series(np.random.rand(len(dti)),index=dti)
print(s)
'''
2018-01-01    0.819138
2018-01-02    0.698994
2018-01-03    0.535303
2018-01-04    0.201711
2018-01-05    0.614942
2018-01-06    0.046608
2018-01-07    0.236259
2018-01-08    0.575685
2018-01-09    0.647902
2018-01-10    0.154899
2018-01-11    0.104855
2018-01-12    0.658812
2018-01-13    0.840483
2018-01-14    0.404469
2018-01-15    0.919582
2018-01-16    0.000066
2018-01-17    0.267015
2018-01-18    0.059417
2018-01-19    0.115044
2018-01-20    0.919654
2018-01-21    0.536861
2018-01-22    0.514372
2018-01-23    0.167115
2018-01-24    0.812080
2018-01-25    0.689845
2018-01-26    0.651800
2018-01-27    0.523809
2018-01-28    0.484807
2018-01-29    0.110739
2018-01-30    0.401936
                ...   
2018-12-02    0.254287
2018-12-03    0.038438
2018-12-04    0.868603
2018-12-05    0.320990
2018-12-06    0.630131
2018-12-07    0.012450
2018-12-08    0.538951
2018-12-09    0.303097
2018-12-10    0.330134
2018-12-11    0.109834
2018-12-12    0.828355
2018-12-13    0.079942
2018-12-14    0.596775
2018-12-15    0.936687
2018-12-16    0.944666
2018-12-17    0.510053
2018-12-18    0.506758
2018-12-19    0.019668
2018-12-20    0.260052
2018-12-21    0.813267
2018-12-22    0.581939
2018-12-23    0.058081
2018-12-24    0.268361
2018-12-25    0.992682
2018-12-26    0.167770
2018-12-27    0.253564
2018-12-28    0.845280
2018-12-29    0.744601
2018-12-30    0.262743
2018-12-31    0.415526
Freq: D, Length: 365, dtype: float64
'''

#统计s中每一个周三对应值的和
print(s[s.index.weekday==2].sum()) #23.771697397833957

#统计s中每个月值的平均值
print(s.resample('M').mean())
'''
2018-01-31    0.559565
2018-02-28    0.515923
2018-03-31    0.599616
2018-04-30    0.465701
2018-05-31    0.613371
2018-06-30    0.477651
2018-07-31    0.510719
2018-08-31    0.495345
2018-09-30    0.583058
2018-10-31    0.584103
2018-11-30    0.599802
2018-12-31    0.562433
Freq: M, dtype: float64
'''

#UTC世界时间标准
s=pd.date_range('today',periods=1,freq='D')
ts=pd.Series(np.random.rand(len(s)),s)
ts_utc=ts.tz_localize('UTC')#转换为UTC时间
print(ts_utc)
'''
2018-10-23 14:24:42.755209+00:00    0.088392
Freq: D, dtype: float64
'''
#转换为上海所在的时区
ts_shanghai=ts_utc.tz_convert('Asia/Shanghai')
print(ts_shanghai)
'''
2018-10-23 22:28:33.716209+08:00    0.237161
Freq: D, dtype: float64
'''

#不同时间表示方式的转换
rng=pd.date_range('1/1/2018',periods=5,freq='M')
ts=pd.Series(np.random.rand(len(rng)),index=rng)
print(ts)
ps=ts.to_period()
print(ps)
print(ps.to_timestamp)
'''
2018-01-31    0.794304
2018-02-28    0.165253
2018-03-31    0.231160
2018-04-30    0.296905
2018-05-31    0.847177
Freq: M, dtype: float64
2018-01    0.794304
2018-02    0.165253
2018-03    0.231160
2018-04    0.296905
2018-05    0.847177
Freq: M, dtype: float64
<bound method Series.to_timestamp of 2018-01    0.794304
2018-02    0.165253
2018-03    0.231160
2018-04    0.296905
2018-05    0.847177
Freq: M, dtype: float64>
'''

创建多重索引

#构建一个letters=['A','B','C']和numbers=liat(range(10))为索引，值为随机数的多重索引Series
letters=['A','B','C']
numbers=list(range(10))
mi=pd.MultiIndex.from_product([letters,numbers])
s=pd.Series(np.random.rand(30),index=mi)
print(s)
'''
A  0    0.293706
   1    0.887773
   2    0.856729
   3    0.435714
   4    0.997529
   5    0.732427
   6    0.166934
   7    0.717487
   8    0.389486
   9    0.679912
B  0    0.149687
   1    0.805469
   2    0.238372
   3    0.908750
   4    0.025010
   5    0.109468
   6    0.909549
   7    0.132050
   8    0.084095
   9    0.186603
C  0    0.536121
   1    0.191505
   2    0.287627
   3    0.729340
   4    0.089618
   5    0.265803
   6    0.027589
   7    0.056560
   8    0.174068
   9    0.021970
dtype: float64
'''
#多重索引Series查询
#查询索引为1,3,6的值
print(s.loc[:,[1,3,6]])
'''
A  1    0.705519
   3    0.733832
   6    0.311256
B  1    0.168374
   3    0.204631
   6    0.453191
C  1    0.935524
   3    0.306979
   6    0.597759
dtype: float64
'''
#多重索引切片
print(s.loc[pd.IndexSlice[:'B',5:]])
"""
A  5    0.745123
   6    0.316538
   7    0.647127
   8    0.152854
   9    0.726968
B  5    0.469216
   6    0.586324
   7    0.185740
   8    0.704940
   9    0.540358
dtype: float64
"""
#根据多重索引创建 DataFrame
#创建一个以 letters = ['A', 'B'] 和 numbers = list(range(6))为索引，值为随机数据的多重索引 DataFrame。
frame=pd.DataFrame(np.arange(12).reshape(6,2),index=[list('AAABBB'),list('123123')],columns=['hello','shiyanlou'])
print(frame)
'''
     hello  shiyanlou
A 1      0          1
  2      2          3
  3      4          5
B 1      6          7
  2      8          9
  3     10         11
'''
frame.index.names=['first','second']
print(frame)
'''
              hello  shiyanlou
first second                  
A     1           0          1
      2           2          3
      3           4          5
B     1           6          7
      2           8          9
      3          10         11
'''

print(frame.groupby('first').sum())
'''
       hello  shiyanlou
first                  
A          6          9
B         24         27
'''

print(frame.groupby('second').sum())
'''
 hello  shiyanlou
second                  
1           6          8
2          10         12
3          14         16
'''

# DataFrame 行列名称转换
print(frame)
print(frame.stack())
'''
first  second           
A      1       hello         0
               shiyanlou     1
       2       hello         2
               shiyanlou     3
       3       hello         4
               shiyanlou     5
B      1       hello         6
               shiyanlou     7
       2       hello         8
               shiyanlou     9
       3       hello        10
               shiyanlou    11
dtype: int32
'''

#DataFrame 索引转换
print(frame.unstack())
'''
       hello        shiyanlou       
second     1  2   3         1  2   3
first                               
A          0  2   4         1  3   5
B          6  8  10         7  9  11
'''

#DataFrame 条件查找

data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df=pd.DataFrame(data,index=labels)
print(df)
'''
  animal  age  visits priority
a    cat  2.5       1      yes
b    cat  3.0       3      yes
c  snake  0.5       2       no
d    dog  NaN       3      yes
e    dog  5.0       2       no
f    cat  2.0       3       no
g  snake  4.5       1       no
h    cat  NaN       1      yes
i    dog  7.0       2       no
j    dog  3.0       1       no
'''
#查找 age 大于 3 的全部信息
print(df[df['age']>3])
'''
  animal  age  visits priority
e    dog  5.0       2       no
g  snake  4.5       1       no
i    dog  7.0       2       no
'''
#根据行列索引切片
print(df.iloc[2:4,1:3])
'''
   age  visits
c  0.5       2
d  NaN       3
'''
#查找 age<3 且为 cat 的全部数据。
print(df[(df['animal']=='cat')&(df['age']<3)])
'''
  animal  age  visits priority
a    cat  2.5       1      yes
f    cat  2.0       3       no
'''

print(df[df['animal'].isin(['cat','dog'])])
'''
  animal  age  visits priority
a    cat  2.5       1      yes
b    cat  3.0       3      yes
d    dog  NaN       3      yes
e    dog  5.0       2       no
f    cat  2.0       3       no
h    cat  NaN       1      yes
i    dog  7.0       2       no
j    dog  3.0       1       no
'''
#DataFrame 按标签及列名查询
print(df.loc[df.index[[3,4,8]],['animal','age']])
'''
  animal  age
d    dog  NaN
e    dog  5.0
i    dog  7.0
'''
#按照 age 降序，visits 升序排列
print(df.sort_values(by=['age','visits'],ascending=[False,True]))

#将 priority 列的 yes 值替换为 True，no 值替换为 False。
print(df['priority'].map({'yes':True,'no':False}))
'''
a     True
b     True
c    False
d     True
e    False
f    False
g    False
h     True
i    False
j    False
'''

print(df.groupby('animal').sum())
'''
animal              
cat      7.5       8
dog     15.0       8
snake    5.0       3
'''

#使用列表拼接多个 DataFrame
temp_df1=pd.DataFrame(np.random.randn(5,4))
temp_df2=pd.DataFrame(np.random.randn(5,4))
temp_df3=pd.DataFrame(np.random.randn(5,4))
print(temp_df1)
print(temp_df2)
print(temp_df3)
pieces=[temp_df1,temp_df2,temp_df3]
print(pd.concat(pieces))

#找出 DataFrame 表中和最小的列
df=pd.DataFrame(np.random.random(size=(5,10)),columns=list('abcdefghij'))
print(df)
print(df.sum().idxmin()) #[5 rows x 10 columns]  b

#DataFrame 中每个元素减去每一行的平均值
df = pd.DataFrame(np.random.random(size=(5, 3)))
print(df)
print(df.sub(df.mean(axis=1), axis=0))

菇綡

关注

5
点赞
踩
26

收藏

觉得还不错? 一键收藏
0
评论
pandas学习详细教程

pandas基础篇我们打算从以下几个方面进行代码练习：** 创建Series** Series基本操作** 创建DataFrame** DataFrame基本操作** DataFrame文件操作** Series，DataFrame和多索引** 透视表** 数据清洗** 数据预处理** 可视化创建 Series 数据类型：1）从列表创建 S...
复制链接

扫一扫