【Python数据处理】Pandas_关于series数据位于索引的左侧-CSDN博客

本文链接：https://blog.csdn.net/bajiaoyu517/article/details/113542302

Pandas相当于字典型的Numpy，可以给矩阵的下标命名。

1 Pandas安装

pip3 install pandas

2 主要数据结构

2.1 Series（带索引的一维数组）

Series的字符串表现形式为：索引在左边，值在右边。由于我们没有为数据指定索引，于是会自动创建一个0到N-1（N为长度）的整数型索引。

import numpy as np
import pandas as pd

# Build a Series (behaves much like a one-dimensional NumPy array) from a
# plain list. np.nan stands in for a missing (null) value. No index is
# passed, so pandas assigns the default integer labels 0..N-1.
s = pd.Series(data=[1, 3, 6, np.nan, 44, 1])
print(s)
# Printed output (index on the left, values on the right):
# 0     1.0
# 1     3.0
# 2     6.0
# 3     NaN
# 4    44.0
# 5     1.0
# dtype: float64

2.2 DataFrame（带索引的二维数组）

# Six daily timestamps starting at 2021-01-31; used below as row labels.
dates = pd.date_range(start='20210131', periods=6)
# print(dates)  # uncomment to inspect the generated dates

# 6x4 table of random samples: rows are labelled by date, columns by a-d.
# The values are random, so the numbers shown below are just one sample run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)
# Sample output:
#                    a         b         c         d
# 2021-01-31 -0.040826  0.551323 -0.691311 -0.426433
# 2021-02-01  0.683486  1.045233  0.013433  0.193675
# 2021-02-02  0.596419  0.351275 -0.455355 -0.152276
# 2021-02-03 -3.077742  0.219348 -0.311051 -0.583136
# 2021-02-04  0.441740 -1.472139  1.324866 -0.025587
# 2021-02-05  0.471326  0.607313  0.728324  0.436690

# Selecting a single column by its label yields a Series that keeps the
# date index of the frame.
print(df['b'])
# Sample output:
# 2021-01-31    0.551323
# 2021-02-01    1.045233
# 2021-02-02    0.351275
# 2021-02-03    0.219348
# 2021-02-04   -1.472139
# 2021-02-05    0.607313
# Freq: D, Name: b, dtype: float64

# DataFrame created without explicit labels: both the rows and the columns
# fall back to default integer labels starting at 0.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
# print(df1) would show:
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# DataFrame built from a dict: each key becomes a column label and each
# value supplies that column's data. Scalars ('a', 'b') are repeated for
# every row; the row labels come from the index of the Series in 'c'.
df2 = pd.DataFrame(data={
    'a': 1,
    'b': pd.Timestamp('20210202'),
    'c': pd.Series(np.array([1, 3, 5, 7]), index=[0, 1, 2, 3], dtype='int32'),
    'd': np.array([3] * 4, dtype='int32'),
    'e': pd.Categorical(["are", "you", "ok", "thank you"]),
})
print(df2)
# Printed output:
#    a          b  c  d          e
# 0  1 2021-02-02  1  3        are
# 1  1 2021-02-02  3  3        you
# 2  1 2021-02-02  5  3         ok
# 3  1 2021-02-02  7  3  thank you

import numpy as np
import pandas as pd

# NOTE(review): the sample outputs below are the ones recorded in the
# original article. Reprs depend on the pandas version (e.g. pandas >= 2.0
# no longer has Int64Index and prints Index([...], dtype='int64') instead),
# so confirm against the version you run.

# DataFrame built from a dict: each key becomes a column label and each
# value supplies that column's data. Scalars ('a', 'b') are repeated for
# every row; the row labels come from the index of the Series in 'c'.
df2 = pd.DataFrame(data={
    'a': 1,
    'b': pd.Timestamp('20210202'),
    'c': pd.Series(np.array([1, 3, 5, 7]), index=[0, 1, 2, 3], dtype='int32'),
    'd': np.array([3] * 4, dtype='int32'),
    'e': pd.Categorical(["are", "you", "ok", "thank you"]),
})
print(df2)
# Printed output:
#    a          b  c  d          e
# 0  1 2021-02-02  1  3        are
# 1  1 2021-02-02  3  3        you
# 2  1 2021-02-02  5  3         ok
# 3  1 2021-02-02  7  3  thank you

# Data type of every column.
print(df2.dtypes)
# a             int64
# b    datetime64[ns]
# c             int32
# d             int32
# e          category
# dtype: object

# Row labels.
print(df2.index)
# Int64Index([0, 1, 2, 3], dtype='int64')

# Column labels.
print(df2.columns)
# Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

# Only the underlying values, without any labels.
print(df2.values)
# [[1 Timestamp('2021-02-02 00:00:00') 1 3 'are']
#  [1 Timestamp('2021-02-02 00:00:00') 3 3 'you']
#  [1 Timestamp('2021-02-02 00:00:00') 5 3 'ok']
#  [1 Timestamp('2021-02-02 00:00:00') 7 3 'thank you']]

# Summary statistics (the recorded output covers the numeric columns).
print(df2.describe())
#          a         c    d
# count  4.0  4.000000  4.0
# mean   1.0  4.000000  3.0
# std    0.0  2.581989  0.0
# min    1.0  1.000000  3.0
# 25%    1.0  2.500000  3.0
# 50%    1.0  4.000000  3.0
# 75%    1.0  5.500000  3.0
# max    1.0  7.000000  3.0

# Transpose: rows become columns and columns become rows.
print(df2.T)
#                      0  ...                    3
# a                    1  ...                    1
# b  2021-02-02 00:00:00  ...  2021-02-02 00:00:00
# c                    1  ...                    7
# d                    3  ...                    3
# e                  are  ...            thank you
# [5 rows x 4 columns]

# Sort by labels: axis=1 orders the column labels, here in descending order.
print(df2.sort_index(axis=1, ascending=False))
#            e  d  c          b  a
# 0        are  3  1 2021-02-02  1
# 1        you  3  3 2021-02-02  1
# 2         ok  3  5 2021-02-02  1
# 3  thank you  3  7 2021-02-02  1

# Sort by values: order the rows by column 'c', largest first.
print(df2.sort_values(by='c', axis=0, ascending=False))
#    a          b  c  d          e
# 3  1 2021-02-02  7  3  thank you
# 2  1 2021-02-02  5  3         ok
# 1  1 2021-02-02  3  3        you
# 0  1 2021-02-02  1  3        are