一、Series
创建
# 1. Create a Series from random values; the row labels are supplied here
#    as a list of strings.
b = pd.Series(np.random.randn(5), index=list('abcde'))
print(b)

# 2. Inspect the index (row labels) of the Series.
print(b.index)

# 3. When no index is given, a default integer index starting at 0 is used.
c = pd.Series(np.random.randn(5))
print(c)
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 0.168710
b 0.436219
c -0.236762
d 0.426008
e -0.951852
dtype: float64
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
0 1.506954
1 0.282373
2 0.963675
3 0.671374
4 -0.936167
dtype: float64
Process finished with exit code 0
# 1. Create a Series from a dict. Labels requested in `index` that are not
#    keys of the dict ('c' here) are filled with NaN.
d = {'a': 0., 'b': 1., 'd': 3.}
b = pd.Series(d, index=['a', 'b', 'c', 'd'])  # `index` holds the row labels
print(b)
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 0.0
b 1.0
c NaN
d 3.0
dtype: float64
Process finished with exit code 0
# 1. Create a Series from a scalar: the value is repeated for every label
#    given in `index`.
b = pd.Series(5, index=['a', 'b', 'c', 'd'])  # `index` holds the row labels
print(b)
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 5
b 5
c 5
d 5
dtype: int64
Process finished with exit code 0
支持ndarray数组操作
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
print(s)

# 1. ndarray-style positional access.
#    `.iloc` is used so that integers are always interpreted as positions.
#    Plain `s[0]` / `s[[1, 3, 4]]` on a string-labelled Series relies on a
#    positional fallback that newer pandas versions deprecate.
print(s.iloc[0])            # single element by position
print(s.iloc[:3])           # slice; the end position is exclusive
print(s.iloc[2:5])
print(s.iloc[[1, 3, 4]])    # a list of integer positions is supported too
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 1.147490
b -0.084418
c -0.777898
d -0.488580
e -0.146267
dtype: float64
1.1474904747052694
a 1.147490
b -0.084418
c -0.777898
dtype: float64
c -0.777898
d -0.488580
e -0.146267
dtype: float64
b -0.084418
d -0.488580
e -0.146267
dtype: float64
s = pd.Series(np.random.randn(5), index=list('abcde'))
print(s)

# 1. NumPy functions can be applied directly to a Series; the result is
#    computed element by element and keeps the same labels.
for ufunc in (np.sin, np.exp):
    print(ufunc(s))
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 2.122820
b -0.081273
c -0.144711
d -1.461435
e 0.341797
dtype: float64
a 0.851465
b -0.081183
c -0.144206
d -0.994026
e 0.335181
dtype: float64
a 8.354667
b 0.921942
c 0.865273
d 0.231903
e 1.407474
dtype: float64
s = pd.Series(np.random.randn(5), index=list('abcde'))
print(s)

# 3. Dict-style access: read a value by its label ...
print(s['a'])
# ... and overwrite the value stored under an existing label.
s['b'] = 3
print(s)

# 4. Assigning to a label that does not exist yet adds a new entry.
s['g'] = 100
print(s)

# 5. Reading a missing label with square brackets raises an error,
#    e.g. s['f'].

# 6. `get` returns None for a missing label, or the supplied default.
print(s.get('f'), s.get('f', 0), sep='\n')
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 0.980320
b 0.547169
c 2.342473
d -1.520565
e 0.589286
dtype: float64
0.9803196644381337
a 0.980320
b 3.000000
c 2.342473
d -1.520565
e 0.589286
dtype: float64
a 0.980320
b 3.000000
c 2.342473
d -1.520565
e 0.589286
g 100.000000
dtype: float64
None
0
Process finished with exit code 0
s1 = pd.Series(np.random.randn(3), index=list('ace'))
s2 = pd.Series(np.random.randn(3), index=list('ade'))

# 1. Print both Series, separated by one blank line.
print(s1, s2, sep='\n\n')

# 2. Adding two Series aligns them by label: 'a' is added to 'a', and so on.
#    A label present in only one operand ('c' or 'd') yields NaN.
print(s1 + s2)
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a 0.859558
c 1.547378
e 0.135625
dtype: float64
a 1.705699
d -0.591336
e -1.058273
dtype: float64
a 2.565257
c NaN
d NaN
e -0.922649
dtype: float64
Process finished with exit code 0
二、DataFrame
每行每列都可以看成是Series序列
创建
# 1. Build a DataFrame from a dict of Series. Each key becomes a column and
#    the Series are aligned on their labels, so 'one' gets NaN for row 'd'.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}
data = pd.DataFrame(d)
print(data)

# 2. Request only three row labels, in a chosen order; the data is aligned
#    to them automatically.
df1 = pd.DataFrame(d, index=['b', 'd', 'a'])
print(df1)

# 3. Choose the column labels. 'three' is not a key of the dict, so that
#    column is filled with NaN.
df2 = pd.DataFrame(d, columns=['two', 'three'])
print(df2)
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
one two
a 1.0 1
b 2.0 2
c 3.0 3
d NaN 4
one two
b 2.0 2
d NaN 4
a 1.0 1
two three
a 1 NaN
b 2 NaN
c 3 NaN
d 4 NaN
Process finished with exit code 0
# 1. When the dict values are plain lists they must all have the same
#    length, otherwise an error is raised. Lists carry no index of their own,
#    so any index has to be specified on the DataFrame itself.
data = pd.DataFrame(
    {
        'one': [1, 2, 3, 4],
        'two': [21, 22, 23, 24],
    }
)
print(data)
# For contrast, a dict of Series does not have this restriction, e.g.
# Series with 3 and 4 entries, because Series are aligned by label.
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
one two
0 1 21
1 2 22
2 3 23
3 4 24
Process finished with exit code 0
# 1. Build a DataFrame from a list of row tuples; both the row labels and
#    the column labels are chosen explicitly.
data = [(1, 2.2, 'Hello'), (2, 3., 'World')]
data1 = pd.DataFrame(
    data,
    index=['one', 'two'],
    columns=['A', 'B', 'C'],
)
print(data1)
结果:
A B C
one 1 2.2 Hello
two 2 3.0 World
# 1. Build a DataFrame from a list of dicts (one dict per row). Row labels
#    can be given explicitly. Column labels come from the dict keys, and an
#    explicit `columns` list is aligned against those keys.
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
data1 = pd.DataFrame(data, index=['A', 'B'], columns=['a', 'b'])
print(data1)

# A requested column that is not a key in any dict ('e') is filled with NaN.
# If no row labels are given, integer labels are assigned automatically.
data2 = pd.DataFrame(data, index=['A', 'B'], columns=['a', 'b', 'e'])
print(data2)
结果:
D:\ProgramData\Anaconda3\python.exe D:/numpy-kexue/03.py
a b
A 1 2
B 5 10
a b e
A 1 2 NaN
B 5