# -*- coding: utf-8 -*-
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
# One of the most important pandas features: arithmetic between objects
# with different indexes is aligned on the index labels.
s1 = pd.Series(data=[1, 2, 3, 4], index=list('abcd'))
s2 = pd.Series(data=[5, 6, 7, 8], index=list('abdf'))
# Labels found in only one operand ('c' and 'f') produce NaN, which also
# promotes the result to float.
print(s1.add(s2))
# a     6.0
# b     8.0
# c     NaN
# d    11.0
# f     NaN
# dtype: float64
# DataFrame arithmetic.
df1 = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=['Oh', 'Te', 'Co'],
    columns=['b', 'c', 'd'],
)
df2 = pd.DataFrame(
    data=np.arange(12).reshape((4, 3)),
    index=['Ut', 'Oh', 'Te', 'Or'],
    columns=['b', 'd', 'e'],
)
print(df1)
#     b  c  d
# Oh  0  1  2
# Te  3  4  5
# Co  6  7  8
print(df2)
#     b   d   e
# Ut  0   1   2
# Oh  3   4   5
# Te  6   7   8
# Or  9  10  11

# Adding DataFrames only yields a value where BOTH the row label and the
# column label exist in each operand; the position of the row/column inside
# each frame does not matter.
print(df1.add(df2))
#       b   c     d   e
# Co  NaN NaN   NaN NaN
# Oh  3.0 NaN   6.0 NaN
# Or  NaN NaN   NaN NaN
# Te  9.0 NaN  12.0 NaN
# Ut  NaN NaN   NaN NaN
# Arithmetic with a fill value, so that cells missing from one operand do
# not default to NaN.
df1 = pd.DataFrame(data=np.arange(12).reshape((3, 4)), columns=['a', 'b', 'c', 'd'])
print(df1)
#    a  b   c   d
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11
df2 = pd.DataFrame(data=np.arange(20).reshape((4, 5)), columns=['a', 'b', 'c', 'd', 'e'])
print(df2)
#     a   b   c   d   e
# 0   0   1   2   3   4
# 1   5   6   7   8   9
# 2  10  11  12  13  14
# 3  15  16  17  18  19

# Plain addition: row 3 and column 'e' are absent from df1, hence NaN.
print(df1.add(df2))
#       a     b     c     d   e
# 0   0.0   2.0   4.0   6.0 NaN
# 1   9.0  11.0  13.0  15.0 NaN
# 2  18.0  20.0  22.0  24.0 NaN
# 3   NaN   NaN   NaN   NaN NaN

# Supplying a fill value: a cell missing from one operand is treated as 0
# before the addition.
df3 = df1.add(other=df2, fill_value=0)
print(df3)
#       a     b     c     d     e
# 0   0.0   2.0   4.0   6.0   4.0
# 1   9.0  11.0  13.0  15.0   9.0
# 2  18.0  20.0  22.0  24.0  14.0
# 3  15.0  16.0  17.0  18.0  19.0

# Re-specify the columns: conform df1 to df2's column labels, filling the
# newly introduced column 'e' with 0.
target_columns = df2.columns
df4 = df1.reindex(columns=target_columns, fill_value=0)
print(df4)
#    a  b   c   d  e
# 0  0  1   2   3  0
# 1  4  5   6   7  0
# 2  8  9  10  11  0
# Arithmetic between a DataFrame and a Series.

# NumPy broadcasting: subtracting the first row from a 2-D array applies the
# subtraction to every row.
arr = np.arange(12).reshape(3, 4)
print(arr)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]
print(arr[0])
# [0 1 2 3]
print(arr - arr[0])
# [[0 0 0 0]
#  [4 4 4 4]
#  [8 8 8 8]]

# DataFrame broadcasting.
frame = DataFrame(np.arange(12).reshape(4, 3), columns=list('bde'), index=['Ut', 'Oh', 'Te', 'Or'])
print(frame)
#     b   d   e
# Ut  0   1   2
# Oh  3   4   5
# Te  6   7   8
# Or  9  10  11

# Take the first row by position. The original used `frame.ix[0]`; the `.ix`
# indexer was deprecated in pandas 0.20 and removed in pandas 1.0, so
# `.iloc` is used for the positional lookup instead.
series = frame.iloc[0]
print(series)
# b    0
# d    1
# e    2
# Name: Ut, dtype: int32   (integer width follows NumPy's default; the
#                           original run showed int32)

# The Series index is matched against the DataFrame columns and the
# operation is broadcast down the rows.
print(frame - series)
#     b  d  e
# Ut  0  0  0
# Oh  3  3  3
# Te  6  6  6
# Or  9  9  9
series2 = Series(range(3), index=list('bef'))
print(series2)
# b    0
# e    1
# f    2
# dtype: int64

# Labels that do not exist in both operands ('d' and 'f') become NaN.
print(series2 + frame)
#       b   d     e   f
# Ut  0.0 NaN   3.0 NaN
# Oh  3.0 NaN   6.0 NaN
# Te  6.0 NaN   9.0 NaN
# Or  9.0 NaN  12.0 NaN

# To broadcast across the columns instead, match on the row index by passing
# axis=0 to the arithmetic method.
series3 = frame['d']
print(series3)
print(frame.sub(series3, axis=0))
#     b  d  e
# Ut -1  0  1
# Oh -1  0  1
# Te -1  0  1
# Or -1  0  1
# Python data analysis, part 6: arithmetic with Series and DataFrame
# (Source page footer: "latest recommended article published 2024-08-18 20:08:34")