pandas的使用

test1:

import pandas as pd
import numpy as np

# Build a small frame from a dict of columns. Scalars ('A', 'B', 'E') are
# broadcast to the length of the row index, which comes from the Series in 'C'.
row_labels = ['one', 'two', 'three', 'four']
column_spec = {
    'A': 1.,
    'B': pd.Timestamp('20190326'),
    'C': pd.Series(1., index=row_labels, dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': 'fool',
}
df2 = pd.DataFrame(column_spec)

# Show the frame itself, then its metadata, raw values, summary statistics
# and transpose. None of these calls modifies df2.
for view in (
    df2,
    df2.dtypes,
    df2.index,
    df2.columns,
    df2.values,
    df2.describe(),
    df2.T,
):
    print(view)

# Sort by labels in descending order: first the column labels (axis=1),
# then the row labels (axis=0).
for axis in (1, 0):
    print(df2.sort_index(axis=axis, ascending=False))

test2:

import numpy as np
import pandas as pd

# Six consecutive days as the row index, values 0..23 laid out row by row.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=list('ABCD'),
)

first_day = '20130101'
third_day = '20130103'

# Basic selection: the whole frame, one column (two spellings), the first
# three rows by position, and a row range by date label.
for selection in (
    df,
    df['A'],
    df.A,
    df[:3],
    df[first_day:third_day],
):
    print(selection)

# Select by label: loc
for selection in (
    df.loc[first_day],
    df.loc[:, 'A'],
    df.loc[first_day, ['A', 'B']],
):
    print(selection)

# Select by position: iloc
for selection in (
    df.iloc[3],
    df.iloc[3, 1],
    df.iloc[1:3, 1:2],
):
    print(selection)

# Boolean indexing: keep only the rows whose A value is greater than 8.
rows_with_large_a = df.A > 8
print(df[rows_with_large_a])


test3:

import numpy as np
import pandas as pd

# Assigning values into a DataFrame by position, by label and by mask.
dates = pd.date_range('20190326', periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Set one cell by integer position (third row, fourth column).
df.iloc[2, 3] = 111
print(df)

# Set one cell by row label and column label.
df.loc['20190326', 'A'] = 123
print(df)

# Set every column to 0 in the rows where A is greater than 16.
df[df.A > 16] = 0
print(df)

# Set only column B to 0 in the rows where A is greater than 13.
# A single .loc call is used instead of the chained form df.B[mask] = 0:
# the chained form assigns through an intermediate Series, which triggers
# SettingWithCopyWarning and does not update df under Copy-on-Write.
df.loc[df.A > 13, 'B'] = 0
print(df)

# Add a new column F filled with NaN (cells can also be set individually).
df['F'] = np.nan
print(df)

# Add a new column E from a Series; values are aligned on the date index.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20190326', periods=6))
print(df)

test4:

import numpy as np
import pandas as pd

# Handling missing data.
dates = pd.date_range('20190327', periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])

# NaN is a float, so the two columns that will receive it are cast to float
# explicitly. Writing NaN straight into an integer column relies on a silent
# upcast that pandas has deprecated (FutureWarning since pandas 2.1).
df = df.astype({'B': 'float64', 'C': 'float64'})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)

# Element-wise mask: True where a value is missing, False otherwise.
print(df.isnull())

# Single boolean: True if at least one value anywhere in the frame is missing.
print(df.isnull().values.any())

# Drop every row (axis=0) that contains at least one NaN. `how` may be 'any'
# or 'all'; with how='all' a row is dropped only when all its values are NaN.
print(df.dropna(axis=0, how='any'))

# Replace every NaN with 0.
print(df.fillna(value=0))

test5:

import numpy as np
import pandas as pd

# Concatenating
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
print(df1)
print(df2)
print(df3)

# Stack the frames vertically (axis=0). ignore_index=True renumbers the rows
# so the result does not carry the repeated labels 0, 1, 2 of each input.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)

# join: 'inner' or 'outer'
df4 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=['1', '2', '3'])
df5 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=['2', '3', '4'])
print(df4)
print(df5)

# 'outer' (the default) keeps the union of the columns; a column missing from
# one of the inputs is filled with NaN for that input's rows.
res2 = pd.concat([df4, df5], join='outer')
print(res2)

# 'inner' keeps only the columns present in every input. The repeated row
# labels can be renumbered with ignore_index=True if needed.
res3 = pd.concat([df4, df5], join='inner')
print(res3)

# Side-by-side concat (axis=1) restricted to df4's row labels.
# The former join_axes=[df4.index] argument was removed from pd.concat in
# pandas 1.0; reindexing df5 to df4's index first gives the same result:
# labels df5 lacks become NaN, labels only df5 has are dropped.
res4 = pd.concat([df4, df5.reindex(df4.index)], axis=1)
print(res4)

test6:

import numpy as np
import pandas as pd

# Merging two DataFrames on one or more key columns.
left = pd.DataFrame({
    'Key': ['k0', 'k1', 'k2', 'k3'],
    'A': ['a0', 'a1', 'a2', 'a3'],
    'B': ['b0', 'b1', 'b2', 'b3'],
})
right = pd.DataFrame({
    'Key': ['k0', 'k1', 'k2', 'k3'],
    'C': ['c0', 'c1', 'c2', 'c3'],
    'D': ['d0', 'd1', 'd2', 'd3'],
})
for frame in (left, right):
    print(frame)

res = pd.merge(left, right, on='Key')
print(res)

# Two key columns.
left1 = pd.DataFrame({
    'Key1': ['k0', 'k0', 'k1', 'k2'],
    'Key2': ['k0', 'k1', 'k0', 'k1'],
    'A': ['a0', 'a1', 'a2', 'a3'],
    'B': ['b0', 'b1', 'b2', 'b3'],
})
right1 = pd.DataFrame({
    'Key1': ['k0', 'k1', 'k1', 'k2'],
    'Key2': ['k0', 'k0', 'k0', 'k0'],
    'C': ['c0', 'c1', 'c2', 'c3'],
    'D': ['d0', 'd1', 'd2', 'd3'],
})
for frame in (left1, right1):
    print(frame)

join_keys = ['Key1', 'Key2']

# The default is how='inner': only key pairs present on both sides are kept.
# left1 has one (k1, k0) row while right1 has two, so that left row's A/B
# values are repeated once for each matching C/D row.
# how may be one of 'left', 'right', 'inner', 'outer':
#   how='outer' keeps everything; cells with no match are filled with NaN
#   how='left'  keeps the keys of the left frame; missing cells become NaN
#   how='right' keeps the keys of the right frame; missing cells become NaN
res1 = pd.merge(left1, right1, on=join_keys)
print(res1)

# indicator=True adds a `_merge` column whose values are
# both / left_only / right_only.
res2 = pd.merge(left1, right1, how='outer', on=join_keys, indicator=True)
print(res2)

# Give the indicator column a custom name.
res3 = pd.merge(left1, right1, how='outer', on=join_keys, indicator='indicator_column')
print(res3)

# left_index and right_index: with both set to True the frames are joined on
# their row indexes instead of on key columns.
res4 = pd.merge(left1, right1, how='outer', left_index=True, right_index=True)
print(res4)

# Overlapping column names: both frames have an `age` column, so a suffix is
# appended to each side's copy to tell them apart.
boys = pd.DataFrame({
    'K': ['k0', 'k1', 'k2'],
    'age': [1, 2, 3],
})
girls = pd.DataFrame({
    'K': ['k0', 'k0', 'k3'],
    'age': [4, 5, 6],
})
for frame in (boys, girls):
    print(frame)

res5 = pd.merge(boys, girls, on='K', suffixes=['_boys', '_girls'])
print(res5)

test7:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Plotting data with pandas and matplotlib.

# Series: 1000 random samples indexed 0..999, turned into a running total.
samples = np.random.randn(1000)
data = pd.Series(samples, index=np.arange(1000)).cumsum()

data.plot()
plt.show()
# With matplotlib alone, plt.plot(x=..., y=...) draws data as a line and
# plt.scatter(x=..., y=...) draws it as points; pandas objects can be drawn
# directly through their .plot accessor.
# Other plot methods:
#     'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'

# DataFrame: 1000 rows of random samples in four columns A-D.
data1 = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
)

# Print the first 3 rows; head() defaults to 5 rows when no count is given.
print(data1.head(3))

# Column-wise running totals of data1.
data2 = data1.cumsum()

# One figure for the raw samples and one for the running totals.
for frame in (data1, data2):
    frame.plot()

plt.show()

# A scatter plot takes exactly two variables, so x and y must be named.
data2.plot.scatter(x='A', y='B', color='DarkBlue', label='Class1')
plt.show()

# Draw several groups on one scatter plot by reusing the first call's axes.
# NOTE(review): Class1 is taken from data2 (running totals) while Class2 is
# taken from data1 (raw samples) - confirm this mix is intended.
a = data2.plot.scatter(x='A', y='B', color='DarkBlue', label='Class1')
data1.plot.scatter(x='A', y='C', color='DarkGreen', label='Class2', ax=a)
plt.show()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值