Pandas常用操作

# -*- coding: utf-8 -*-
"""
Created on Fri Jan 25 15:08:25 2019

@author: ZengWei
"""

import pandas as pd
import numpy as np

'''
Part 1:基础操作
'''

# A Series is a labelled one-dimensional array; np.nan marks a missing value.
s = pd.Series(data=[1, 3, 6, np.nan, 44, 1])
# Six consecutive daily timestamps starting at 2016-01-01.
dates = pd.date_range(start='20160101', periods=6)

# 6x4 frame of standard-normal samples: rows labelled by date, columns 'a'..'d'.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('abcd'),
)

# No labels supplied, so rows and columns get default integer labels.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))

# Dict-style construction: every key becomes a column. Scalars ('A', 'B', 'F')
# are repeated for each of the four rows defined by the other columns.
df2 = pd.DataFrame(dict(
    A=1,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.full(4, 3, dtype='int32'),
    E=pd.Categorical(['test', 'train', 'test', 'train']),
    F='foo',
))

# The expressions below only show something in an interactive session;
# when run as a script their results are discarded.
df2.dtypes     # dtype of each column
df2.index      # row labels
df2.columns    # column labels
df2.values     # the data as a NumPy array

df2.describe()    # summary statistics
df2.transpose()   # rows <-> columns (same as df2.T)

# Sort by labels: first the column labels, then the row labels, both descending.
df2.sort_index(axis='columns', ascending=False)
df2.sort_index(axis='index', ascending=False)

# Sort rows by the values in column 'E'.
df2.sort_values(by='E')

''' 
Part 2:选择数据
'''
# NOTE: df is the 6x4 frame built earlier in this script, with columns
# 'a'..'d' and a daily index starting at 2016-01-01, so every label used
# here must come from those sets (the previous 'A'/'B'/'C' and 2013 dates
# raised KeyError / AttributeError).

# A single column: item access and attribute access return the same Series.
print(df['a'], df.a)
# Row slices: by position (end excluded) or by date label (both ends included).
print(df[0:3], df['20160102':'20160104'])

# select by label: loc
print(df.loc['20160102'])
print(df.loc[:, ['a', 'b']])

# select by position: iloc
print(df.iloc[3:5, 1:3])
print(df.iloc[[1, 3, 5], 1:3])

# mixed selection: first three rows by position, columns by label.
# DataFrame.ix was removed in pandas 1.0, so combine iloc and loc instead.
print(df.iloc[:3].loc[:, ['a', 'c']])

# Boolean indexing: keep the rows whose 'a' value is positive.
# (The data are standard-normal samples, so a threshold of 8 never matched.)
print(df[df.a > 0])

'''
Part 3:赋值
先定位再修改
'''
# Locate the target cells first, then assign to them.
# NOTE: df has columns 'a'..'d' and a daily index starting at 2016-01-01.
df.iloc[2, 2] = 11              # by position: third row, third column
# By label. The labels must already exist: assigning through .loc with an
# unknown row/column label adds a new row/column instead of editing a cell.
df.loc['20160101', 'a'] = 22

# Conditional assignment to one column. A single .loc call is used instead of
# chained indexing (df.b[mask] = 0), which may write to a temporary copy and
# leave df unchanged.
df.loc[df.a > 0, 'b'] = 0
# Conditional assignment to whole rows: every column of the matching rows is
# set to 0. This runs after the single-column example because it also zeroes
# 'a', which would leave the mask above empty; the final frame is the same
# in either order.
df[df.a > 0] = 0

# Add columns.
df['F'] = np.nan    # a scalar is repeated for every row
# A Series is aligned on its index, so the labels must match df's index.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20160101', periods=6))

'''
Part 4:处理缺失数据
'''
# Drop rows (axis=0) that contain missing values.
# how='any' drops a row if any value is NaN; how='all' only if every value is.
print(df.dropna(axis=0, how='any'))

# Element-wise mask: True where a value is missing.
print(df.isnull())
# Is there at least one missing value anywhere? Reducing the underlying
# NumPy array always yields a single boolean, and no "== True" is needed.
print(df.isnull().values.any())

# Replace every missing value with 0 (returns a new frame; df is unchanged).
print(df.fillna(value=0))

'''
Part 5:导入导出
'''
# Load a CSV file from the working directory into a DataFrame.
# NOTE(review): 'finename.csv' looks like a placeholder (perhaps a typo of
# 'filename.csv'); the file must exist or read_csv raises FileNotFoundError.
data = pd.read_csv(filepath_or_buffer='finename.csv')

# Serialize the frame to a pickle file in the working directory.
data.to_pickle(path='some.pickle')

'''
Part 6:合并concat
'''
# Three 3x4 float frames filled with 0, 1 and 2, sharing the same columns.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)), columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=['a', 'b', 'c', 'd'])

# Stack vertically (axis=0); ignore_index renumbers the rows 0..8.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

# join: 'inner' or 'outer'
# Two frames whose columns and row labels only partly overlap.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)), columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# outer: keep the union of the columns, filling the gaps with NaN.
res1 = pd.concat([df1, df2], join='outer')

# inner: keep only the columns present in both frames.
res2 = pd.concat([df1, df2], join='inner', ignore_index=True)

# Side-by-side concat restricted to df1's row labels. The join_axes argument
# was removed in pandas 1.0; reindexing the result is the replacement.
res3 = pd.concat([df1, df2], axis=1).reindex(df1.index)

# Appending rows. DataFrame.append was removed in pandas 2.0; pd.concat does
# the same job (and accepts any number of frames, e.g. [df1, df3, df2]).
res4 = pd.concat([df1, df2], ignore_index=True)

# Append a single Series as one new row: turn it into a one-row frame whose
# columns are the Series labels, then concatenate.
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
res5 = pd.concat([df1, s1.to_frame().T], ignore_index=True)

'''
Part 7:合并merge
'''
# --- Merge on a single key column -------------------------------------
# Both tables carry the same four keys, so each left row finds one partner.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

res = left.merge(right, on='key')

# --- Merge on two key columns -----------------------------------------
# Rows are paired only when both key1 and key2 agree.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# how is one of 'left', 'right', 'outer', 'inner';
# 'inner' keeps only the key pairs found in both tables.
res = left.merge(right, how='inner', on=['key1', 'key2'])


'''
Part 8:可视化
'''
import matplotlib.pyplot as plt

# One random walk: the running total of 1000 standard-normal steps.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
data.plot()
plt.show()

# Four random walks in one frame; plot() draws one line per column.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=['A', 'B', 'C', 'D'],
).cumsum()
data.plot()
plt.show()

'''
plot methods:
    'bar','hist','box','kde','area','scatter','pie'
'''
# Two scatter series on the same axes: the first call returns the Axes,
# which is handed to the second call through ax=.
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')
data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class 2', ax=ax)
plt.show()

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值