"""
@author: XiangguoSun
@contact: sunxiangguodut@qq.com
@file: learn_pandas.py
@time: 2017/3/8 8:18
@software: PyCharm
"""
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
# 1. Basic data structures
# 1.1 Series: behaves like a dict combined with a 1-D array
# NOTE: every print below is a single-argument print(...) call, which parses
# the same way under Python 2 and Python 3.
obj_dic = {'a': 1, 'b': 2, 'c': 3}
objd = Series(obj_dic)  # dict keys become the index labels
obj = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd'])
# Format both values into one string so the output stays on a single line.
print('%s %s' % (obj.index, obj.values))
print(obj[['a', 'c']])  # select several labels at once
print('b' in obj)  # membership is tested against the index labels
# Building from an existing Series with a new index aligns on labels;
# 'add' has no matching label, so its value is missing (NaN).
obj_na = Series(obj, index=['a', 'b', 'c', 'd', 'add'])
print(obj_na)
print(obj_na.isnull())
print(obj_na.notnull())
print(obj_na.name)
print(obj_na.index.name)
# Replace the index wholesale and name both the Series and its index.
obj_na.index = ['x', 'y', 'z', 'o', 'p']
obj_na.name = 'my_table'
obj_na.index.name = 'my_index'
print(obj_na)
# 1.2 DataFrame
# NOTE: every print below is a single-argument print(...) call, which parses
# the same way under Python 2 and Python 3.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
# 'debt' is not a key of `data`, so that column is created with missing values.
df = DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)
print(df)
# Label-based row lookup; .loc replaces the removed .ix indexer.
print(df.loc['three'])
df['five'] = np.arange(5)  # add a column from an array of matching length
print(df)
df['new_column'] = df.state == 'Ohio'  # boolean column from a comparison
print(df)
del df['new_column']
print(df.columns)
# Nested dict: outer keys become columns, inner keys become the row index.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
data = DataFrame(pop)
print(data)
print(data.T)
# An explicit index selects/aligns the rows; 2003 has no data in `pop`.
print(DataFrame(pop, index=[2001, 2002, 2003]))
data.index.name = 'sunxiangguo'
data.columns.name = 'state'
print(data)
print(data.values)
# Note: the elements of an Index object cannot be modified in place.
# 2. Basic functionality
# Reindexing a Series.
# NOTE: every print below is a single-argument print(...) call, which parses
# the same way under Python 2 and Python 3.
obj = Series([4.5, 7, -2, 4], index=['b', 'a', 'c', 'd'])
print(obj)
# Labels are rearranged to the new order; 'e' is new, so its value is missing.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
print(obj2)
# Same reindex, but new labels receive 0 instead of a missing value.
obj3 = obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)
print(obj3)
obj = Series(['blue', 'perple', 'yellow'], index=[0, 2, 4])
print(obj)
# Fill the gaps introduced by the new index, forward ('ffill'/'pad') or
# backward ('bfill'/'backfill').
obj2 = obj.reindex(range(8), method='ffill')
print(obj2)
print(obj.reindex(range(7), method='pad'))
print(obj.reindex(range(7), method='bfill'))
print(obj.reindex(range(7), method='backfill'))
# Dropping entries from an axis.
# NOTE: every print below is a single-argument print(...) call, which parses
# the same way under Python 2 and Python 3.
obj = Series(np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')  # returns a new object; `obj` itself is unchanged
print(obj)
print(new_obj)
print(obj.drop(['c', 'd']))
data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)
print(data.drop(['Colorado', 'Ohio']))  # drop rows by label (default axis)
print(data.drop('two', axis=1))  # axis=1 drops columns instead
print(data.drop(['two', 'four'], axis=1))
# Indexing and selecting from a Series with string labels.
# NOTE: every print below is a single-argument print(...) call, which parses
# the same way under Python 2 and Python 3.
data = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
print(data)
print(data['b'])  # by label
# Positional access goes through .iloc: plain data[1] / data[[1, 3]] on a
# string-labelled Series depends on a deprecated integer-position fallback.
print(data.iloc[1])
print(data.iloc[2:4])
print(data[['b', 'a', 'd']])  # list of labels, in the requested order
print(data.iloc[[1, 3]])
print(data[data < 2])  # boolean mask
# Label slices include the end label, unlike positional slices.
print(data.loc['a':'c'])
# Indexing and selecting from a DataFrame with string row/column labels.
# NOTE: every print below is a single-argument print(...) call, which parses
# the same way under Python 2 and Python 3.
data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)
print(data['two'])  # single column
print(data[['three', 'one']])  # several columns, in the requested order
print(data[:2])  # a slice selects rows
print(data[data['three'] > 5])  # a boolean mask selects rows
# The integer slices below are positions (the axes carry string labels), so
# they use .iloc, consistent with the last two lines; .ix has been removed.
print(data.iloc[:2, :2])
print(data.iloc[1:3])
print(data.xs('Ohio'))  # one row by label
# xs is label-based and integers are not column labels here, so the columns
# at positions 1-3 are selected with .iloc instead of xs(range(1, 4), axis=1).
print(data.iloc[:, 1:4])
# data.icol(2) and data.irow(0) have been superseded by the two iloc calls
# below.
print(data.iloc[:, 2])
print(data.iloc[0])