# Sample data: one equal-length list per column. The dict keys become the
# DataFrame's column labels and each list supplies that column's values.
data = {
    "state": ["ohio", "ohio", "ohio", "nevada", "nevada"],
    "year": [2000, 2001, 2002, 2001, 2002],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9],
}

# Build the frame from the dict; no index is passed, so rows get the
# default integer labels shown in the output below.
frame = DataFrame(data)
>>> frame
pop state year
0 1.5 ohio 2000
1 1.7 ohio 2001
2 3.6 ohio 2002
3 2.4 nevada 2001
4 2.9 nevada 2002
#可指定序列,DataFrame的列会按照指定的顺序进行排列:
>>> DataFrame(data,columns=['year','state','pop'])
year state pop
0 2000 ohio 1.5
1 2001 ohio 1.7
2 2002 ohio 3.6
3 2001 nevada 2.4
4 2002 nevada 2.9
#如果传入的列在数据中找不到,就会产生NA值
>>> frame2=DataFrame(data,columns=['year','state','pop','debt'],index=['one','two','three','four','five'])
>>> frame2
year state pop debt
one 2000 ohio 1.5 NaN
two 2001 ohio 1.7 NaN
three 2002 ohio 3.6 NaN
four 2001 nevada 2.4 NaN
five 2002 nevada 2.9 NaN
#通过类似字典标记的方式或属性的方式,可以将DataFrame的列获取为一个Series,返回的Series与原DataFrame有相同的索引,且其name属性也已被相应地设置
>>> frame2.year
one 2000
two 2001
three 2002
four 2001
five 2002
Name: year, dtype: int64
>>> frame2['state']
one ohio
two ohio
three ohio
four nevada
five nevada
Name: state, dtype: object
#行也可以通过位置或名称的方式进行获取,比如用索引字段ix(注:ix在pandas 1.0中已被移除,新版本请改用loc按标签、iloc按位置获取,例如frame2.loc['three'])
>>> frame2.ix['three']
year 2002
state ohio
pop 3.6
debt NaN
Name: three, dtype: object
#列可以通过赋值的方式进行修改
>>> frame2['debt']=16.5
>>> frame2
year state pop debt
one 2000 ohio 1.5 16.5
two 2001 ohio 1.7 16.5
three 2002 ohio 3.6 16.5
four 2001 nevada 2.4 16.5
five 2002 nevada 2.9 16.5
#为不存在的列赋值会创建出一个新列,关键词del用于删除列
>>> frame2['eastern']=frame2.state=='ohio'
>>> frame2
year state pop debt eastern
one 2000 ohio 1.5 16.5 True
two 2001 ohio 1.7 16.5 True
three 2002 ohio 3.6 16.5 True
four 2001 nevada 2.4 16.5 False
five 2002 nevada 2.9 16.5 False
>>> del frame2['eastern']
>>> frame2
year state pop debt
one 2000 ohio 1.5 16.5
two 2001 ohio 1.7 16.5
three 2002 ohio 3.6 16.5
four 2001 nevada 2.4 16.5
five 2002 nevada 2.9 16.5