import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the
# last one (several cells below rely on this to show multiple results).
InteractiveShell.ast_node_interactivity = "all"
创建一个DataFrame,要求索引是a,b,c,d,列名是one,two,其中one列数据为1,2,3(d行没有数据,用None占位,显示为NaN),two列数据为1,2,3,4
# Column values keyed by column name; 'one' has no value for the last row.
data = dict(one=[1, 2, 3, None], two=[1, 2, 3, 4])
# Row labels, one per row of data.
index = list('abcd')
data
{'one': [1, 2, 3, None], 'two': [1, 2, 3, 4]}
# Build the frame from the column dict, labelling the rows with `index`.
a = pd.DataFrame(data=data, index=index)
a
|   | one | two |
|---|-----|-----|
| a | 1.0 | 1 |
| b | 2.0 | 2 |
| c | 3.0 | 3 |
| d | NaN | 4 |
# Underlying data as a NumPy array.
a.values
# Row labels.
a.index
# Column labels.
a.columns
# (number of rows, number of columns).
a.shape
array([[ 1., 1.],
[ 2., 2.],
[ 3., 3.],
[nan, 4.]])
Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['one', 'two'], dtype='object')
(4, 2)
查看df的索引名和列名
# Row labels of the frame.
a.index
Index(['a', 'b', 'c', 'd'], dtype='object')
# Column labels of the frame.
a.columns
Index(['one', 'two'], dtype='object')
列操作
给df增加一列,列名为three,列中的值为one列和two列值的乘积
# New column 'three' is the element-wise product of 'one' and 'two';
# a missing value in either operand yields NaN in the result.
a['three'] = a['one'].mul(a['two'])
a
|   | one | two | three |
|---|-----|-----|-------|
| a | 1.0 | 1 | 1.0 |
| b | 2.0 | 2 | 4.0 |
| c | 3.0 | 3 | 9.0 |
| d | NaN | 4 | NaN |
分别用两种不同的方法删除df中的two和three列
# First deletion method: pop removes column 'two' from the frame and
# returns the removed column as a Series.
a.pop('two')
a 1
b 2
c 3
d 4
Name: two, dtype: int64
# Show the frame after the pop of 'two'.
a