In [1]:
import pandas as pd
In [2]:
# Build a small demo table: one row per person with name, age and height.
# (A stray placeholder line that preceded this cell was removed; as Python it
# was a reference to an undefined name and raised NameError.)
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [19, 19, 20, 21],
    'height': [1.65, 1.70, 1.67, 1.73],
}
df = pd.DataFrame(data)
df
Out[2]:
name age height
0 张三 19 1.65
1 李四 19 1.70
2 王五 20 1.67
3 赵六 21 1.73
In [3]:
# Bracket access with a single label returns that column as a Series.
df['name']
Out[3]:
0 张三
1 李四
2 王五
3 赵六
Name: name, dtype: object
In [4]:
# Attribute-style column access; yields the same Series as df['name'].
df.name
Out[4]:
0 张三
1 李四
2 王五
3 赵六
Name: name, dtype: object
In [5]:
# Passing a *list* of labels keeps the result two-dimensional:
# a one-column DataFrame instead of a Series.
df[['name']]
Out[5]:
name
0 张三
1 李四
2 王五
3 赵六
In [6]:
# Column labels of df, returned as an Index.
df.columns
Out[6]:
Index(['name', 'age', 'height'], dtype='object')
In [7]:
# An Index can be sliced positionally like a list: positions 1 and 2 here.
df.columns[1:3]
Out[7]:
Index(['age', 'height'], dtype='object')
In [8]:
# Use the sliced labels to select several columns at once (result is a DataFrame).
selected_labels = df.columns[1:3]
df[selected_labels]
Out[8]:
age height
0 19 1.65
1 19 1.70
2 20 1.67
3 21 1.73
In [10]:
# Standard-library module used below to read the current calendar year.
import datetime
In [11]:
# Add a derived column: current calendar year minus age, computed element-wise.
# NOTE: this reads the system clock, so the stored values depend on the year
# in which the cell is executed.
current_year = datetime.datetime.now().year
df['year'] = current_year - df['age']
In [12]:
# Show df, which now carries the added 'year' column.
df
Out[12]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
In [13]:
# Drop a column by label. drop() returns a new DataFrame; df itself is not
# modified because the result is not assigned back.
df.drop(columns='year')
Out[13]:
name age height
0 张三 19 1.65
1 李四 19 1.70
2 王五 20 1.67
3 赵六 21 1.73
In [14]:
# The drop() result above was not assigned, so df still has its 'year' column.
df
Out[14]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
In [15]:
# Drop every second column starting at position 1 (here: 'age' and 'year').
# As before, this returns a new frame and leaves df untouched.
df.drop(columns=df.columns[1::2])
Out[15]:
name height
0 张三 1.65
1 李四 1.70
2 王五 1.67
3 赵六 1.73
In [16]:
# Still unchanged: the drop() call above did not modify df in place.
df
Out[16]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
In [17]:
# Label-based row lookup: a single label returns the row as a Series
# whose index is the column labels.
df.loc[1]
Out[17]:
name 李四
age 19
height 1.7
year 1999
Name: 1, dtype: object
In [18]:
# A list of labels (even a single one) keeps the result a DataFrame.
df.loc[[1]]
Out[18]:
name age height year
1 李四 19 1.7 1999
In [19]:
# Row labels of df.
df.index
Out[19]:
RangeIndex(start=0, stop=4, step=1)
In [20]:
# Take the last two row labels from the index, then look those rows up by label.
last_two_labels = df.index[-2:]
df.loc[last_two_labels]
Out[20]:
name age height year
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
In [21]:
# .loc takes (row labels, column labels): the last two rows, restricted to
# the 'name' and 'age' columns.
df.loc[df.index[-2:], ['name', 'age']]
Out[21]:
name age
2 王五 20
3 赵六 21
In [22]:
# A slice inside df[...] selects *rows*; the end bound is exclusive,
# so this yields rows 1 and 2.
df[1:3]
Out[22]:
name age height year
1 李四 19 1.70 1999
2 王五 20 1.67 1998
In [23]:
# Negative slice bounds work as with lists: the last two rows.
df[-2:]
Out[23]:
name age height year
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
In [24]:
# Intentional failure: unlike a slice, a scalar inside df[...] is looked up as
# a *column* label. There is no column named 2, so this raises KeyError.
# Use df.loc[2] / df.iloc[2] to fetch a row instead.
df[2]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
c:\python36-32\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3077 try:
-> 3078 return self._engine.get_loc(key)
3079 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 2
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-24-b5f2749c85df> in <module>
----> 1 df[2]
c:\python36-32\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2686 return self._getitem_multilevel(key)
2687 else:
-> 2688 return self._getitem_column(key)
2689
2690 def _getitem_column(self, key):
c:\python36-32\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2693 # get column
2694 if self.columns.is_unique:
-> 2695 return self._get_item_cache(key)
2696
2697 # duplicate columns & possible reduce dimensionality
c:\python36-32\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
2487 res = cache.get(item)
2488 if res is None:
-> 2489 values = self._data.get(item)
2490 res = self._box_item_values(item, values)
2491 cache[item] = res
c:\python36-32\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
4113
4114 if not isna(item):
-> 4115 loc = self.items.get_loc(item)
4116 else:
4117 indexer = np.arange(len(self.items))[isna(self.items)]
c:\python36-32\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3078 return self._engine.get_loc(key)
3079 except KeyError:
-> 3080 return self._engine.get_loc(self._maybe_cast_indexer(key))
3081
3082 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 2
In [25]:
# Display df again for reference.
df
Out[25]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
In [26]:
# (number of rows, number of columns); shape[0] is the row count.
df.shape
Out[26]:
(4, 4)
In [27]:
# Append a row by assigning to a label that does not exist yet. With the
# default 0..n-1 index, df.shape[0] is the next unused label. The dict is
# matched to columns by key, so the key order does not matter.
# NOTE: this mutates df and is not idempotent -- re-running the cell appends
# another row under the next label.
new_row = {'age': 22, 'name': '大苏打', 'height': 1.89, 'year': 0}
df.loc[df.shape[0]] = new_row
df
Out[27]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
3 赵六 21 1.73 1997
4 大苏打 22 1.89 0
In [29]:
# Remove the row labelled 2. drop() works on row labels by default and
# returns a new frame, bound here to df2; df is left unchanged.
df2 = df.drop(2)
In [30]:
# Row label 2 is gone; the remaining rows keep their original labels (0, 1, 3, 4).
df2
Out[30]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
3 赵六 21 1.73 1997
4 大苏打 22 1.89 0
In [31]:
# Renumber the rows 0..n-1 to close the gap left by the dropped label.
# reset_index(drop=True) discards the old labels instead of keeping them as
# a new column.
df2 = df2.reset_index(drop=True)
In [32]:
# The row labels are contiguous again (0..3).
df2
Out[32]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 赵六 21 1.73 1997
3 大苏打 22 1.89 0
In [33]:
# Rebuild df2 from df without the row labelled 2; the surviving rows keep
# their original labels, so the gap at 2 is back.
df2 = df.drop(index=2)
In [34]:
# Freshly re-created df2: labels are 0, 1, 3, 4 (no row labelled 2).
df2
Out[34]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
3 赵六 21 1.73 1997
4 大苏打 22 1.89 0
In [36]:
# Intentional failure: .loc looks rows up by *label*, and label 2 was dropped
# from df2, so this raises KeyError.
df2.loc[2]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
c:\python36-32\lib\site-packages\pandas\core\indexing.py in _validate_key(self, key, axis)
1789 if not ax.contains(key):
-> 1790 error()
1791 except TypeError as e:
c:\python36-32\lib\site-packages\pandas\core\indexing.py in error()
1784 .format(key=key,
-> 1785 axis=self.obj._get_axis_name(axis)))
1786
KeyError: 'the label [2] is not in the [index]'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-36-1619a0efd72f> in <module>
----> 1 df2.loc[2]
c:\python36-32\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1476
1477 maybe_callable = com._apply_if_callable(key, self.obj)
-> 1478 return self._getitem_axis(maybe_callable, axis=axis)
1479
1480 def _is_scalar_access(self, key):
c:\python36-32\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1909
1910 # fall thru to straight lookup
-> 1911 self._validate_key(key, axis)
1912 return self._get_label(key, axis=axis)
1913
c:\python36-32\lib\site-packages\pandas\core\indexing.py in _validate_key(self, key, axis)
1796 raise
1797 except:
-> 1798 error()
1799
1800 def _is_scalar_access(self, key):
c:\python36-32\lib\site-packages\pandas\core\indexing.py in error()
1783 raise KeyError(u"the label [{key}] is not in the [{axis}]"
1784 .format(key=key,
-> 1785 axis=self.obj._get_axis_name(axis)))
1786
1787 try:
KeyError: 'the label [2] is not in the [index]'
In [37]:
# .iloc = integer location: selects by position, ignoring labels.
# Position 2 is the third row, whose label is 3.
df2.iloc[2]
Out[37]:
name 赵六
age 21
height 1.73
year 1997
Name: 3, dtype: object
In [38]:
# Display df2 for reference before reading and writing single cells.
df2
Out[38]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
3 赵六 21 1.73 1997
4 大苏打 22 1.89 0
In [39]:
# .iat reads a single scalar by (row position, column position):
# second row, second column ('age').
df2.iat[1, 1]
Out[39]:
19
In [40]:
# .iat can also assign: overwrite the same cell (second row, 'age') in place.
df2.iat[1, 1] = 34
In [41]:
# The age in the second row (label 1) now reads 34.
df2
Out[41]:
name age height year
0 张三 19 1.65 1999
1 李四 34 1.70 1999
3 赵六 21 1.73 1997
4 大苏打 22 1.89 0
# 比较数据
比较数据
In [44]:
# Boolean-mask filtering: the comparison yields one True/False per row, and
# indexing df with that mask keeps only the True rows.
is_tall = df['height'] >= 1.7
df[is_tall]
Out[44]:
name age height year
1 李四 19 1.70 1999
3 赵六 21 1.73 1997
4 大苏打 22 1.89 0
In [45]:
# Combine masks with & (element-wise AND). Each comparison needs its own
# parentheses because & binds more tightly than >= and <=.
is_tall = df['height'] >= 1.7
is_young = df['age'] <= 20
df[is_tall & is_young]
Out[45]:
name age height year
1 李四 19 1.7 1999
In [46]:
# Same kind of filter written as a query string: column names are used
# directly and conditions are joined with the keyword `and`.
df.query('height>=1.65 and age<=20')
Out[46]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
In [47]:
# Query strings support parentheses, `or`, and quoted string literals.
df.query('(height>=1.65 and age<=20) or name=="大苏打"')
Out[47]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
4 大苏打 22 1.89 0
In [48]:
# Inside a query string, the @ prefix refers to a Python variable rather than
# a column: the bare `age` is the column, `@age` is the variable set here.
age = 20
df.query('age <=@age')
Out[48]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
2 王五 20 1.67 1998
In [49]:
# isin() builds a boolean mask: True where the value is one of the listed ones.
df['age'].isin([19, 18])
Out[49]:
0 True
1 True
2 False
3 False
4 False
Name: age, dtype: bool
In [50]:
# Use the isin() mask to keep only the rows whose age is 19 or 18.
age_matches = df['age'].isin([19, 18])
df[age_matches]
Out[50]:
name age height year
0 张三 19 1.65 1999
1 李四 19 1.70 1999
In [51]:
# Transpose: rows become columns and columns become rows.
df.transpose()
Out[51]:
0 1 2 3 4
name 张三 李四 王五 赵六 大苏打
age 19 19 20 21 22
height 1.65 1.7 1.67 1.73 1.89
year 1999 1999 1998 1997 0