import numpy as np
import pandas as pd
3.1 索引器
3.1.1 表的列索引
# Load the sample table, restricting the result to the six listed columns.
df = pd.read_csv(
    'joyful-pandas-master/data/learn_pandas.csv',
    usecols=['School', 'Grade', 'Name', 'Gender', 'Weight', 'Transfer'],
)
df['Name'].head()
0 Gaopeng Yang
1 Changqiang You
2 Mei Sun
3 Xiaojuan Sun
4 Gaojuan You
Name: Name, dtype: object
df[['Name','Gender']].head()
| Name | Gender |
---|
0 | Gaopeng Yang | Female |
---|
1 | Changqiang You | Male |
---|
2 | Mei Sun | Male |
---|
3 | Xiaojuan Sun | Female |
---|
4 | Gaojuan You | Male |
---|
df.Name.head()
0 Gaopeng Yang
1 Changqiang You
2 Mei Sun
3 Xiaojuan Sun
4 Gaojuan You
Name: Name, dtype: object
3.1.2 序列的行索引
# The index contains duplicates: 'a' occurs four times, 'b' and 'c' once each.
s = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=list('abaaac'),
)
s
a 1
b 2
a 3
a 4
a 5
c 6
dtype: int64
s['c':'b':-2]
c 6
a 4
b 2
dtype: int64
s['c':'b':-3]
c 6
a 3
dtype: int64
s['c':'b':-1]
c 6
a 5
a 4
a 3
b 2
dtype: int64
s['b']
2
s[['c','b']]
c 6
b 2
dtype: int64
s['c':'b':-2]
c 6
a 4
b 2
dtype: int64
s['c':'b':-3]
c 6
a 3
dtype: int64
s['c':'b':-3]
c 6
a 3
dtype: int64
s['c':'b':-1]
c 6
a 5
a 4
a 3
b 2
dtype: int64
# Integer labels that are neither sorted nor unique (the label 1 appears twice).
s = pd.Series(
    list('abcdef'),
    index=[1, 3, 1, 2, 5, 4],
)
s
1 a
3 b
1 c
2 d
5 e
4 f
dtype: object
s[1]
1 a
1 c
dtype: object
s[[2,3]]
2 d
3 b
dtype: object
s[1:-1:2]
3 b
2 d
dtype: object
3.1.3 loc索引器
# Move the Name column into the row index so that rows can be looked up by
# student name with .loc.
df_demo = df.set_index('Name')
df_demo.head()
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Gaopeng Yang | Shanghai Jiao Tong University | Freshman | Female | 46.0 | N |
---|
Changqiang You | Peking University | Freshman | Male | 70.0 | N |
---|
Mei Sun | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
Xiaojuan Sun | Fudan University | Sophomore | Female | 41.0 | N |
---|
Gaojuan You | Fudan University | Sophomore | Male | 74.0 | N |
---|
df_demo.loc['Qiang Sun']
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Qiang Sun | Tsinghua University | Junior | Female | 53.0 | N |
---|
Qiang Sun | Tsinghua University | Sophomore | Female | 40.0 | N |
---|
Qiang Sun | Shanghai Jiao Tong University | Junior | Female | NaN | N |
---|
df_demo.loc['Quan Zhao']
School Shanghai Jiao Tong University
Grade Junior
Gender Female
Weight 53
Transfer N
Name: Quan Zhao, dtype: object
df_demo.loc['Qiang Sun','School']
Name
Qiang Sun Tsinghua University
Qiang Sun Tsinghua University
Qiang Sun Shanghai Jiao Tong University
Name: School, dtype: object
df_demo.loc['Quan Zhao','School']
'Shanghai Jiao Tong University'
df_demo.loc[['Quan Zhao','Qiang Sun'],['School','Gender']]
| School | Gender |
---|
Name | | |
---|
Quan Zhao | Shanghai Jiao Tong University | Female |
---|
Qiang Sun | Tsinghua University | Female |
---|
Qiang Sun | Tsinghua University | Female |
---|
Qiang Sun | Shanghai Jiao Tong University | Female |
---|
df_demo.loc['Gaojuan You':'Gaoqiang Qian','School':'Gender']
| School | Grade | Gender |
---|
Name | | | |
---|
Gaojuan You | Fudan University | Sophomore | Male |
---|
Xiaoli Qian | Tsinghua University | Freshman | Female |
---|
Qiang Chu | Shanghai Jiao Tong University | Freshman | Female |
---|
Gaoqiang Qian | Tsinghua University | Junior | Female |
---|
# Work on a copy so that df_demo keeps its Name index.
df_loc_slice_demo = df_demo.copy()
# Relabel the rows with a descending integer index: n, n-1, ..., 1.
df_loc_slice_demo.index = range(len(df_demo), 0, -1)
df_loc_slice_demo
| School | Grade | Gender | Weight | Transfer |
---|
200 | Shanghai Jiao Tong University | Freshman | Female | 46.0 | N |
---|
199 | Peking University | Freshman | Male | 70.0 | N |
---|
198 | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
197 | Fudan University | Sophomore | Female | 41.0 | N |
---|
196 | Fudan University | Sophomore | Male | 74.0 | N |
---|
... | ... | ... | ... | ... | ... |
---|
5 | Fudan University | Junior | Female | 46.0 | N |
---|
4 | Tsinghua University | Senior | Female | 50.0 | N |
---|
3 | Shanghai Jiao Tong University | Senior | Female | 45.0 | N |
---|
2 | Shanghai Jiao Tong University | Senior | Male | 71.0 | N |
---|
1 | Tsinghua University | Sophomore | Male | 51.0 | N |
---|
200 rows × 5 columns
df_demo.shape[0]
200
df_loc_slice_demo.loc[5:3]
| School | Grade | Gender | Weight | Transfer |
---|
5 | Fudan University | Junior | Female | 46.0 | N |
---|
4 | Tsinghua University | Senior | Female | 50.0 | N |
---|
3 | Shanghai Jiao Tong University | Senior | Female | 45.0 | N |
---|
df_loc_slice_demo.loc[3:5]
| School | Grade | Gender | Weight | Transfer |
---|
df_demo.loc[df_demo.Weight>70].head()
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Mei Sun | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
Gaojuan You | Fudan University | Sophomore | Male | 74.0 | N |
---|
Xiaopeng Zhou | Shanghai Jiao Tong University | Freshman | Male | 74.0 | N |
---|
Xiaofeng Sun | Tsinghua University | Senior | Male | 71.0 | N |
---|
Qiang Zheng | Shanghai Jiao Tong University | Senior | Male | 87.0 | N |
---|
df_demo.loc[df_demo.Grade.isin(['Freshman','Senior'])].head()
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Gaopeng Yang | Shanghai Jiao Tong University | Freshman | Female | 46.0 | N |
---|
Changqiang You | Peking University | Freshman | Male | 70.0 | N |
---|
Mei Sun | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
Xiaoli Qian | Tsinghua University | Freshman | Female | 51.0 | N |
---|
Qiang Chu | Shanghai Jiao Tong University | Freshman | Female | 52.0 | N |
---|
# Branch 1: Fudan University seniors with Weight above 70.
condition_1_1 = df_demo['School'] == 'Fudan University'
condition_1_2 = df_demo['Grade'] == 'Senior'
condition_1_3 = df_demo['Weight'] > 70
condition_1 = (condition_1_1 & condition_1_2) & condition_1_3

# Branch 2: Peking University students who are not seniors, with Weight above 80.
condition_2_1 = df_demo['School'] == 'Peking University'
condition_2_2 = df_demo['Grade'] == 'Senior'
condition_2_3 = df_demo['Weight'] > 80
condition_2 = (condition_2_1 & ~condition_2_2) & condition_2_3

# Keep every row that satisfies either branch.
df_demo.loc[condition_1 | condition_2]
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Qiang Han | Peking University | Freshman | Male | 87.0 | N |
---|
Chengpeng Zhou | Fudan University | Senior | Male | 81.0 | N |
---|
Changpeng Zhao | Peking University | Freshman | Male | 83.0 | N |
---|
Chengpeng Qian | Fudan University | Senior | Male | 73.0 | Y |
---|
def condition(x):
    """Build the boolean row mask used to filter a student table.

    A row is selected when it satisfies either of:

    * School is 'Fudan University', Grade is 'Senior' and Weight > 70
    * School is 'Peking University', Grade is not 'Senior' and Weight > 80

    Parameters
    ----------
    x : pandas.DataFrame
        Table exposing ``School``, ``Grade`` and ``Weight`` columns. When this
        function is passed to ``.loc``, it is called with the frame being
        indexed.

    Returns
    -------
    pandas.Series
        Boolean mask aligned with the index of ``x``.
    """
    # Both branches test the same Grade comparison, so evaluate it only once.
    is_senior = x.Grade == 'Senior'
    fudan_branch = (x.School == 'Fudan University') & is_senior & (x.Weight > 70)
    peking_branch = (x.School == 'Peking University') & ~is_senior & (x.Weight > 80)
    return fudan_branch | peking_branch
df_demo.loc[condition]
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Qiang Han | Peking University | Freshman | Male | 87.0 | N |
---|
Chengpeng Zhou | Fudan University | Senior | Male | 81.0 | N |
---|
Changpeng Zhao | Peking University | Freshman | Male | 83.0 | N |
---|
Chengpeng Qian | Fudan University | Senior | Male | 73.0 | Y |
---|
df_demo.loc[lambda x: 'Quan Zhao',lambda x:'Gender']
'Female'
df_demo.loc[lambda x: slice('Gaojuan You','Gaoqiang Qian')]
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Gaojuan You | Fudan University | Sophomore | Male | 74.0 | N |
---|
Xiaoli Qian | Tsinghua University | Freshman | Female | 51.0 | N |
---|
Qiang Chu | Shanghai Jiao Tong University | Freshman | Female | 52.0 | N |
---|
Gaoqiang Qian | Tsinghua University | Junior | Female | 50.0 | N |
---|
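不要使用链式赋值:先用 [] 取出子表、再对其中某一列赋值(例如 df_chain[df_chain.A != 0].B = 1),修改的往往只是临时副本,原表不会改变,并且 pandas 通常会给出 SettingWithCopyWarning;应当用 loc 在一步之内完成筛选和赋值(例如 df_chain.loc[df_chain.A != 0, 'B'] = 1)。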
# Three rows and two columns; column B starts out as all zeros.
df_chain = pd.DataFrame({
    'A': [0, 1, -1],
    'B': [0, 0, 0],
})
df_chain
3.1.4 iloc索引器
iloc 的使用与 loc 类似,区别在于 iloc 按整数位置(从 0 开始计数)而不是按标签进行筛选,并且切片不包含右端点。
df_demo.iloc[1,1]
'Freshman'
df_demo.iloc[[0,1],[0,1]]
| School | Grade |
---|
Name | | |
---|
Gaopeng Yang | Shanghai Jiao Tong University | Freshman |
---|
Changqiang You | Peking University | Freshman |
---|
df_demo.iloc[1:4,2:4]
| Gender | Weight |
---|
Name | | |
---|
Changqiang You | Male | 70.0 |
---|
Mei Sun | Male | 89.0 |
---|
Xiaojuan Sun | Female | 41.0 |
---|
df_demo
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Gaopeng Yang | Shanghai Jiao Tong University | Freshman | Female | 46.0 | N |
---|
Changqiang You | Peking University | Freshman | Male | 70.0 | N |
---|
Mei Sun | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
Xiaojuan Sun | Fudan University | Sophomore | Female | 41.0 | N |
---|
Gaojuan You | Fudan University | Sophomore | Male | 74.0 | N |
---|
... | ... | ... | ... | ... | ... |
---|
Xiaojuan Sun | Fudan University | Junior | Female | 46.0 | N |
---|
Li Zhao | Tsinghua University | Senior | Female | 50.0 | N |
---|
Chengqiang Chu | Shanghai Jiao Tong University | Senior | Female | 45.0 | N |
---|
Chengmei Shen | Shanghai Jiao Tong University | Senior | Male | 71.0 | N |
---|
Chunpeng Lv | Tsinghua University | Sophomore | Male | 51.0 | N |
---|
200 rows × 5 columns
df_demo.iloc[lambda x:slice(1,4)]
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Changqiang You | Peking University | Freshman | Male | 70.0 | N |
---|
Mei Sun | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
Xiaojuan Sun | Fudan University | Sophomore | Female | 41.0 | N |
---|
df_demo.iloc[(df_demo.Weight>80).values].head()
| School | Grade | Gender | Weight | Transfer |
---|
Name | | | | | |
---|
Mei Sun | Shanghai Jiao Tong University | Senior | Male | 89.0 | N |
---|
Qiang Zheng | Shanghai Jiao Tong University | Senior | Male | 87.0 | N |
---|
Qiang Han | Peking University | Freshman | Male | 87.0 | N |
---|
Chengpeng Zhou | Fudan University | Senior | Male | 81.0 | N |
---|
Feng Han | Shanghai Jiao Tong University | Sophomore | Male | 82.0 | N |
---|
df_demo.School.iloc[1]
'Peking University'
df_demo.School.iloc[1:5:2]
Name
Changqiang You Peking University
Xiaojuan Sun Fudan University
Name: School, dtype: object
3.1.5 query方法
# Filter rows with an expression string: column names are written bare inside
# the string, and the adjacent string literals below are concatenated into a
# single expression so the long condition can be split across lines.
df.query('((School == "Fudan University")&'
'(Grade == "Senior")&'
'(Weight > 70))|'
'((School == "Peking University")&'
'(Grade != "Senior")&'
'(Weight > 80))'
)
| School | Grade | Name | Gender | Weight | Transfer |
---|
38 | Peking University | Freshman | Qiang Han | Male | 87.0 | N |
---|
66 | Fudan University | Senior | Chengpeng Zhou | Male | 81.0 | N |
---|
99 | Peking University | Freshman | Changpeng Zhao | Male | 83.0 | N |
---|
131 | Fudan University | Senior | Chengpeng Qian | Male | 73.0 | Y |
---|
df.query('Weight > Weight.mean()').head()
| School | Grade | Name | Gender | Weight | Transfer |
---|
1 | Peking University | Freshman | Changqiang You | Male | 70.0 | N |
---|
2 | Shanghai Jiao Tong University | Senior | Mei Sun | Male | 89.0 | N |
---|
4 | Fudan University | Sophomore | Gaojuan You | Male | 74.0 | N |
---|
10 | Shanghai Jiao Tong University | Freshman | Xiaopeng Zhou | Male | 74.0 | N |
---|
14 | Tsinghua University | Senior | Xiaomei Zhou | Female | 57.0 | N |
---|
# The expression string here uses the word operators `and` / `not in` and
# compares a column against a list literal.
df.query('(Grade not in ["Freshman","Sophomore"]) and '
'(Gender == "Male")').head()
| School | Grade | Name | Gender | Weight | Transfer |
---|
2 | Shanghai Jiao Tong University | Senior | Mei Sun | Male | 89.0 | N |
---|
16 | Tsinghua University | Junior | Xiaoqiang Qin | Male | 68.0 | N |
---|
17 | Tsinghua University | Junior | Peng Wang | Male | 65.0 | N |
---|
18 | Tsinghua University | Senior | Xiaofeng Sun | Male | 71.0 | N |
---|
21 | Shanghai Jiao Tong University | Senior | Xiaopeng Shen | Male | 62.0 | NaN |
---|
df.query('Grade == ["Junior","Senior"]').head()
| School | Grade | Name | Gender | Weight | Transfer |
---|
2 | Shanghai Jiao Tong University | Senior | Mei Sun | Male | 89.0 | N |
---|
7 | Tsinghua University | Junior | Gaoqiang Qian | Female | 50.0 | N |
---|
9 | Peking University | Junior | Juan Xu | Female | NaN | N |
---|
11 | Tsinghua University | Junior | Xiaoquan Lv | Female | 43.0 | N |
---|
12 | Shanghai Jiao Tong University | Senior | Peng You | Female | 48.0 | NaN |
---|
# Outer-scope Python variables are referenced inside the expression with the
# @ prefix.
low, high = 70, 80
# Evaluate with the Python engine: the traceback recorded below shows the
# numexpr code path (_check_ne_builtin_clash) failing on the Series method
# call Weight.between(...) with "TypeError: 'Series' objects are mutable".
df.query('Weight.between(@low,@high)', engine='python').head()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-82-f7e7d81043c7> in <module>
1 #引用外部变量
2 low,high =70,80
----> 3 df.query('Weight.between(@low,@high)').head()
D:\anaconda\lib\site-packages\pandas\core\frame.py in query(self, expr, inplace, **kwargs)
3343 kwargs["level"] = kwargs.pop("level", 0) + 1
3344 kwargs["target"] = None
-> 3345 res = self.eval(expr, **kwargs)
3346
3347 try:
D:\anaconda\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs)
3473 kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers)
3474
-> 3475 return _eval(expr, inplace=inplace, **kwargs)
3476
3477 def select_dtypes(self, include=None, exclude=None) -> "DataFrame":
D:\anaconda\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
344 eng = _engines[engine]
345 eng_inst = eng(parsed_expr)
--> 346 ret = eng_inst.evaluate()
347
348 if parsed_expr.assigner is None:
D:\anaconda\lib\site-packages\pandas\core\computation\engines.py in evaluate(self)
71
72 # make sure no names in resolvers and locals/globals clash
---> 73 res = self._evaluate()
74 return reconstruct_object(
75 self.result_type, res, self.aligned_axes, self.expr.terms.return_type
D:\anaconda\lib\site-packages\pandas\core\computation\engines.py in _evaluate(self)
111 env = self.expr.env
112 scope = env.full_scope
--> 113 _check_ne_builtin_clash(self.expr)
114 return ne.evaluate(s, local_dict=scope)
115
D:\anaconda\lib\site-packages\pandas\core\computation\engines.py in _check_ne_builtin_clash(expr)
27 Terms can contain
28 """
---> 29 names = expr.names
30 overlap = names & _ne_builtins
31
D:\anaconda\lib\site-packages\pandas\core\computation\expr.py in names(self)
812 """
813 if is_term(self.terms):
--> 814 return frozenset([self.terms.name])
815 return frozenset(term.name for term in com.flatten(self.terms))
816
D:\anaconda\lib\site-packages\pandas\core\generic.py in __hash__(self)
1667 def __hash__(self):
1668 raise TypeError(
-> 1669 f"{repr(type(self).__name__)} objects are mutable, "
1670 f"thus they cannot be hashed"
1671 )
TypeError: 'Series' objects are mutable, thus they cannot be hashed
3.1.6 随机抽样
# Five ids; the last value (90) is far larger than the other four.
df_sample = pd.DataFrame({
    'id': ['a', 'b', 'c', 'd', 'e'],
    'value': [1, 2, 3, 4, 90],
})
df_sample
# Draw three rows with replacement, using the value column as sampling weights.
df_sample.sample(n=3, replace=True, weights=df_sample['value'])
weights = df_sample['value']