Datawhale Task03 索引 打卡

import numpy as np
import pandas as pd

3.1 索引器

3.1.1 表的列索引

df = pd.read_csv('joyful-pandas-master/data/learn_pandas.csv',usecols = ['School','Grade','Name','Gender','Weight','Transfer']) 
#usecols表示读取列的集合
df['Name'].head() #通过[]来实现
0      Gaopeng Yang
1    Changqiang You
2           Mei Sun
3      Xiaojuan Sun
4       Gaojuan You
Name: Name, dtype: object
df[['Name','Gender']].head()  #通过[列名组成的列表来实现]
             Name  Gender
0    Gaopeng Yang  Female
1  Changqiang You    Male
2         Mei Sun    Male
3    Xiaojuan Sun  Female
4     Gaojuan You    Male
df.Name.head()
0      Gaopeng Yang
1    Changqiang You
2           Mei Sun
3      Xiaojuan Sun
4       Gaojuan You
Name: Name, dtype: object

3.1.2 序列的行索引

#以字符串为索引的Series
s = pd.Series([1,2,3,4,5,6],index = ['a','b','a','a','a','c'])
s
a    1
b    2
a    3
a    4
a    5
c    6
dtype: int64
s['c':'b':-2]  #标签切片两端都包含:从'c'倒序取到'b',步长为-2(每隔一个取一个)
c    6
a    4
b    2
dtype: int64
s['c':'b':-3]
c    6
a    3
dtype: int64
s['c':'b':-1]
c    6
a    5
a    4
a    3
b    2
dtype: int64
s['b']
2



s[['c','b']]
c    6
b    2
dtype: int64
s['c':'b':-2] #-2是步长

c    6
a    4
b    2
dtype: int64
s['c':'b':-3]
c    6
a    3
dtype: int64
s['c':'b':-3]
c    6
a    3
dtype: int64
s['c':'b':-1]
c    6
a    5
a    4
a    3
b    2
dtype: int64
#以整数为索引的Series
s = pd.Series(['a','b','c','d','e','f'],index = [1,3,1,2,5,4])
s
1    a
3    b
1    c
2    d
5    e
4    f
dtype: object
s[1]
1    a
1    c
dtype: object
s[[2,3]]
2    d
3    b
dtype: object
s[1:-1:2]  #整数切片按位置(从0开始)而不是按索引标签进行:
        #从位置1取到最后一个元素之前(最后一个元素取不到),对应的索引依次为 3 1 2 5
        #步长为2,所以取 b d
3    b
2    d
dtype: object

3.1.3 loc索引器

df_demo = df.set_index('Name')

df_demo.head()
                                       School      Grade  Gender  Weight Transfer
Name
Gaopeng Yang    Shanghai Jiao Tong University   Freshman  Female    46.0        N
Changqiang You              Peking University   Freshman    Male    70.0        N
Mei Sun         Shanghai Jiao Tong University     Senior    Male    89.0        N
Xiaojuan Sun                 Fudan University  Sophomore  Female    41.0        N
Gaojuan You                  Fudan University  Sophomore    Male    74.0        N
#*为单个元素
df_demo.loc['Qiang Sun']  #重复为dataframe
SchoolGradeGenderWeightTransfer
Name
Qiang SunTsinghua UniversityJuniorFemale53.0N
Qiang SunTsinghua UniversitySophomoreFemale40.0N
Qiang SunShanghai Jiao Tong UniversityJuniorFemaleNaNN
df_demo.loc['Quan Zhao']  #不重复为series
School      Shanghai Jiao Tong University
Grade                              Junior
Gender                             Female
Weight                                 53
Transfer                                N
Name: Quan Zhao, dtype: object
#同时选择行和列
df_demo.loc['Qiang Sun','School'] #返回Series
Name
Qiang Sun              Tsinghua University
Qiang Sun              Tsinghua University
Qiang Sun    Shanghai Jiao Tong University
Name: School, dtype: object
df_demo.loc['Quan Zhao','School']  #返回单个元素
'Shanghai Jiao Tong University'
# *为元素列表
df_demo.loc[['Quan Zhao','Qiang Sun'],['School','Gender']] #取出所有元素值对应的行和列
SchoolGender
Name
Quan ZhaoShanghai Jiao Tong UniversityFemale
Qiang SunTsinghua UniversityFemale
Qiang SunTsinghua UniversityFemale
Qiang SunShanghai Jiao Tong UniversityFemale
#*为切片
df_demo.loc['Gaojuan You':'Gaoqiang Qian','School':'Gender']
SchoolGradeGender
Name
Gaojuan YouFudan UniversitySophomoreMale
Xiaoli QianTsinghua UniversityFreshmanFemale
Qiang ChuShanghai Jiao Tong UniversityFreshmanFemale
Gaoqiang QianTsinghua UniversityJuniorFemale
df_loc_slice_demo = df_demo.copy()
df_loc_slice_demo.index = range(df_demo.shape[0],0,-1)  #倒序反着来
df_loc_slice_demo

SchoolGradeGenderWeightTransfer
200Shanghai Jiao Tong UniversityFreshmanFemale46.0N
199Peking UniversityFreshmanMale70.0N
198Shanghai Jiao Tong UniversitySeniorMale89.0N
197Fudan UniversitySophomoreFemale41.0N
196Fudan UniversitySophomoreMale74.0N
..................
5Fudan UniversityJuniorFemale46.0N
4Tsinghua UniversitySeniorFemale50.0N
3Shanghai Jiao Tong UniversitySeniorFemale45.0N
2Shanghai Jiao Tong UniversitySeniorMale71.0N
1Tsinghua UniversitySophomoreMale51.0N

200 rows × 5 columns

df_demo.shape[0]
200
df_loc_slice_demo.loc[5:3]
SchoolGradeGenderWeightTransfer
5Fudan UniversityJuniorFemale46.0N
4Tsinghua UniversitySeniorFemale50.0N
3Shanghai Jiao Tong UniversitySeniorFemale45.0N
df_loc_slice_demo.loc[3:5]
SchoolGradeGenderWeightTransfer
#*为布尔值列表
df_demo.loc[df_demo.Weight>70].head()
SchoolGradeGenderWeightTransfer
Name
Mei SunShanghai Jiao Tong UniversitySeniorMale89.0N
Gaojuan YouFudan UniversitySophomoreMale74.0N
Xiaopeng ZhouShanghai Jiao Tong UniversityFreshmanMale74.0N
Xiaofeng SunTsinghua UniversitySeniorMale71.0N
Qiang ZhengShanghai Jiao Tong UniversitySeniorMale87.0N
df_demo.loc[df_demo.Grade.isin(['Freshman','Senior'])].head()
SchoolGradeGenderWeightTransfer
Name
Gaopeng YangShanghai Jiao Tong UniversityFreshmanFemale46.0N
Changqiang YouPeking UniversityFreshmanMale70.0N
Mei SunShanghai Jiao Tong UniversitySeniorMale89.0N
Xiaoli QianTsinghua UniversityFreshmanFemale51.0N
Qiang ChuShanghai Jiao Tong UniversityFreshmanFemale52.0N
# Compound conditions: build two boolean masks over df_demo and select the
# rows that satisfy either of them.
# Mask 1: School is Fudan University AND Grade is Senior AND Weight > 70.
condition_1_1 = df_demo.School == 'Fudan University'
condition_1_2 = df_demo.Grade == 'Senior'
condition_1_3 = df_demo.Weight > 70
condition_1 = condition_1_1 & condition_1_2 &condition_1_3

# Mask 2: School is Peking University AND Grade is NOT Senior AND Weight > 80.
condition_2_1 = df_demo.School == 'Peking University'
condition_2_2 = df_demo.Grade == 'Senior'
condition_2_3 = df_demo.Weight > 80
condition_2 = condition_2_1 & (~condition_2_2) & condition_2_3  # ~ negates the boolean mask
# Rows matching mask 1 OR mask 2.
df_demo.loc[condition_1 | condition_2]
SchoolGradeGenderWeightTransfer
Name
Qiang HanPeking UniversityFreshmanMale87.0N
Chengpeng ZhouFudan UniversitySeniorMale81.0N
Changpeng ZhaoPeking UniversityFreshmanMale83.0N
Chengpeng QianFudan UniversitySeniorMale73.0Y
#*为函数
def condition(x):
    """Return a boolean mask selecting rows of ``x`` that match either rule.

    Rule 1: School is 'Fudan University', Grade is 'Senior' and Weight > 70.
    Rule 2: School is 'Peking University', Grade is not 'Senior' and
    Weight > 80.

    ``x`` is expected to expose ``School``, ``Grade`` and ``Weight`` columns
    as attributes; the result is the element-wise OR of the two rules and is
    suitable for passing to ``.loc``.
    """
    is_senior = x.Grade == 'Senior'

    # Fudan seniors heavier than 70.
    fudan_rule = (x.School == 'Fudan University') & is_senior & (x.Weight > 70)

    # Peking non-seniors heavier than 80 (~ negates the boolean mask).
    peking_rule = (x.School == 'Peking University') & (~is_senior) & (x.Weight > 80)

    return fudan_rule | peking_rule
df_demo.loc[condition]  
SchoolGradeGenderWeightTransfer
Name
Qiang HanPeking UniversityFreshmanMale87.0N
Chengpeng ZhouFudan UniversitySeniorMale81.0N
Changpeng ZhaoPeking UniversityFreshmanMale83.0N
Chengpeng QianFudan UniversitySeniorMale73.0Y
#lambda表达式
df_demo.loc[lambda x: 'Quan Zhao',lambda x:'Gender']
'Female'
df_demo.loc[lambda x: slice('Gaojuan You','Gaoqiang Qian')]
SchoolGradeGenderWeightTransfer
Name
Gaojuan YouFudan UniversitySophomoreMale74.0N
Xiaoli QianTsinghua UniversityFreshmanFemale51.0N
Qiang ChuShanghai Jiao Tong UniversityFreshmanFemale52.0N
Gaoqiang QianTsinghua UniversityJuniorFemale50.0N

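不要使用链式赋值:形如 df_chain[df_chain.A != 0].B = 1 的写法是在临时副本上赋值,原表不会被修改(pandas 会给出 SettingWithCopyWarning);应改用 df_chain.loc[df_chain.A != 0, 'B'] = 1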

df_chain = pd.DataFrame([[0,0],[1,0],[-1,0]],columns = list('AB'))
df_chain
   A  B
0  0  0
1  1  0
2 -1  0

3.1.4 iloc索引器

iloc 的使用与loc类似,是针对位置进行筛选

df_demo.iloc[1,1]  #索引从0开始 

'Freshman'
df_demo.iloc[[0,1],[0,1]] #01行 01列 前两行 前两列
SchoolGrade
Name
Gaopeng YangShanghai Jiao Tong UniversityFreshman
Changqiang YouPeking UniversityFreshman
df_demo.iloc[1:4,2:4]  #2-4行 3-4列
GenderWeight
Name
Changqiang YouMale70.0
Mei SunMale89.0
Xiaojuan SunFemale41.0
df_demo
SchoolGradeGenderWeightTransfer
Name
Gaopeng YangShanghai Jiao Tong UniversityFreshmanFemale46.0N
Changqiang YouPeking UniversityFreshmanMale70.0N
Mei SunShanghai Jiao Tong UniversitySeniorMale89.0N
Xiaojuan SunFudan UniversitySophomoreFemale41.0N
Gaojuan YouFudan UniversitySophomoreMale74.0N
..................
Xiaojuan SunFudan UniversityJuniorFemale46.0N
Li ZhaoTsinghua UniversitySeniorFemale50.0N
Chengqiang ChuShanghai Jiao Tong UniversitySeniorFemale45.0N
Chengmei ShenShanghai Jiao Tong UniversitySeniorMale71.0N
Chunpeng LvTsinghua UniversitySophomoreMale51.0N

200 rows × 5 columns

df_demo.iloc[lambda x:slice(1,4)] #234行
SchoolGradeGenderWeightTransfer
Name
Changqiang YouPeking UniversityFreshmanMale70.0N
Mei SunShanghai Jiao Tong UniversitySeniorMale89.0N
Xiaojuan SunFudan UniversitySophomoreFemale41.0N
#布尔列表
df_demo.iloc[(df_demo.Weight>80).values].head()
SchoolGradeGenderWeightTransfer
Name
Mei SunShanghai Jiao Tong UniversitySeniorMale89.0N
Qiang ZhengShanghai Jiao Tong UniversitySeniorMale87.0N
Qiang HanPeking UniversityFreshmanMale87.0N
Chengpeng ZhouFudan UniversitySeniorMale81.0N
Feng HanShanghai Jiao Tong UniversitySophomoreMale82.0N
#Series同样也可以通过iloc返回相应位置的值或子序列
df_demo.School.iloc[1]
'Peking University'
df_demo.School.iloc[1:5:2]
Name
Changqiang You    Peking University
Xiaojuan Sun       Fudan University
Name: School, dtype: object

3.1.5 query方法

# query() expresses the earlier loc boolean-mask compound condition as a
# single string expression, with column names used directly inside it:
# (Fudan AND Senior AND Weight > 70) OR (Peking AND not Senior AND Weight > 80).
# The adjacent string literals are concatenated into one expression.
df.query('((School == "Fudan University")&'
         '(Grade == "Senior")&'
         '(Weight > 70))|'
         '((School == "Peking University")&'
         '(Grade != "Senior")&'
         '(Weight > 80))'
)
SchoolGradeNameGenderWeightTransfer
38Peking UniversityFreshmanQiang HanMale87.0N
66Fudan UniversitySeniorChengpeng ZhouMale81.0N
99Peking UniversityFreshmanChangpeng ZhaoMale83.0N
131Fudan UniversitySeniorChengpeng QianMale73.0Y
df.query('Weight > Weight.mean()').head()
SchoolGradeNameGenderWeightTransfer
1Peking UniversityFreshmanChangqiang YouMale70.0N
2Shanghai Jiao Tong UniversitySeniorMei SunMale89.0N
4Fudan UniversitySophomoreGaojuan YouMale74.0N
10Shanghai Jiao Tong UniversityFreshmanXiaopeng ZhouMale74.0N
14Tsinghua UniversitySeniorXiaomei ZhouFemale57.0N
df.query('(Grade not in ["Freshman","Sophomore"]) and ' 
        '(Gender == "Male")').head()
SchoolGradeNameGenderWeightTransfer
2Shanghai Jiao Tong UniversitySeniorMei SunMale89.0N
16Tsinghua UniversityJuniorXiaoqiang QinMale68.0N
17Tsinghua UniversityJuniorPeng WangMale65.0N
18Tsinghua UniversitySeniorXiaofeng SunMale71.0N
21Shanghai Jiao Tong UniversitySeniorXiaopeng ShenMale62.0NaN
df.query('Grade == ["Junior","Senior"]').head()
SchoolGradeNameGenderWeightTransfer
2Shanghai Jiao Tong UniversitySeniorMei SunMale89.0N
7Tsinghua UniversityJuniorGaoqiang QianFemale50.0N
9Peking UniversityJuniorJuan XuFemaleNaNN
11Tsinghua UniversityJuniorXiaoquan LvFemale43.0N
12Shanghai Jiao Tong UniversitySeniorPeng YouFemale48.0NaN
#引用外部变量
low,high =70,80
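df.query('Weight.between(@low,@high)').head()  #@用于引用外部变量;部分pandas版本在默认的numexpr引擎下会报下面的TypeError,可尝试加参数engine='python',或改用df[df.Weight.between(low,high)]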
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-82-f7e7d81043c7> in <module>
      1 #引用外部变量
      2 low,high =70,80
----> 3 df.query('Weight.between(@low,@high)').head()


D:\anaconda\lib\site-packages\pandas\core\frame.py in query(self, expr, inplace, **kwargs)
   3343         kwargs["level"] = kwargs.pop("level", 0) + 1
   3344         kwargs["target"] = None
-> 3345         res = self.eval(expr, **kwargs)
   3346 
   3347         try:


D:\anaconda\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs)
   3473         kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers)
   3474 
-> 3475         return _eval(expr, inplace=inplace, **kwargs)
   3476 
   3477     def select_dtypes(self, include=None, exclude=None) -> "DataFrame":


D:\anaconda\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    344         eng = _engines[engine]
    345         eng_inst = eng(parsed_expr)
--> 346         ret = eng_inst.evaluate()
    347 
    348         if parsed_expr.assigner is None:


D:\anaconda\lib\site-packages\pandas\core\computation\engines.py in evaluate(self)
     71 
     72         # make sure no names in resolvers and locals/globals clash
---> 73         res = self._evaluate()
     74         return reconstruct_object(
     75             self.result_type, res, self.aligned_axes, self.expr.terms.return_type


D:\anaconda\lib\site-packages\pandas\core\computation\engines.py in _evaluate(self)
    111         env = self.expr.env
    112         scope = env.full_scope
--> 113         _check_ne_builtin_clash(self.expr)
    114         return ne.evaluate(s, local_dict=scope)
    115 


D:\anaconda\lib\site-packages\pandas\core\computation\engines.py in _check_ne_builtin_clash(expr)
     27         Terms can contain
     28     """
---> 29     names = expr.names
     30     overlap = names & _ne_builtins
     31 


D:\anaconda\lib\site-packages\pandas\core\computation\expr.py in names(self)
    812         """
    813         if is_term(self.terms):
--> 814             return frozenset([self.terms.name])
    815         return frozenset(term.name for term in com.flatten(self.terms))
    816 


D:\anaconda\lib\site-packages\pandas\core\generic.py in __hash__(self)
   1667     def __hash__(self):
   1668         raise TypeError(
-> 1669             f"{repr(type(self).__name__)} objects are mutable, "
   1670             f"thus they cannot be hashed"
   1671         )


TypeError: 'Series' objects are mutable, thus they cannot be hashed

3.1.6 随机抽样

df_sample = pd.DataFrame({'id':list('abcde'),
                          'value':[1,2,3,4,90]
    
})
df_sample
  id  value
0  a      1
1  b      2
2  c      3
3  d      4
4  e     90
df_sample.sample(3,replace = True,weights = df_sample.value)  #以value值的相对大小为抽样概率进行有放回的抽样,比如说e = 90,值很大,被抽到的概率就很高。
  id  value
4  e     90
4  e     90
4  e     90
weights = df_sample.value


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值