pandas数据分析常用方法总结(数据集可以自己更改)

1.导入库

#导入库
import numpy as np
import pandas as pd

2.查看数据

#读取文件
data = pd.read_csv('titanic_data.csv')
data
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
5603Moran, Mr. JamesmaleNaN003308778.4583NaNQ
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.00234774211.1333NaNS
91012Nasser, Mrs. Nicholas (Adele Achem)female14.01023773630.0708NaNC
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
111211Bonnell, Miss. Elizabethfemale58.00011378326.5500C103S
121303Saundercock, Mr. William Henrymale20.000A/5. 21518.0500NaNS
131403Andersson, Mr. Anders Johanmale39.01534708231.2750NaNS
141503Vestrom, Miss. Hulda Amanda Adolfinafemale14.0003504067.8542NaNS
151612Hewlett, Mrs. (Mary D Kingcome)female55.00024870616.0000NaNS
161703Rice, Master. Eugenemale2.04138265229.1250NaNQ
171812Williams, Mr. Charles EugenemaleNaN0024437313.0000NaNS
181903Vander Planke, Mrs. Julius (Emelia Maria Vande...female31.01034576318.0000NaNS
192013Masselmani, Mrs. FatimafemaleNaN0026497.2250NaNC
202102Fynney, Mr. Joseph Jmale35.00023986526.0000NaNS
212212Beesley, Mr. Lawrencemale34.00024869813.0000D56S
222313McGowan, Miss. Anna "Annie"female15.0003309238.0292NaNQ
232411Sloper, Mr. William Thompsonmale28.00011378835.5000A6S
242503Palsson, Miss. Torborg Danirafemale8.03134990921.0750NaNS
252613Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...female38.01534707731.3875NaNS
262703Emir, Mr. Farred ChehabmaleNaN0026317.2250NaNC
272801Fortune, Mr. Charles Alexandermale19.03219950263.0000C23 C25 C27S
282913O'Dwyer, Miss. Ellen "Nellie"femaleNaN003309597.8792NaNQ
293003Todoroff, Mr. LaliomaleNaN003492167.8958NaNS
.......................................
86186202Giles, Mr. Frederick Edwardmale21.0102813411.5000NaNS
86286311Swift, Mrs. Frederick Joel (Margaret Welles Ba...female48.0001746625.9292D17S
86386403Sage, Miss. Dorothy Edith "Dolly"femaleNaN82CA. 234369.5500NaNS
86486502Gill, Mr. John Williammale24.00023386613.0000NaNS
86586612Bystrom, Mrs. (Karolina)female42.00023685213.0000NaNS
86686712Duran y More, Miss. Asuncionfemale27.010SC/PARIS 214913.8583NaNC
86786801Roebling, Mr. Washington Augustus IImale31.000PC 1759050.4958A24S
86886903van Melkebeke, Mr. PhilemonmaleNaN003457779.5000NaNS
86987013Johnson, Master. Harold Theodormale4.01134774211.1333NaNS
87087103Balkic, Mr. Cerinmale26.0003492487.8958NaNS
87187211Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female47.0111175152.5542D35S
87287301Carlsson, Mr. Frans Olofmale33.0006955.0000B51 B53 B55S
87387403Vander Cruyssen, Mr. Victormale47.0003457659.0000NaNS
87487512Abelson, Mrs. Samuel (Hannah Wizosky)female28.010P/PP 338124.0000NaNC
87587613Najib, Miss. Adele Kiamie "Jane"female15.00026677.2250NaNC
87687703Gustafsson, Mr. Alfred Ossianmale20.00075349.8458NaNS
87787803Petroff, Mr. Nedeliomale19.0003492127.8958NaNS
87887903Laleff, Mr. KristomaleNaN003492177.8958NaNS
87988011Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)female56.0011176783.1583C50C
88088112Shelley, Mrs. William (Imanita Parrish Hall)female25.00123043326.0000NaNS
88188203Markun, Mr. Johannmale33.0003492577.8958NaNS
88288303Dahlberg, Miss. Gerda Ulrikafemale22.000755210.5167NaNS
88388402Banfield, Mr. Frederick Jamesmale28.000C.A./SOTON 3406810.5000NaNS
88488503Sutehall, Mr. Henry Jrmale25.000SOTON/OQ 3920767.0500NaNS
88588603Rice, Mrs. William (Margaret Norton)female39.00538265229.1250NaNQ
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88888903Johnston, Miss. Catherine Helen "Carrie"femaleNaN12W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ

891 rows × 12 columns

#查看数据维度
data.shape
(891, 12)
#查看数据基本信息
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
#查看每一列的空值数量并按降序排列(可以把链式调用拆开,分步运行来理解)
data.isnull().sum().sort_values(ascending=False)
Cabin          687
Age            177
Embarked         2
Fare             0
Ticket           0
Parch            0
SibSp            0
Sex              0
Name             0
Pclass           0
Survived         0
PassengerId      0
dtype: int64
#查看每一列的数据格式
data.dtypes
PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object
#查看具体某一列的数据格式
data['Sex'].dtype
dtype('O')
#查看整个表的空值
data.isnull()
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
5FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
6FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
7FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
8FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
9FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
10FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
11FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
12FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
13FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
14FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
15FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
16FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
17FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
18FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
19FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
20FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
21FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
22FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
23FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
24FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
25FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
26FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
27FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
28FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
29FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
.......................................
861FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
862FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
863FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
864FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
865FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
866FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
867FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
868FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
869FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
870FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
871FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
872FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
873FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
874FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
875FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
876FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
877FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
878FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
879FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
880FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
881FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
882FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
883FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
884FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
885FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
886FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
887FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
888FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
889FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
890FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse

891 rows × 12 columns

#查看具体某一列的空值
data['Age'].isnull()
0      False
1      False
2      False
3      False
4      False
5       True
6      False
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16     False
17      True
18     False
19      True
20     False
21     False
22     False
23     False
24     False
25     False
26      True
27     False
28      True
29      True
       ...  
861    False
862    False
863     True
864    False
865    False
866    False
867    False
868     True
869    False
870    False
871    False
872    False
873    False
874    False
875    False
876    False
877    False
878     True
879    False
880    False
881    False
882    False
883    False
884    False
885    False
886    False
887    False
888     True
889    False
890    False
Name: Age, Length: 891, dtype: bool
#查看某列的取值种类
data['Parch'].unique()
array([0, 1, 2, 5, 3, 4, 6], dtype=int64)
#查看某一列中每个不同取值出现的次数
data['Parch'].value_counts(dropna=False) #不把空值丢弃
0    678
1    118
2     80
5      5
3      5
4      4
6      1
Name: Parch, dtype: int64
#查看数据表的值,就是不包括表头和索引
data.values
array([[1, 0, 3, ..., 7.25, nan, 'S'],
       [2, 1, 1, ..., 71.2833, 'C85', 'C'],
       [3, 1, 3, ..., 7.925, nan, 'S'],
       ...,
       [889, 0, 3, ..., 23.45, nan, 'S'],
       [890, 1, 1, ..., 30.0, 'C148', 'C'],
       [891, 0, 3, ..., 7.75, nan, 'Q']], dtype=object)
#查看前几行
data.head(4)
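| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |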
#查看后几行
data.tail(4)
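| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.00 | B42 | S |
| 888 | 889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.45 | NaN | S |
| 889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.00 | C148 | C |
| 890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.75 | NaN | Q |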

3.清洗数据

#-----------------------------------------以上是查看数据
#-----------------------------------------以下是清洗数据
#用0填充空值
data['Age'].fillna(value=0)
0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
5       0.0
6      54.0
7       2.0
8      27.0
9      14.0
10      4.0
11     58.0
12     20.0
13     39.0
14     14.0
15     55.0
16      2.0
17      0.0
18     31.0
19      0.0
20     35.0
21     34.0
22     15.0
23     28.0
24      8.0
25     38.0
26      0.0
27     19.0
28      0.0
29      0.0
       ... 
861    21.0
862    48.0
863     0.0
864    24.0
865    42.0
866    27.0
867    31.0
868     0.0
869     4.0
870    26.0
871    47.0
872    33.0
873    47.0
874    28.0
875    15.0
876    20.0
877    19.0
878     0.0
879    56.0
880    25.0
881    33.0
882    22.0
883    28.0
884    25.0
885    39.0
886    27.0
887    19.0
888     0.0
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64
#使用空值前面一个数填充数据(真正的表数据没变)
# Series.ffill() is the supported spelling; fillna(method='ffill') is
# deprecated in recent pandas. Returns a new Series, `data` is not modified.
data['Age'].ffill()
0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
5      35.0
6      54.0
7       2.0
8      27.0
9      14.0
10      4.0
11     58.0
12     20.0
13     39.0
14     14.0
15     55.0
16      2.0
17      2.0
18     31.0
19     31.0
20     35.0
21     34.0
22     15.0
23     28.0
24      8.0
25     38.0
26     38.0
27     19.0
28     19.0
29     19.0
       ... 
861    21.0
862    48.0
863    48.0
864    24.0
865    42.0
866    27.0
867    31.0
868    31.0
869     4.0
870    26.0
871    47.0
872    33.0
873    47.0
874    28.0
875    15.0
876    20.0
877    19.0
878    19.0
879    56.0
880    25.0
881    33.0
882    22.0
883    28.0
884    25.0
885    39.0
886    27.0
887    19.0
888    19.0
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64
#使用空值后面一个数填充数据(真正的表数据没变)
# Series.bfill() is the supported spelling; fillna(method='bfill') is
# deprecated in recent pandas. Returns a new Series, `data` is not modified.
data['Age'].bfill()
0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
5      54.0
6      54.0
7       2.0
8      27.0
9      14.0
10      4.0
11     58.0
12     20.0
13     39.0
14     14.0
15     55.0
16      2.0
17     31.0
18     31.0
19     35.0
20     35.0
21     34.0
22     15.0
23     28.0
24      8.0
25     38.0
26     19.0
27     19.0
28     40.0
29     40.0
       ... 
861    21.0
862    48.0
863    24.0
864    24.0
865    42.0
866    27.0
867    31.0
868     4.0
869     4.0
870    26.0
871    47.0
872    33.0
873    47.0
874    28.0
875    15.0
876    20.0
877    19.0
878    56.0
879    56.0
880    25.0
881    33.0
882    22.0
883    28.0
884    25.0
885    39.0
886    27.0
887    19.0
888    26.0
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64
#查看原数据
data['Age']
0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
5       NaN
6      54.0
7       2.0
8      27.0
9      14.0
10      4.0
11     58.0
12     20.0
13     39.0
14     14.0
15     55.0
16      2.0
17      NaN
18     31.0
19      NaN
20     35.0
21     34.0
22     15.0
23     28.0
24      8.0
25     38.0
26      NaN
27     19.0
28      NaN
29      NaN
       ... 
861    21.0
862    48.0
863     NaN
864    24.0
865    42.0
866    27.0
867    31.0
868     NaN
869     4.0
870    26.0
871    47.0
872    33.0
873    47.0
874    28.0
875    15.0
876    20.0
877    19.0
878     NaN
879    56.0
880    25.0
881    33.0
882    22.0
883    28.0
884    25.0
885    39.0
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64
#空值依然是空值,不影响原数据
#因为fillna有个inplace参数,默认为False,返回新对象而不改变原数据;设置为True才会原地修改
#注意:对data['Age']这样取出来的列使用inplace=True属于链式赋值,在新版pandas(写时复制)下不保证能改到原表,更稳妥的做法是把填充结果赋值回原列
#使用均值改变原数据空值
# Assign the filled column back to the frame. Calling fillna(inplace=True)
# on data['Age'] is chained assignment: it operates on an intermediate object
# and does not update `data` when pandas copy-on-write is active.
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['Age']
0      22.000000
1      38.000000
2      26.000000
3      35.000000
4      35.000000
5      29.699118
6      54.000000
7       2.000000
8      27.000000
9      14.000000
10      4.000000
11     58.000000
12     20.000000
13     39.000000
14     14.000000
15     55.000000
16      2.000000
17     29.699118
18     31.000000
19     29.699118
20     35.000000
21     34.000000
22     15.000000
23     28.000000
24      8.000000
25     38.000000
26     29.699118
27     19.000000
28     29.699118
29     29.699118
         ...    
861    21.000000
862    48.000000
863    29.699118
864    24.000000
865    42.000000
866    27.000000
867    31.000000
868    29.699118
869     4.000000
870    26.000000
871    47.000000
872    33.000000
873    47.000000
874    28.000000
875    15.000000
876    20.000000
877    19.000000
878    29.699118
879    56.000000
880    25.000000
881    33.000000
882    22.000000
883    28.000000
884    25.000000
885    39.000000
886    27.000000
887    19.000000
888    29.699118
889    26.000000
890    32.000000
Name: Age, Length: 891, dtype: float64
#清除字符空格
'  a bb'.strip()
#data['a']=data['a'].map(str.strip)   对a列的每一个字符串都执行strip(注意传入的是函数str.strip本身,后面不加括号)
'a bb'
data['Name']=data['Name'].map(str.strip)
data['Name']
0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
5                                       Moran, Mr. James
6                                McCarthy, Mr. Timothy J
7                         Palsson, Master. Gosta Leonard
8      Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
9                    Nasser, Mrs. Nicholas (Adele Achem)
10                       Sandstrom, Miss. Marguerite Rut
11                              Bonnell, Miss. Elizabeth
12                        Saundercock, Mr. William Henry
13                           Andersson, Mr. Anders Johan
14                  Vestrom, Miss. Hulda Amanda Adolfina
15                       Hewlett, Mrs. (Mary D Kingcome)
16                                  Rice, Master. Eugene
17                          Williams, Mr. Charles Eugene
18     Vander Planke, Mrs. Julius (Emelia Maria Vande...
19                               Masselmani, Mrs. Fatima
20                                  Fynney, Mr. Joseph J
21                                 Beesley, Mr. Lawrence
22                           McGowan, Miss. Anna "Annie"
23                          Sloper, Mr. William Thompson
24                         Palsson, Miss. Torborg Danira
25     Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
26                               Emir, Mr. Farred Chehab
27                        Fortune, Mr. Charles Alexander
28                         O'Dwyer, Miss. Ellen "Nellie"
29                                   Todoroff, Mr. Lalio
                             ...                        
861                          Giles, Mr. Frederick Edward
862    Swift, Mrs. Frederick Joel (Margaret Welles Ba...
863                    Sage, Miss. Dorothy Edith "Dolly"
864                               Gill, Mr. John William
865                             Bystrom, Mrs. (Karolina)
866                         Duran y More, Miss. Asuncion
867                 Roebling, Mr. Washington Augustus II
868                          van Melkebeke, Mr. Philemon
869                      Johnson, Master. Harold Theodor
870                                    Balkic, Mr. Cerin
871     Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
872                             Carlsson, Mr. Frans Olof
873                          Vander Cruyssen, Mr. Victor
874                Abelson, Mrs. Samuel (Hannah Wizosky)
875                     Najib, Miss. Adele Kiamie "Jane"
876                        Gustafsson, Mr. Alfred Ossian
877                                 Petroff, Mr. Nedelio
878                                   Laleff, Mr. Kristo
879        Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
880         Shelley, Mrs. William (Imanita Parrish Hall)
881                                   Markun, Mr. Johann
882                         Dahlberg, Miss. Gerda Ulrika
883                        Banfield, Mr. Frederick James
884                               Sutehall, Mr. Henry Jr
885                 Rice, Mrs. William (Margaret Norton)
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object
#大小写转换
data['Name']=data['Name'].str.lower()
data['Name']
0                                braund, mr. owen harris
1      cumings, mrs. john bradley (florence briggs th...
2                                 heikkinen, miss. laina
3           futrelle, mrs. jacques heath (lily may peel)
4                               allen, mr. william henry
5                                       moran, mr. james
6                                mccarthy, mr. timothy j
7                         palsson, master. gosta leonard
8      johnson, mrs. oscar w (elisabeth vilhelmina berg)
9                    nasser, mrs. nicholas (adele achem)
10                       sandstrom, miss. marguerite rut
11                              bonnell, miss. elizabeth
12                        saundercock, mr. william henry
13                           andersson, mr. anders johan
14                  vestrom, miss. hulda amanda adolfina
15                       hewlett, mrs. (mary d kingcome)
16                                  rice, master. eugene
17                          williams, mr. charles eugene
18     vander planke, mrs. julius (emelia maria vande...
19                               masselmani, mrs. fatima
20                                  fynney, mr. joseph j
21                                 beesley, mr. lawrence
22                           mcgowan, miss. anna "annie"
23                          sloper, mr. william thompson
24                         palsson, miss. torborg danira
25     asplund, mrs. carl oscar (selma augusta emilia...
26                               emir, mr. farred chehab
27                        fortune, mr. charles alexander
28                         o'dwyer, miss. ellen "nellie"
29                                   todoroff, mr. lalio
                             ...                        
861                          giles, mr. frederick edward
862    swift, mrs. frederick joel (margaret welles ba...
863                    sage, miss. dorothy edith "dolly"
864                               gill, mr. john william
865                             bystrom, mrs. (karolina)
866                         duran y more, miss. asuncion
867                 roebling, mr. washington augustus ii
868                          van melkebeke, mr. philemon
869                      johnson, master. harold theodor
870                                    balkic, mr. cerin
871     beckwith, mrs. richard leonard (sallie monypeny)
872                             carlsson, mr. frans olof
873                          vander cruyssen, mr. victor
874                abelson, mrs. samuel (hannah wizosky)
875                     najib, miss. adele kiamie "jane"
876                        gustafsson, mr. alfred ossian
877                                 petroff, mr. nedelio
878                                   laleff, mr. kristo
879        potter, mrs. thomas jr (lily alexenia wilson)
880         shelley, mrs. william (imanita parrish hall)
881                                   markun, mr. johann
882                         dahlberg, miss. gerda ulrika
883                        banfield, mr. frederick james
884                               sutehall, mr. henry jr
885                 rice, mrs. william (margaret norton)
886                                montvila, rev. juozas
887                         graham, miss. margaret edith
888             johnston, miss. catherine helen "carrie"
889                                behr, mr. karl howell
890                                  dooley, mr. patrick
Name: Name, Length: 891, dtype: object
#更改数据格式
data['Survived'].dtype
dtype('int64')
data['Survived'].astype('str')
0      0
1      1
2      1
3      1
4      0
5      0
6      0
7      0
8      1
9      1
10     1
11     1
12     0
13     0
14     0
15     1
16     0
17     1
18     0
19     1
20     0
21     1
22     1
23     1
24     0
25     1
26     0
27     0
28     1
29     0
      ..
861    0
862    1
863    0
864    0
865    1
866    1
867    0
868    0
869    1
870    0
871    1
872    0
873    0
874    1
875    1
876    0
877    0
878    0
879    1
880    1
881    0
882    0
883    0
884    0
885    0
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: object
#改变列名
# rename returns a new DataFrame; `data` keeps its original column names.
# The new label for 'Sex' had an accidental trailing space ('性别 '), which
# would make later lookups by '性别' fail.
data.rename(columns={'Sex':'性别','Name':'姓名'}).head()
PassengerIdSurvivedPclass姓名性别AgeSibSpParchTicketFareCabinEmbarked
0103braund, mr. owen harrismale22.010A/5 211717.2500NaNS
1211cumings, mrs. john bradley (florence briggs th...female38.010PC 1759971.2833C85C
2313heikkinen, miss. lainafemale26.000STON/O2. 31012827.9250NaNS
3411futrelle, mrs. jacques heath (lily may peel)female35.01011380353.1000C123S
4503allen, mr. william henrymale35.0003734508.0500NaNS
#打印列名(rename默认返回新表,原表的列名并没有被修改)
data.columns
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
#删除某列重复值(默认keep='first':保留最先出现的,删除后面重复的)
data['Sex'].drop_duplicates()
0      male
1    female
Name: Sex, dtype: object
#删除某列重复值(keep='last':保留最后出现的,删除前面重复的)
data['Sex'].drop_duplicates(keep='last')
888    female
890      male
Name: Sex, dtype: object
#数据替换
data['Survived'].replace(0,'died')
0      died
1         1
2         1
3         1
4      died
5      died
6      died
7      died
8         1
9         1
10        1
11        1
12     died
13     died
14     died
15        1
16     died
17        1
18     died
19        1
20     died
21        1
22        1
23        1
24     died
25        1
26     died
27     died
28        1
29     died
       ... 
861    died
862       1
863    died
864    died
865       1
866       1
867    died
868    died
869       1
870    died
871       1
872    died
873    died
874       1
875       1
876    died
877    died
878    died
879       1
880       1
881    died
882    died
883    died
884    died
885    died
886    died
887       1
888    died
889       1
890    died
Name: Survived, Length: 891, dtype: object

4.数据合并

#--------------------------------------数据合并
data1= pd.DataFrame({'PassengerId':[9,19,29,39999],
                    'Sur':[0,1,0,1],
                    'Name':['xiaoming','xiaohong','xiaozhang','xiaozhao'],
                    'time':[10.0,22.0,30.0,40.0]})
data1
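| | PassengerId | Sur | Name | time |
|---|---|---|---|---|
| 0 | 9 | 0 | xiaoming | 10.0 |
| 1 | 19 | 1 | xiaohong | 22.0 |
| 2 | 29 | 0 | xiaozhang | 30.0 |
| 3 | 39999 | 1 | xiaozhao | 40.0 |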
#merge合并两张表
data_inner = pd.merge(data,data1,on='PassengerId',how='inner')#根据'PassengerId'字段合并,取交集,也就是内连接
data_inner
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
0913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0xiaoming10.0
11903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1xiaohong22.0
22913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0xiaozhang30.0
#左连接
data_left = pd.merge(data,data1,on='PassengerId',how='left')
data_left.head(30)
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
0103braund, mr. owen harrismale22.00000010A/5 211717.2500NaNSNaNNaNNaN
1211cumings, mrs. john bradley (florence briggs th...female38.00000010PC 1759971.2833C85CNaNNaNNaN
2313heikkinen, miss. lainafemale26.00000000STON/O2. 31012827.9250NaNSNaNNaNNaN
3411futrelle, mrs. jacques heath (lily may peel)female35.0000001011380353.1000C123SNaNNaNNaN
4503allen, mr. william henrymale35.000000003734508.0500NaNSNaNNaNNaN
5603moran, mr. jamesmale29.699118003308778.4583NaNQNaNNaNNaN
6701mccarthy, mr. timothy jmale54.000000001746351.8625E46SNaNNaNNaN
7803palsson, master. gosta leonardmale2.0000003134990921.0750NaNSNaNNaNNaN
8913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0.0xiaoming10.0
91012nasser, mrs. nicholas (adele achem)female14.0000001023773630.0708NaNCNaNNaNNaN
101113sandstrom, miss. marguerite rutfemale4.00000011PP 954916.7000G6SNaNNaNNaN
111211bonnell, miss. elizabethfemale58.0000000011378326.5500C103SNaNNaNNaN
121303saundercock, mr. william henrymale20.00000000A/5. 21518.0500NaNSNaNNaNNaN
131403andersson, mr. anders johanmale39.0000001534708231.2750NaNSNaNNaNNaN
141503vestrom, miss. hulda amanda adolfinafemale14.000000003504067.8542NaNSNaNNaNNaN
151612hewlett, mrs. (mary d kingcome)female55.0000000024870616.0000NaNSNaNNaNNaN
161703rice, master. eugenemale2.0000004138265229.1250NaNQNaNNaNNaN
171812williams, mr. charles eugenemale29.6991180024437313.0000NaNSNaNNaNNaN
181903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1.0xiaohong22.0
192013masselmani, mrs. fatimafemale29.6991180026497.2250NaNCNaNNaNNaN
202102fynney, mr. joseph jmale35.0000000023986526.0000NaNSNaNNaNNaN
212212beesley, mr. lawrencemale34.0000000024869813.0000D56SNaNNaNNaN
222313mcgowan, miss. anna "annie"female15.000000003309238.0292NaNQNaNNaNNaN
232411sloper, mr. william thompsonmale28.0000000011378835.5000A6SNaNNaNNaN
242503palsson, miss. torborg danirafemale8.0000003134990921.0750NaNSNaNNaNNaN
252613asplund, mrs. carl oscar (selma augusta emilia...female38.0000001534707731.3875NaNSNaNNaNNaN
262703emir, mr. farred chehabmale29.6991180026317.2250NaNCNaNNaNNaN
272801fortune, mr. charles alexandermale19.0000003219950263.0000C23 C25 C27SNaNNaNNaN
282913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0.0xiaozhang30.0
293003todoroff, mr. laliomale29.699118003492167.8958NaNSNaNNaNNaN
#右连接
data_right = pd.merge(data,data1,on='PassengerId',how='right')
data_right
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
091.03.0johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000.02.034774211.1333NaNS0xiaoming10.0
1190.03.0vander planke, mrs. julius (emelia maria vande...female31.0000001.00.034576318.0000NaNS1xiaohong22.0
2291.03.0o'dwyer, miss. ellen "nellie"female29.6991180.00.03309597.8792NaNQ0xiaozhang30.0
339999NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1xiaozhao40.0
#全连接
data_outer = pd.merge(data,data1,on='PassengerId',how='outer')
data_outer.head(20)#打印前20条看
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
010.03.0braund, mr. owen harrismale22.0000001.00.0A/5 211717.2500NaNSNaNNaNNaN
121.01.0cumings, mrs. john bradley (florence briggs th...female38.0000001.00.0PC 1759971.2833C85CNaNNaNNaN
231.03.0heikkinen, miss. lainafemale26.0000000.00.0STON/O2. 31012827.9250NaNSNaNNaNNaN
341.01.0futrelle, mrs. jacques heath (lily may peel)female35.0000001.00.011380353.1000C123SNaNNaNNaN
450.03.0allen, mr. william henrymale35.0000000.00.03734508.0500NaNSNaNNaNNaN
560.03.0moran, mr. jamesmale29.6991180.00.03308778.4583NaNQNaNNaNNaN
670.01.0mccarthy, mr. timothy jmale54.0000000.00.01746351.8625E46SNaNNaNNaN
780.03.0palsson, master. gosta leonardmale2.0000003.01.034990921.0750NaNSNaNNaNNaN
891.03.0johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000.02.034774211.1333NaNS0.0xiaoming10.0
9101.02.0nasser, mrs. nicholas (adele achem)female14.0000001.00.023773630.0708NaNCNaNNaNNaN
10111.03.0sandstrom, miss. marguerite rutfemale4.0000001.01.0PP 954916.7000G6SNaNNaNNaN
11121.01.0bonnell, miss. elizabethfemale58.0000000.00.011378326.5500C103SNaNNaNNaN
12130.03.0saundercock, mr. william henrymale20.0000000.00.0A/5. 21518.0500NaNSNaNNaNNaN
13140.03.0andersson, mr. anders johanmale39.0000001.05.034708231.2750NaNSNaNNaNNaN
14150.03.0vestrom, miss. hulda amanda adolfinafemale14.0000000.00.03504067.8542NaNSNaNNaNNaN
15161.02.0hewlett, mrs. (mary d kingcome)female55.0000000.00.024870616.0000NaNSNaNNaNNaN
16170.03.0rice, master. eugenemale2.0000004.01.038265229.1250NaNQNaNNaNNaN
17181.02.0williams, mr. charles eugenemale29.6991180.00.024437313.0000NaNSNaNNaNNaN
18190.03.0vander planke, mrs. julius (emelia maria vande...female31.0000001.00.034576318.0000NaNS1.0xiaohong22.0
19201.03.0masselmani, mrs. fatimafemale29.6991180.00.026497.2250NaNCNaNNaNNaN
data_outer.tail(20)#打印后20条看
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
8728730.01.0carlsson, mr. frans olofmale33.0000000.00.06955.0000B51 B53 B55SNaNNaNNaN
8738740.03.0vander cruyssen, mr. victormale47.0000000.00.03457659.0000NaNSNaNNaNNaN
8748751.02.0abelson, mrs. samuel (hannah wizosky)female28.0000001.00.0P/PP 338124.0000NaNCNaNNaNNaN
8758761.03.0najib, miss. adele kiamie "jane"female15.0000000.00.026677.2250NaNCNaNNaNNaN
8768770.03.0gustafsson, mr. alfred ossianmale20.0000000.00.075349.8458NaNSNaNNaNNaN
8778780.03.0petroff, mr. nedeliomale19.0000000.00.03492127.8958NaNSNaNNaNNaN
8788790.03.0laleff, mr. kristomale29.6991180.00.03492177.8958NaNSNaNNaNNaN
8798801.01.0potter, mrs. thomas jr (lily alexenia wilson)female56.0000000.01.01176783.1583C50CNaNNaNNaN
8808811.02.0shelley, mrs. william (imanita parrish hall)female25.0000000.01.023043326.0000NaNSNaNNaNNaN
8818820.03.0markun, mr. johannmale33.0000000.00.03492577.8958NaNSNaNNaNNaN
8828830.03.0dahlberg, miss. gerda ulrikafemale22.0000000.00.0755210.5167NaNSNaNNaNNaN
8838840.02.0banfield, mr. frederick jamesmale28.0000000.00.0C.A./SOTON 3406810.5000NaNSNaNNaNNaN
8848850.03.0sutehall, mr. henry jrmale25.0000000.00.0SOTON/OQ 3920767.0500NaNSNaNNaNNaN
8858860.03.0rice, mrs. william (margaret norton)female39.0000000.05.038265229.1250NaNQNaNNaNNaN
8868870.02.0montvila, rev. juozasmale27.0000000.00.021153613.0000NaNSNaNNaNNaN
8878881.01.0graham, miss. margaret edithfemale19.0000000.00.011205330.0000B42SNaNNaNNaN
8888890.03.0johnston, miss. catherine helen "carrie"female29.6991181.02.0W./C. 660723.4500NaNSNaNNaNNaN
8898901.01.0behr, mr. karl howellmale26.0000000.00.011136930.0000C148CNaNNaNNaN
8908910.03.0dooley, mr. patrickmale32.0000000.00.03703767.7500NaNQNaNNaNNaN
89139999NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0xiaozhao40.0
# Append rows: stack data2 underneath data1.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# pd.concat is used instead. Columns that exist in only one frame are filled
# with NaN, and each frame keeps its own row labels (so labels repeat).
# sort=True orders the union of columns alphabetically.
# NOTE(review): 'famle' looks like a typo for 'female'; left unchanged because
# it is sample data.
data2 = pd.DataFrame({'id':[1,2,3],
                     'Sex':['famle','male','male']})
result = pd.concat([data1, data2], sort=True)
result
NamePassengerIdSexSuridtime
0xiaoming9.0NaN0.0NaN10.0
1xiaohong19.0NaN1.0NaN22.0
2xiaozhang29.0NaN0.0NaN30.0
3xiaozhao39999.0NaN1.0NaN40.0
0NaNNaNfamleNaN1.0NaN
1NaNNaNmaleNaN2.0NaN
2NaNNaNmaleNaN3.0NaN
# Use the 'time' column as the row index. The result is a new DataFrame;
# data_inner is not modified because the return value is not assigned.
data_inner.set_index('time')
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_y
time
10.0913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0xiaoming
22.01903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1xiaohong
30.02913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0xiaozhang
# Replace the index with a fresh 0..n-1 range; drop=True discards the old
# index rather than inserting it as a column.
data_inner.reset_index(drop=True)
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
0913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0xiaoming10.0
11903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1xiaohong22.0
22913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0xiaozhang30.0
# Sort rows by the values of the listed column(s); ascending by default.
data_inner.sort_values(by=['Fare'])
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
22913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0xiaozhang30.0
0913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0xiaoming10.0
11903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1xiaohong22.0
# Order the rows by index label again.
data_inner.sort_index()
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytime
0913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0xiaoming10.0
11903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1xiaohong22.0
22913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0xiaozhang30.0
# Add a 'level' column derived from a comparison: 'good' where Fare > 10.0,
# otherwise 'oh'.
data_inner['level']=np.where(data_inner['Fare']>10.0,'good','oh')
data_inner
PassengerIdSurvivedPclassName_xSexAgeSibSpParchTicketFareCabinEmbarkedSurName_ytimelevel
0913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNS0xiaoming10.0good
11903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNS1xiaohong22.0good
22913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQ0xiaozhang30.0oh
# Flag rows that satisfy several conditions at once: 'tip' is set to 1 for
# males aged 10 or younger; rows that do not match are left as NaN.
data.loc[(data['Sex']=='male') & (data['Age']<=10.0),'tip']=1
data.head(50)
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
0103braund, mr. owen harrismale22.00000010A/5 211717.2500NaNSNaN
1211cumings, mrs. john bradley (florence briggs th...female38.00000010PC 1759971.2833C85CNaN
2313heikkinen, miss. lainafemale26.00000000STON/O2. 31012827.9250NaNSNaN
3411futrelle, mrs. jacques heath (lily may peel)female35.0000001011380353.1000C123SNaN
4503allen, mr. william henrymale35.000000003734508.0500NaNSNaN
5603moran, mr. jamesmale29.699118003308778.4583NaNQNaN
6701mccarthy, mr. timothy jmale54.000000001746351.8625E46SNaN
7803palsson, master. gosta leonardmale2.0000003134990921.0750NaNS1.0
8913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNSNaN
91012nasser, mrs. nicholas (adele achem)female14.0000001023773630.0708NaNCNaN
101113sandstrom, miss. marguerite rutfemale4.00000011PP 954916.7000G6SNaN
111211bonnell, miss. elizabethfemale58.0000000011378326.5500C103SNaN
121303saundercock, mr. william henrymale20.00000000A/5. 21518.0500NaNSNaN
131403andersson, mr. anders johanmale39.0000001534708231.2750NaNSNaN
141503vestrom, miss. hulda amanda adolfinafemale14.000000003504067.8542NaNSNaN
151612hewlett, mrs. (mary d kingcome)female55.0000000024870616.0000NaNSNaN
161703rice, master. eugenemale2.0000004138265229.1250NaNQ1.0
171812williams, mr. charles eugenemale29.6991180024437313.0000NaNSNaN
181903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNSNaN
192013masselmani, mrs. fatimafemale29.6991180026497.2250NaNCNaN
202102fynney, mr. joseph jmale35.0000000023986526.0000NaNSNaN
212212beesley, mr. lawrencemale34.0000000024869813.0000D56SNaN
222313mcgowan, miss. anna "annie"female15.000000003309238.0292NaNQNaN
232411sloper, mr. william thompsonmale28.0000000011378835.5000A6SNaN
242503palsson, miss. torborg danirafemale8.0000003134990921.0750NaNSNaN
252613asplund, mrs. carl oscar (selma augusta emilia...female38.0000001534707731.3875NaNSNaN
262703emir, mr. farred chehabmale29.6991180026317.2250NaNCNaN
272801fortune, mr. charles alexandermale19.0000003219950263.0000C23 C25 C27SNaN
282913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQNaN
293003todoroff, mr. laliomale29.699118003492167.8958NaNSNaN
303101uruchurtu, don. manuel emale40.00000000PC 1760127.7208NaNCNaN
313211spencer, mrs. william augustus (marie eugenie)female29.69911810PC 17569146.5208B78CNaN
323313glynn, miss. mary agathafemale29.699118003356777.7500NaNQNaN
333402wheadon, mr. edward hmale66.00000000C.A. 2457910.5000NaNSNaN
343501meyer, mr. edgar josephmale28.00000010PC 1760482.1708NaNCNaN
353601holverson, mr. alexander oskarmale42.0000001011378952.0000NaNSNaN
363713mamee, mr. hannamale29.6991180026777.2292NaNCNaN
373803cann, mr. ernest charlesmale21.00000000A./5. 21528.0500NaNSNaN
383903vander planke, miss. augusta mariafemale18.0000002034576418.0000NaNSNaN
394013nicola-yarred, miss. jamilafemale14.00000010265111.2417NaNCNaN
404103ahlin, mrs. johan (johanna persdotter larsson)female40.0000001075469.4750NaNSNaN
414202turpin, mrs. william john robert (dorothy ann ...female27.000000101166821.0000NaNSNaN
424303kraeff, mr. theodormale29.699118003492537.8958NaNCNaN
434412laroche, miss. simonne marie anne andreefemale3.00000012SC/Paris 212341.5792NaNCNaN
444513devaney, miss. margaret deliafemale19.000000003309587.8792NaNQNaN
454603rogers, mr. william johnmale29.69911800S.C./A.4. 235678.0500NaNSNaN
464703lennon, mr. denismale29.6991181037037115.5000NaNQNaN
474813o'driscoll, miss. bridgetfemale29.69911800143117.7500NaNQNaN
484903samaan, mr. youssefmale29.69911820266221.6792NaNCNaN
495003arnold-franchi, mrs. josef (josefine franchi)female18.0000001034923717.8000NaNSNaN
# Split one column into several pieces.
# Each Titanic Name is split on '.', giving one output column per piece.
# NOTE(review): the labels look swapped relative to the content -- the first
# piece ('名', given name) holds "surname, title" and the second ('姓',
# surname) holds the given names; confirm before relying on them.
# NOTE(review): exactly three column labels are supplied, so this presumably
# relies on no name splitting into more than three pieces -- verify for other data.
data_namesplit = pd.DataFrame((x.split('.') for x in data['Name']),index=data.index,columns=['名','姓','s'])
data_namesplit
s
0braund, mrowen harrisNone
1cumings, mrsjohn bradley (florence briggs thayer)None
2heikkinen, misslainaNone
3futrelle, mrsjacques heath (lily may peel)None
4allen, mrwilliam henryNone
5moran, mrjamesNone
6mccarthy, mrtimothy jNone
7palsson, mastergosta leonardNone
8johnson, mrsoscar w (elisabeth vilhelmina berg)None
9nasser, mrsnicholas (adele achem)None
10sandstrom, missmarguerite rutNone
11bonnell, misselizabethNone
12saundercock, mrwilliam henryNone
13andersson, mranders johanNone
14vestrom, misshulda amanda adolfinaNone
15hewlett, mrs(mary d kingcome)None
16rice, mastereugeneNone
17williams, mrcharles eugeneNone
18vander planke, mrsjulius (emelia maria vandemoortele)None
19masselmani, mrsfatimaNone
20fynney, mrjoseph jNone
21beesley, mrlawrenceNone
22mcgowan, missanna "annie"None
23sloper, mrwilliam thompsonNone
24palsson, misstorborg daniraNone
25asplund, mrscarl oscar (selma augusta emilia johansson)None
26emir, mrfarred chehabNone
27fortune, mrcharles alexanderNone
28o'dwyer, missellen "nellie"None
29todoroff, mrlalioNone
............
861giles, mrfrederick edwardNone
862swift, mrsfrederick joel (margaret welles barron)None
863sage, missdorothy edith "dolly"None
864gill, mrjohn williamNone
865bystrom, mrs(karolina)None
866duran y more, missasuncionNone
867roebling, mrwashington augustus iiNone
868van melkebeke, mrphilemonNone
869johnson, masterharold theodorNone
870balkic, mrcerinNone
871beckwith, mrsrichard leonard (sallie monypeny)None
872carlsson, mrfrans olofNone
873vander cruyssen, mrvictorNone
874abelson, mrssamuel (hannah wizosky)None
875najib, missadele kiamie "jane"None
876gustafsson, mralfred ossianNone
877petroff, mrnedelioNone
878laleff, mrkristoNone
879potter, mrsthomas jr (lily alexenia wilson)None
880shelley, mrswilliam (imanita parrish hall)None
881markun, mrjohannNone
882dahlberg, missgerda ulrikaNone
883banfield, mrfrederick jamesNone
884sutehall, mrhenry jrNone
885rice, mrswilliam (margaret norton)None
886montvila, revjuozasNone
887graham, missmargaret edithNone
888johnston, misscatherine helen "carrie"None
889behr, mrkarl howellNone
890dooley, mrpatrickNone

891 rows × 3 columns

# Attach the split-out columns to the original table. axis=1 concatenates
# side by side (adds columns), aligning rows on the index -- it does not
# stack rows.
data = pd.concat([data_namesplit,data],axis = 1)
data
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
0braund, mrowen harrisNone103braund, mr. owen harrismale22.00000010A/5 211717.2500NaNSNaN
1cumings, mrsjohn bradley (florence briggs thayer)None211cumings, mrs. john bradley (florence briggs th...female38.00000010PC 1759971.2833C85CNaN
2heikkinen, misslainaNone313heikkinen, miss. lainafemale26.00000000STON/O2. 31012827.9250NaNSNaN
3futrelle, mrsjacques heath (lily may peel)None411futrelle, mrs. jacques heath (lily may peel)female35.0000001011380353.1000C123SNaN
4allen, mrwilliam henryNone503allen, mr. william henrymale35.000000003734508.0500NaNSNaN
5moran, mrjamesNone603moran, mr. jamesmale29.699118003308778.4583NaNQNaN
6mccarthy, mrtimothy jNone701mccarthy, mr. timothy jmale54.000000001746351.8625E46SNaN
7palsson, mastergosta leonardNone803palsson, master. gosta leonardmale2.0000003134990921.0750NaNS1.0
8johnson, mrsoscar w (elisabeth vilhelmina berg)None913johnson, mrs. oscar w (elisabeth vilhelmina berg)female27.0000000234774211.1333NaNSNaN
9nasser, mrsnicholas (adele achem)None1012nasser, mrs. nicholas (adele achem)female14.0000001023773630.0708NaNCNaN
10sandstrom, missmarguerite rutNone1113sandstrom, miss. marguerite rutfemale4.00000011PP 954916.7000G6SNaN
11bonnell, misselizabethNone1211bonnell, miss. elizabethfemale58.0000000011378326.5500C103SNaN
12saundercock, mrwilliam henryNone1303saundercock, mr. william henrymale20.00000000A/5. 21518.0500NaNSNaN
13andersson, mranders johanNone1403andersson, mr. anders johanmale39.0000001534708231.2750NaNSNaN
14vestrom, misshulda amanda adolfinaNone1503vestrom, miss. hulda amanda adolfinafemale14.000000003504067.8542NaNSNaN
15hewlett, mrs(mary d kingcome)None1612hewlett, mrs. (mary d kingcome)female55.0000000024870616.0000NaNSNaN
16rice, mastereugeneNone1703rice, master. eugenemale2.0000004138265229.1250NaNQ1.0
17williams, mrcharles eugeneNone1812williams, mr. charles eugenemale29.6991180024437313.0000NaNSNaN
18vander planke, mrsjulius (emelia maria vandemoortele)None1903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNSNaN
19masselmani, mrsfatimaNone2013masselmani, mrs. fatimafemale29.6991180026497.2250NaNCNaN
20fynney, mrjoseph jNone2102fynney, mr. joseph jmale35.0000000023986526.0000NaNSNaN
21beesley, mrlawrenceNone2212beesley, mr. lawrencemale34.0000000024869813.0000D56SNaN
22mcgowan, missanna "annie"None2313mcgowan, miss. anna "annie"female15.000000003309238.0292NaNQNaN
23sloper, mrwilliam thompsonNone2411sloper, mr. william thompsonmale28.0000000011378835.5000A6SNaN
24palsson, misstorborg daniraNone2503palsson, miss. torborg danirafemale8.0000003134990921.0750NaNSNaN
25asplund, mrscarl oscar (selma augusta emilia johansson)None2613asplund, mrs. carl oscar (selma augusta emilia...female38.0000001534707731.3875NaNSNaN
26emir, mrfarred chehabNone2703emir, mr. farred chehabmale29.6991180026317.2250NaNCNaN
27fortune, mrcharles alexanderNone2801fortune, mr. charles alexandermale19.0000003219950263.0000C23 C25 C27SNaN
28o'dwyer, missellen "nellie"None2913o'dwyer, miss. ellen "nellie"female29.699118003309597.8792NaNQNaN
29todoroff, mrlalioNone3003todoroff, mr. laliomale29.699118003492167.8958NaNSNaN
...................................................
861giles, mrfrederick edwardNone86202giles, mr. frederick edwardmale21.000000102813411.5000NaNSNaN
862swift, mrsfrederick joel (margaret welles barron)None86311swift, mrs. frederick joel (margaret welles ba...female48.000000001746625.9292D17SNaN
863sage, missdorothy edith "dolly"None86403sage, miss. dorothy edith "dolly"female29.69911882CA. 234369.5500NaNSNaN
864gill, mrjohn williamNone86502gill, mr. john williammale24.0000000023386613.0000NaNSNaN
865bystrom, mrs(karolina)None86612bystrom, mrs. (karolina)female42.0000000023685213.0000NaNSNaN
866duran y more, missasuncionNone86712duran y more, miss. asuncionfemale27.00000010SC/PARIS 214913.8583NaNCNaN
867roebling, mrwashington augustus iiNone86801roebling, mr. washington augustus iimale31.00000000PC 1759050.4958A24SNaN
868van melkebeke, mrphilemonNone86903van melkebeke, mr. philemonmale29.699118003457779.5000NaNSNaN
869johnson, masterharold theodorNone87013johnson, master. harold theodormale4.0000001134774211.1333NaNS1.0
870balkic, mrcerinNone87103balkic, mr. cerinmale26.000000003492487.8958NaNSNaN
871beckwith, mrsrichard leonard (sallie monypeny)None87211beckwith, mrs. richard leonard (sallie monypeny)female47.000000111175152.5542D35SNaN
872carlsson, mrfrans olofNone87301carlsson, mr. frans olofmale33.000000006955.0000B51 B53 B55SNaN
873vander cruyssen, mrvictorNone87403vander cruyssen, mr. victormale47.000000003457659.0000NaNSNaN
874abelson, mrssamuel (hannah wizosky)None87512abelson, mrs. samuel (hannah wizosky)female28.00000010P/PP 338124.0000NaNCNaN
875najib, missadele kiamie "jane"None87613najib, miss. adele kiamie "jane"female15.0000000026677.2250NaNCNaN
876gustafsson, mralfred ossianNone87703gustafsson, mr. alfred ossianmale20.0000000075349.8458NaNSNaN
877petroff, mrnedelioNone87803petroff, mr. nedeliomale19.000000003492127.8958NaNSNaN
878laleff, mrkristoNone87903laleff, mr. kristomale29.699118003492177.8958NaNSNaN
879potter, mrsthomas jr (lily alexenia wilson)None88011potter, mrs. thomas jr (lily alexenia wilson)female56.000000011176783.1583C50CNaN
880shelley, mrswilliam (imanita parrish hall)None88112shelley, mrs. william (imanita parrish hall)female25.0000000123043326.0000NaNSNaN
881markun, mrjohannNone88203markun, mr. johannmale33.000000003492577.8958NaNSNaN
882dahlberg, missgerda ulrikaNone88303dahlberg, miss. gerda ulrikafemale22.00000000755210.5167NaNSNaN
883banfield, mrfrederick jamesNone88402banfield, mr. frederick jamesmale28.00000000C.A./SOTON 3406810.5000NaNSNaN
884sutehall, mrhenry jrNone88503sutehall, mr. henry jrmale25.00000000SOTON/OQ 3920767.0500NaNSNaN
885rice, mrswilliam (margaret norton)None88603rice, mrs. william (margaret norton)female39.0000000538265229.1250NaNQNaN
886montvila, revjuozasNone88702montvila, rev. juozasmale27.0000000021153613.0000NaNSNaN
887graham, missmargaret edithNone88811graham, miss. margaret edithfemale19.0000000011205330.0000B42SNaN
888johnston, misscatherine helen "carrie"None88903johnston, miss. catherine helen "carrie"female29.69911812W./C. 660723.4500NaNSNaN
889behr, mrkarl howellNone89011behr, mr. karl howellmale26.0000000011136930.0000C148CNaN
890dooley, mrpatrickNone89103dooley, mr. patrickmale32.000000003703767.7500NaNQNaN

891 rows × 16 columns

5.提取数据

#----------------------------------------------------------Data extraction
# Select a single row by its index label; the result is a Series.
data.loc[3]
名                                             futrelle, mrs
姓                             jacques heath (lily may peel)
s                                                      None
PassengerId                                               4
Survived                                                  1
Pclass                                                    1
Name           futrelle, mrs. jacques heath (lily may peel)
Sex                                                  female
Age                                                      35
SibSp                                                     1
Parch                                                     0
Ticket                                               113803
Fare                                                   53.1
Cabin                                                  C123
Embarked                                                  S
tip                                                     NaN
Name: 3, dtype: object
# Select several rows by integer position; the slice is half-open, so 0:3
# yields positions 0, 1 and 2.
data.iloc[0:3]
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
0braund, mrowen harrisNone103braund, mr. owen harrismale22.010A/5 211717.2500NaNSNaN
1cumings, mrsjohn bradley (florence briggs thayer)None211cumings, mrs. john bradley (florence briggs th...female38.010PC 1759971.2833C85CNaN
2heikkinen, misslainaNone313heikkinen, miss. lainafemale26.000STON/O2. 31012827.9250NaNSNaN
# Select the rows that satisfy a condition (boolean mask).
data.loc[data['Age']<3]
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
7palsson, mastergosta leonardNone803palsson, master. gosta leonardmale2.003134990921.0750NaNS1.0
16rice, mastereugeneNone1703rice, master. eugenemale2.004138265229.1250NaNQ1.0
78caldwell, masteralden gatesNone7912caldwell, master. alden gatesmale0.830224873829.0000NaNS1.0
119andersson, missellis anna mariaNone12003andersson, miss. ellis anna mariafemale2.004234708231.2750NaNSNaN
164panula, mastereino viljamiNone16503panula, master. eino viljamimale1.0041310129539.6875NaNS1.0
172johnson, misseleanor ileenNone17313johnson, miss. eleanor ileenfemale1.001134774211.1333NaNSNaN
183becker, masterrichard fNone18412becker, master. richard fmale1.002123013639.0000F4S1.0
205strom, misstelma matildaNone20603strom, miss. telma matildafemale2.000134705410.4625G6SNaN
297allison, misshelen loraineNone29801allison, miss. helen lorainefemale2.0012113781151.5500C22 C26SNaN
305allison, masterhudson trevorNone30611allison, master. hudson trevormale0.9212113781151.5500C22 C26S1.0
340navratil, masteredmond rogerNone34112navratil, master. edmond rogermale2.001123008026.0000F2S1.0
381nakid, missmaria ("mary")None38213nakid, miss. maria ("mary")female1.0002265315.7417NaNCNaN
386goodwin, mastersidney leonardNone38703goodwin, master. sidney leonardmale1.0052CA 214446.9000NaNS1.0
469baclini, misshelene barbaraNone47013baclini, miss. helene barbarafemale0.7521266619.2583NaNCNaN
479hirvonen, misshildur eNone48013hirvonen, miss. hildur efemale2.0001310129812.2875NaNSNaN
530quick, missphyllis mayNone53112quick, miss. phyllis mayfemale2.00112636026.0000NaNSNaN
642skoog, missmargit elizabethNone64303skoog, miss. margit elizabethfemale2.003234708827.9000NaNSNaN
644baclini, misseugenieNone64513baclini, miss. eugeniefemale0.7521266619.2583NaNCNaN
755hamalainen, masterviljoNone75612hamalainen, master. viljomale0.671125064914.5000NaNS1.0
788dean, masterbertram vereNone78913dean, master. bertram veremale1.0012C.A. 231520.5750NaNS1.0
803thomas, masterassad alexanderNone80413thomas, master. assad alexandermale0.420126258.5167NaNC1.0
824panula, masterurho abrahamNone82503panula, master. urho abrahammale2.0041310129539.6875NaNS1.0
827mallet, masterandreNone82812mallet, master. andremale1.0002S.C./PARIS 207937.0042NaNC1.0
831richards, mastergeorge sibleyNone83212richards, master. george sibleymale0.83112910618.7500NaNS1.0
# Select by position on both axes: first 3 rows, first 4 columns.
data.iloc[:3,:4]
sPassengerId
0braund, mrowen harrisNone1
1cumings, mrsjohn bradley (florence briggs thayer)None2
2heikkinen, misslainaNone3
# Rows at positions 1-2 and only the column at position 3.
data.iloc[1:3,3:4]
PassengerId
12
23
# Test, row by row, whether the column value is one of the listed values;
# returns a boolean Series.
data['SibSp'].isin([1,2])
0       True
1       True
2      False
3       True
4      False
5      False
6      False
7      False
8      False
9       True
10      True
11     False
12     False
13      True
14     False
15     False
16     False
17     False
18      True
19     False
20     False
21     False
22     False
23     False
24     False
25      True
26     False
27     False
28     False
29     False
       ...  
861     True
862    False
863    False
864    False
865    False
866     True
867    False
868    False
869     True
870    False
871     True
872    False
873    False
874     True
875    False
876    False
877    False
878    False
879    False
880    False
881    False
882    False
883    False
884    False
885    False
886    False
887    False
888     True
889    False
890    False
Name: SibSp, Length: 891, dtype: bool
# Keep only the rows whose SibSp is one of the listed values.
data.loc[data['SibSp'].isin([1,2])]
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
0braund, mrowen harrisNone103braund, mr. owen harrismale22.00000010A/5 211717.2500NaNSNaN
1cumings, mrsjohn bradley (florence briggs thayer)None211cumings, mrs. john bradley (florence briggs th...female38.00000010PC 1759971.2833C85CNaN
3futrelle, mrsjacques heath (lily may peel)None411futrelle, mrs. jacques heath (lily may peel)female35.0000001011380353.1000C123SNaN
9nasser, mrsnicholas (adele achem)None1012nasser, mrs. nicholas (adele achem)female14.0000001023773630.0708NaNCNaN
10sandstrom, missmarguerite rutNone1113sandstrom, miss. marguerite rutfemale4.00000011PP 954916.7000G6SNaN
13andersson, mranders johanNone1403andersson, mr. anders johanmale39.0000001534708231.2750NaNSNaN
18vander planke, mrsjulius (emelia maria vandemoortele)None1903vander planke, mrs. julius (emelia maria vande...female31.0000001034576318.0000NaNSNaN
25asplund, mrscarl oscar (selma augusta emilia johansson)None2613asplund, mrs. carl oscar (selma augusta emilia...female38.0000001534707731.3875NaNSNaN
31spencer, mrswilliam augustus (marie eugenie)None3211spencer, mrs. william augustus (marie eugenie)female29.69911810PC 17569146.5208B78CNaN
34meyer, mredgar josephNone3501meyer, mr. edgar josephmale28.00000010PC 1760482.1708NaNCNaN
35holverson, mralexander oskarNone3601holverson, mr. alexander oskarmale42.0000001011378952.0000NaNSNaN
38vander planke, missaugusta mariaNone3903vander planke, miss. augusta mariafemale18.0000002034576418.0000NaNSNaN
39nicola-yarred, missjamilaNone4013nicola-yarred, miss. jamilafemale14.00000010265111.2417NaNCNaN
40ahlin, mrsjohan (johanna persdotter larsson)None4103ahlin, mrs. johan (johanna persdotter larsson)female40.0000001075469.4750NaNSNaN
41turpin, mrswilliam john robert (dorothy ann wonnacott)None4202turpin, mrs. william john robert (dorothy ann ...female27.000000101166821.0000NaNSNaN
43laroche, misssimonne marie anne andreeNone4412laroche, miss. simonne marie anne andreefemale3.00000012SC/Paris 212341.5792NaNCNaN
46lennon, mrdenisNone4703lennon, mr. denismale29.6991181037037115.5000NaNQNaN
48samaan, mryoussefNone4903samaan, mr. youssefmale29.69911820266221.6792NaNCNaN
49arnold-franchi, mrsjosef (josefine franchi)None5003arnold-franchi, mrs. josef (josefine franchi)female18.0000001034923717.8000NaNSNaN
52harper, mrshenry sleeper (myna haxtun)None5311harper, mrs. henry sleeper (myna haxtun)female49.00000010PC 1757276.7292D33CNaN
53faunthorpe, mrslizzie (elizabeth anne wilkinson)None5412faunthorpe, mrs. lizzie (elizabeth anne wilkin...female29.00000010292626.0000NaNSNaN
58west, missconstance miriumNone5912west, miss. constance miriumfemale5.00000012C.A. 3465127.7500NaNSNaN
62harris, mrhenry birkhardtNone6301harris, mr. henry birkhardtmale45.000000103697383.4750C83SNaN
65moubarek, mastergeriosNone6613moubarek, master. geriosmale29.69911811266115.2458NaNCNaN
69kink, mrvincenzNone7003kink, mr. vincenzmale26.000000203151518.6625NaNSNaN
73chronopoulos, mrapostolosNone7403chronopoulos, mr. apostolosmale26.00000010268014.4542NaNCNaN
86ford, mrwilliam nealNone8703ford, mr. william nealmale16.00000013W./C. 660834.3750NaNSNaN
92chaffee, mrherbert fullerNone9301chaffee, mr. herbert fullermale46.00000010W.E.P. 573461.1750E31SNaN
93dean, mrbertram frankNone9403dean, mr. bertram frankmale26.00000012C.A. 231520.5750NaNSNaN
99kantor, mrsinaiNone10002kantor, mr. sinaimale34.0000001024436726.0000NaNSNaN
...................................................
750wells, missjoanNone75112wells, miss. joanfemale4.000000112910323.0000NaNSNaN
754herman, mrssamuel (jane laver)None75512herman, mrs. samuel (jane laver)female48.0000001222084565.0000NaNSNaN
755hamalainen, masterviljoNone75612hamalainen, master. viljomale0.6700001125064914.5000NaNS1.0
763carter, mrswilliam ernest (lucile polk)None76411carter, mrs. william ernest (lucile polk)female36.00000012113760120.0000B96 B98SNaN
765hogeboom, mrsjohn c (anna andrews)None76611hogeboom, mrs. john c (anna andrews)female51.000000101350277.9583D11SNaN
768moran, mrdaniel jNone76903moran, mr. daniel jmale29.6991181037111024.1500NaNQNaN
774hocking, mrselizabeth (eliza needs)None77512hocking, mrs. elizabeth (eliza needs)female54.000000132910523.0000NaNSNaN
781dick, mrsalbert adrian (vera gillespie)None78211dick, mrs. albert adrian (vera gillespie)female17.000000101747457.0000B20SNaN
783johnston, mrandrew gNone78403johnston, mr. andrew gmale29.69911812W./C. 660723.4500NaNSNaN
788dean, masterbertram vereNone78913dean, master. bertram veremale1.00000012C.A. 231520.5750NaNS1.0
799van impe, mrsjean baptiste (rosalie paula govaert)None80003van impe, mrs. jean baptiste (rosalie paula go...female30.0000001134577324.1500NaNSNaN
801collyer, mrsharvey (charlotte annie tate)None80212collyer, mrs. harvey (charlotte annie tate)female31.00000011C.A. 3192126.2500NaNSNaN
802carter, masterwilliam thornton iiNone80311carter, master. william thornton iimale11.00000012113760120.0000B96 B98SNaN
809chambers, mrsnorman campbell (bertha griggs)None81011chambers, mrs. norman campbell (bertha griggs)female33.0000001011380653.1000E8SNaN
817mallet, mralbertNone81802mallet, mr. albertmale31.00000011S.C./PARIS 207937.0042NaNCNaN
820hays, mrscharles melville (clara jennings gregg)None82111hays, mrs. charles melville (clara jennings gr...female52.000000111274993.5000B69SNaN
830yasbeck, mrsantoni (selini alexander)None83113yasbeck, mrs. antoni (selini alexander)female15.00000010265914.4542NaNCNaN
831richards, mastergeorge sibleyNone83212richards, master. george sibleymale0.830000112910618.7500NaNS1.0
835compton, misssara rebeccaNone83611compton, miss. sara rebeccafemale39.00000011PC 1775683.1583E49CNaN
849goldenberg, mrssamuel l (edwiga grabowska)None85011goldenberg, mrs. samuel l (edwiga grabowska)female29.699118101745389.1042C92CNaN
852boulos, missnourelainNone85303boulos, miss. nourelainfemale9.00000011267815.2458NaNCNaN
854carter, mrsernest courtenay (lilian hughes)None85502carter, mrs. ernest courtenay (lilian hughes)female44.0000001024425226.0000NaNSNaN
856wick, mrsgeorge dennick (mary hitchcock)None85711wick, mrs. george dennick (mary hitchcock)female45.0000001136928164.8667NaNSNaN
860hansen, mrclaus peterNone86103hansen, mr. claus petermale41.0000002035002614.1083NaNSNaN
861giles, mrfrederick edwardNone86202giles, mr. frederick edwardmale21.000000102813411.5000NaNSNaN
866duran y more, missasuncionNone86712duran y more, miss. asuncionfemale27.00000010SC/PARIS 214913.8583NaNCNaN
869johnson, masterharold theodorNone87013johnson, master. harold theodormale4.0000001134774211.1333NaNS1.0
871beckwith, mrsrichard leonard (sallie monypeny)None87211beckwith, mrs. richard leonard (sallie monypeny)female47.000000111175152.5542D35SNaN
874abelson, mrssamuel (hannah wizosky)None87512abelson, mrs. samuel (hannah wizosky)female28.00000010P/PP 338124.0000NaNCNaN
888johnston, misscatherine helen "carrie"None88903johnston, miss. catherine helen "carrie"female29.69911812W./C. 660723.4500NaNSNaN

237 rows × 16 columns

6.筛选数据

#--------------------------------------------------------Data filtering
# Combine conditions with AND (&); each condition needs its own parentheses.
data.loc[(data['Age']<5) & (data['Sex']=='male')]
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
7palsson, mastergosta leonardNone803palsson, master. gosta leonardmale2.003134990921.0750NaNS1.0
16rice, mastereugeneNone1703rice, master. eugenemale2.004138265229.1250NaNQ1.0
63skoog, masterharaldNone6403skoog, master. haraldmale4.003234708827.9000NaNS1.0
78caldwell, masteralden gatesNone7912caldwell, master. alden gatesmale0.830224873829.0000NaNS1.0
164panula, mastereino viljamiNone16503panula, master. eino viljamimale1.0041310129539.6875NaNS1.0
171rice, masterarthurNone17203rice, master. arthurmale4.004138265229.1250NaNQ1.0
183becker, masterrichard fNone18412becker, master. richard fmale1.002123013639.0000F4S1.0
193navratil, mastermichel mNone19412navratil, master. michel mmale3.001123008026.0000F2S1.0
261asplund, masteredvin rojj felixNone26213asplund, master. edvin rojj felixmale3.004234707731.3875NaNS1.0
305allison, masterhudson trevorNone30611allison, master. hudson trevormale0.9212113781151.5500C22 C26S1.0
340navratil, masteredmond rogerNone34112navratil, master. edmond rogermale2.001123008026.0000F2S1.0
348coutts, masterwilliam loch "william"None34913coutts, master. william loch "william"male3.0011C.A. 3767115.9000NaNS1.0
386goodwin, mastersidney leonardNone38703goodwin, master. sidney leonardmale1.0052CA 214446.9000NaNS1.0
407richards, masterwilliam roweNone40812richards, master. william rowemale3.00112910618.7500NaNS1.0
445dodge, masterwashingtonNone44611dodge, master. washingtonmale4.00023363881.8583A34S1.0
755hamalainen, masterviljoNone75612hamalainen, master. viljomale0.671125064914.5000NaNS1.0
788dean, masterbertram vereNone78913dean, master. bertram veremale1.0012C.A. 231520.5750NaNS1.0
803thomas, masterassad alexanderNone80413thomas, master. assad alexandermale0.420126258.5167NaNC1.0
824panula, masterurho abrahamNone82503panula, master. urho abrahammale2.0041310129539.6875NaNS1.0
827mallet, masterandreNone82812mallet, master. andremale1.0002S.C./PARIS 207937.0042NaNC1.0
831richards, mastergeorge sibleyNone83212richards, master. george sibleymale0.83112910618.7500NaNS1.0
850andersson, mastersigvard harald eliasNone85103andersson, master. sigvard harald eliasmale4.004234708231.2750NaNS1.0
869johnson, masterharold theodorNone87013johnson, master. harold theodormale4.001134774211.1333NaNS1.0
# Combine conditions with OR (|).
data.loc[(data['Age']==5) | (data['Age']==2)]
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
7palsson, mastergosta leonardNone803palsson, master. gosta leonardmale2.03134990921.0750NaNS1.0
16rice, mastereugeneNone1703rice, master. eugenemale2.04138265229.1250NaNQ1.0
58west, missconstance miriumNone5912west, miss. constance miriumfemale5.012C.A. 3465127.7500NaNSNaN
119andersson, missellis anna mariaNone12003andersson, miss. ellis anna mariafemale2.04234708231.2750NaNSNaN
205strom, misstelma matildaNone20603strom, miss. telma matildafemale2.00134705410.4625G6SNaN
233asplund, misslillian gertrudNone23413asplund, miss. lillian gertrudfemale5.04234707731.3875NaNSNaN
297allison, misshelen loraineNone29801allison, miss. helen lorainefemale2.012113781151.5500C22 C26SNaN
340navratil, masteredmond rogerNone34112navratil, master. edmond rogermale2.01123008026.0000F2S1.0
448baclini, missmarie catherineNone44913baclini, miss. marie catherinefemale5.021266619.2583NaNCNaN
479hirvonen, misshildur eNone48013hirvonen, miss. hildur efemale2.001310129812.2875NaNSNaN
530quick, missphyllis mayNone53112quick, miss. phyllis mayfemale2.0112636026.0000NaNSNaN
642skoog, missmargit elizabethNone64303skoog, miss. margit elizabethfemale2.03234708827.9000NaNSNaN
777emanuel, missvirginia ethelNone77813emanuel, miss. virginia ethelfemale5.00036451612.4750NaNSNaN
824panula, masterurho abrahamNone82503panula, master. urho abrahammale2.041310129539.6875NaNS1.0
# Count the non-null values of one column (Parch) in the filtered rows.
data.loc[(data['Age']==5) | (data['Age']==2)].Parch.count()  # rows with Age 5 or 2; count() tallies non-null Parch entries, it does not test Pclass
14
# Filter with query(); comparing a column to a list with == selects the rows
# whose value is in that list.
data.query('Age==[5,2]')
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
7palsson, mastergosta leonardNone803palsson, master. gosta leonardmale2.03134990921.0750NaNS1.0
16rice, mastereugeneNone1703rice, master. eugenemale2.04138265229.1250NaNQ1.0
58west, missconstance miriumNone5912west, miss. constance miriumfemale5.012C.A. 3465127.7500NaNSNaN
119andersson, missellis anna mariaNone12003andersson, miss. ellis anna mariafemale2.04234708231.2750NaNSNaN
205strom, misstelma matildaNone20603strom, miss. telma matildafemale2.00134705410.4625G6SNaN
233asplund, misslillian gertrudNone23413asplund, miss. lillian gertrudfemale5.04234707731.3875NaNSNaN
297allison, misshelen loraineNone29801allison, miss. helen lorainefemale2.012113781151.5500C22 C26SNaN
340navratil, masteredmond rogerNone34112navratil, master. edmond rogermale2.01123008026.0000F2S1.0
448baclini, missmarie catherineNone44913baclini, miss. marie catherinefemale5.021266619.2583NaNCNaN
479hirvonen, misshildur eNone48013hirvonen, miss. hildur efemale2.001310129812.2875NaNSNaN
530quick, missphyllis mayNone53112quick, miss. phyllis mayfemale2.0112636026.0000NaNSNaN
642skoog, missmargit elizabethNone64303skoog, miss. margit elizabethfemale2.03234708827.9000NaNSNaN
777emanuel, missvirginia ethelNone77813emanuel, miss. virginia ethelfemale5.00036451612.4750NaNSNaN
824panula, masterurho abrahamNone82503panula, master. urho abrahammale2.041310129539.6875NaNS1.0
# Sum one column (SibSp) over the filtered rows
data.query('Age==[5,2]').SibSp.sum()
28

7.汇总数据

#-------------------------------------------------------------Data aggregation
# Group by the classes of one column (Pclass) and count the non-null entries of every other column per group
data.groupby('Pclass').count()
sPassengerIdSurvivedNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
Pclass
121621612162162162162162162162162161762142
21841840184184184184184184184184184161849
349149104914914914914914914914914911249122
# Group by the classes of one column (Pclass), but count only a single column (Sex)
data.groupby('Pclass')['Sex'].count()
Pclass
1    216
2    184
3    491
Name: Sex, dtype: int64
# Aggregate over two fields: group by Pclass, then Sex, and count PassengerId in each combination
data.groupby(['Pclass','Sex'])['PassengerId'].count()
Pclass  Sex   
1       female     94
        male      122
2       female     76
        male      108
3       female    144
        male      347
Name: PassengerId, dtype: int64
# Group by Pclass and Sex, then aggregate Age with several functions at once
# (agg = aggregate): group length, sum and mean.
# 'sum' and 'mean' are passed as string aliases because handing np.sum / np.mean
# to agg is deprecated in recent pandas releases; the resulting column labels
# (len, sum, mean) are unchanged.
data.groupby(['Pclass','Sex'])['Age'].agg([len,'sum','mean'])
lensummean
PclassSex
1female94.03209.29205934.141405
male122.04793.10147139.287717
2female76.02184.89823528.748661
male108.03310.62205930.653908
3female144.03465.86294124.068493
male347.09498.13705927.372153

8.统计数据

#--------------------------------------------------------Data statistics
# Data sampling: draw 10 rows at random from the table
data.sample(n=10)
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
70jenkin, mrstephen curnowNone7102jenkin, mr. stephen curnowmale32.00000000C.A. 3311110.5000NaNSNaN
92chaffee, mrherbert fullerNone9301chaffee, mr. herbert fullermale46.00000010W.E.P. 573461.1750E31SNaN
700astor, mrsjohn jacob (madeleine talmadge force)None70111astor, mrs. john jacob (madeleine talmadge force)female18.00000010PC 17757227.5250C62 C64CNaN
626kirkland, revcharles leonardNone62702kirkland, rev. charles leonardmale57.0000000021953312.3500NaNQNaN
722gillespie, mrwilliam henryNone72302gillespie, mr. william henrymale34.000000001223313.0000NaNSNaN
371wiklund, mrjakob alfredNone37203wiklund, mr. jakob alfredmale18.0000001031012676.4958NaNSNaN
248beckwith, mrrichard leonardNone24911beckwith, mr. richard leonardmale37.000000111175152.5542D35SNaN
795otter, mrrichardNone79602otter, mr. richardmale39.000000002821313.0000NaNSNaN
567palsson, mrsnils (alma cornelia berglund)None56803palsson, mrs. nils (alma cornelia berglund)female29.0000000434990921.0750NaNSNaN
837sirota, mrmauriceNone83803sirota, mr. mauricemale29.699118003920928.0500NaNSNaN
# Sample without replacement
data.sample(n=10,replace=False)
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
345brown, missamelia "mildred"None34612brown, miss. amelia "mildred"female24.0000000024873313.0000F33SNaN
304williams, mrhoward hugh "harry"None30503williams, mr. howard hugh "harry"male29.69911800A/5 24668.0500NaNSNaN
697mullens, misskatherine "katie"None69813mullens, miss. katherine "katie"female29.69911800358527.7333NaNQNaN
493artagaveytia, mrramonNone49401artagaveytia, mr. ramonmale71.00000000PC 1760949.5042NaNCNaN
226mellors, mrwilliam johnNone22712mellors, mr. william johnmale19.00000000SW/PP 75110.5000NaNSNaN
506quick, mrsfrederick charles (jane richards)None50712quick, mrs. frederick charles (jane richards)female33.000000022636026.0000NaNSNaN
806andrews, mrthomas jrNone80701andrews, mr. thomas jrmale39.000000001120500.0000A36SNaN
335denkoff, mrmittoNone33603denkoff, mr. mittomale29.699118003492257.8958NaNSNaN
560morrow, mrthomas rowanNone56103morrow, mr. thomas rowanmale29.699118003726227.7500NaNQNaN
492molson, mrharry marklandNone49301molson, mr. harry marklandmale55.0000000011378730.5000C30SNaN
# Sample with replacement
data.sample(n=10,replace=True)  # with this many rows the difference is hard to see: sampling with replacement can return the same row more than once, sampling without replacement cannot
sPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedtip
703gallagher, mrmartinNone70403gallagher, mr. martinmale25.00000000368647.7417NaNQNaN
398pain, dralfredNone39902pain, dr. alfredmale23.0000000024427810.5000NaNSNaN
721jensen, mrsvend lauritzNone72203jensen, mr. svend lauritzmale17.000000103500487.0542NaNSNaN
708cleaver, missaliceNone70911cleaver, miss. alicefemale22.00000000113781151.5500NaNSNaN
805johansson, mrkarl johanNone80603johansson, mr. karl johanmale31.000000003470637.7750NaNSNaN
262taussig, mremilNone26301taussig, mr. emilmale52.0000001111041379.6500E67SNaN
52harper, mrshenry sleeper (myna haxtun)None5311harper, mrs. henry sleeper (myna haxtun)female49.00000010PC 1757276.7292D33CNaN
659newell, mrarthur websterNone66001newell, mr. arthur webstermale58.0000000235273113.2750D48CNaN
692lam, mraliNone69313lam, mr. alimale29.69911800160156.4958NaNSNaN
597johnson, mralfredNone59803johnson, mr. alfredmale49.00000000LINE0.0000NaNSNaN
# Descriptive statistics for the table
data.describe().round(2).T  # can be run step by step: get the descriptive statistics, round to two decimals, then transpose
countmeanstdmin25%50%75%max
PassengerId891.0446.00257.351.00223.50446.00668.5891.00
Survived891.00.380.490.000.000.001.01.00
Pclass891.02.310.841.002.003.003.03.00
Age891.029.7013.000.4222.0029.7035.080.00
SibSp891.00.521.100.000.000.001.08.00
Parch891.00.380.810.000.000.000.06.00
Fare891.032.2049.690.007.9114.4531.0512.33
tip33.01.000.001.001.001.001.01.00
# Standard deviation of a single column (Age)
data['Age'].std()
13.002015226002884
# Covariance between two fields (Age and SibSp)
data['Age'].cov(data['SibSp'])
-3.3353450099140782
# Covariance matrix of all fields. Covariance can only be computed for numeric
# columns, so restrict to them explicitly: with numeric_only left at its
# default, pandas 2.x raises on the string columns (Name, Sex, Ticket, ...)
# instead of silently dropping them.
data.cov(numeric_only=True)
PassengerIdSurvivedPclassAgeSibSpParchFaretip
PassengerId66231.000000-0.626966-7.561798111.113042-16.325843-0.342697161.8833690.0
Survived-0.6269660.236772-0.137703-0.441656-0.0189540.0320176.2217870.0
Pclass-7.561798-0.1377030.699015-3.6018550.0765990.012429-22.8301960.0
Age111.113042-0.441656-3.601855169.052400-3.335345-1.87798759.1622000.0
SibSp-16.325843-0.0189540.076599-3.3353451.2160430.3687398.7487340.0
Parch-0.3426970.0320170.012429-1.8779870.3687390.6497288.6610520.0
Fare161.8833696.221787-22.83019659.1622008.7487348.6610522469.4368460.0
tip0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0
# Correlation between two fields (Age and Survived)
data['Age'].corr(data['Survived'])
-0.06980851528714313
# Correlation matrix of all fields. Only numeric columns can be correlated, so
# restrict to them explicitly: with numeric_only left at its default,
# pandas 2.x raises on the string columns (Name, Sex, Ticket, ...).
data.corr(numeric_only=True)
PassengerIdSurvivedPclassAgeSibSpParchFaretip
PassengerId1.000000-0.005007-0.0351440.033207-0.057527-0.0016520.012658NaN
Survived-0.0050071.000000-0.338481-0.069809-0.0353220.0816290.257307NaN
Pclass-0.035144-0.3384811.000000-0.3313390.0830810.018443-0.549500NaN
Age0.033207-0.069809-0.3313391.000000-0.232625-0.1791910.091566NaN
SibSp-0.057527-0.0353220.083081-0.2326251.0000000.4148380.159651NaN
Parch-0.0016520.0816290.018443-0.1791910.4148381.0000000.216225NaN
Fare0.0126580.257307-0.5495000.0915660.1596510.2162251.000000NaN
tipNaNNaNNaNNaNNaNNaNNaNNaN
  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值