1.导入库
import numpy as np
import pandas as pd
2.查看数据
# Load the Titanic passenger table from the CSV file in the working directory.
data = pd. read_csv( 'titanic_data.csv' )
# Bare expression: when run interactively this displays the DataFrame (the dump that follows).
data
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q 6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S 7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S 8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S 9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C 10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S 11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S 12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S 13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S 14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S 15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S 16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q 17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S 18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S 19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C 20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S 21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S 22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q 23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S 24 25 0 3 Palsson, Miss. 
Torborg Danira female 8.0 3 1 349909 21.0750 NaN S 25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S 26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C 27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S 28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q 29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S ... ... ... ... ... ... ... ... ... ... ... ... ... 861 862 0 2 Giles, Mr. Frederick Edward male 21.0 1 0 28134 11.5000 NaN S 862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 0 17466 25.9292 D17 S 863 864 0 3 Sage, Miss. Dorothy Edith "Dolly" female NaN 8 2 CA. 2343 69.5500 NaN S 864 865 0 2 Gill, Mr. John William male 24.0 0 0 233866 13.0000 NaN S 865 866 1 2 Bystrom, Mrs. (Karolina) female 42.0 0 0 236852 13.0000 NaN S 866 867 1 2 Duran y More, Miss. Asuncion female 27.0 1 0 SC/PARIS 2149 13.8583 NaN C 867 868 0 1 Roebling, Mr. Washington Augustus II male 31.0 0 0 PC 17590 50.4958 A24 S 868 869 0 3 van Melkebeke, Mr. Philemon male NaN 0 0 345777 9.5000 NaN S 869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 NaN S 870 871 0 3 Balkic, Mr. Cerin male 26.0 0 0 349248 7.8958 NaN S 871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 D35 S 872 873 0 1 Carlsson, Mr. Frans Olof male 33.0 0 0 695 5.0000 B51 B53 B55 S 873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 NaN S 874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 NaN C 875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.0 0 0 2667 7.2250 NaN C 876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.0 0 0 7534 9.8458 NaN S 877 878 0 3 Petroff, Mr. Nedelio male 19.0 0 0 349212 7.8958 NaN S 878 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 NaN S 879 880 1 1 Potter, Mrs. 
Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 C50 C 880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 1 230433 26.0000 NaN S 881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 NaN S 882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.0 0 0 7552 10.5167 NaN S 883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN S 884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S 885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q 886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S 887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S 888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S 889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C 890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q
891 rows × 12 columns
# Dimensions of the table as a (rows, columns) tuple.
data. shape
(891, 12)
# Print a per-column summary: non-null counts, dtypes and memory usage.
data. info( )
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId 891 non-null int64
Survived 891 non-null int64
Pclass 891 non-null int64
Name 891 non-null object
Sex 891 non-null object
Age 714 non-null float64
SibSp 891 non-null int64
Parch 891 non-null int64
Ticket 891 non-null object
Fare 891 non-null float64
Cabin 204 non-null object
Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
# Count the missing values in each column and list the columns with the most gaps first.
data.isna().sum().sort_values(ascending=False)
Cabin 687
Age 177
Embarked 2
Fare 0
Ticket 0
Parch 0
SibSp 0
Sex 0
Name 0
Pclass 0
Survived 0
PassengerId 0
dtype: int64
# dtype of every column, returned as a Series indexed by column name.
data. dtypes
PassengerId int64
Survived int64
Pclass int64
Name object
Sex object
Age float64
SibSp int64
Parch int64
Ticket object
Fare float64
Cabin object
Embarked object
dtype: object
# dtype of a single column; here the string column is reported as the generic object dtype ('O').
data[ 'Sex' ] . dtype
dtype('O')
# Boolean mask with the same shape as the table: True wherever a cell is missing.
data.isna()
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked 0 False False False False False False False False False False True False 1 False False False False False False False False False False False False 2 False False False False False False False False False False True False 3 False False False False False False False False False False False False 4 False False False False False False False False False False True False 5 False False False False False True False False False False True False 6 False False False False False False False False False False False False 7 False False False False False False False False False False True False 8 False False False False False False False False False False True False 9 False False False False False False False False False False True False 10 False False False False False False False False False False False False 11 False False False False False False False False False False False False 12 False False False False False False False False False False True False 13 False False False False False False False False False False True False 14 False False False False False False False False False False True False 15 False False False False False False False False False False True False 16 False False False False False False False False False False True False 17 False False False False False True False False False False True False 18 False False False False False False False False False False True False 19 False False False False False True False False False False True False 20 False False False False False False False False False False True False 21 False False False False False False False False False False False False 22 False False False False False False False False False False True False 23 False False False False False False False False False False False False 24 False False False False False False False False False False True False 25 False False False False False False False False False False True False 
26 False False False False False True False False False False True False 27 False False False False False False False False False False False False 28 False False False False False True False False False False True False 29 False False False False False True False False False False True False ... ... ... ... ... ... ... ... ... ... ... ... ... 861 False False False False False False False False False False True False 862 False False False False False False False False False False False False 863 False False False False False True False False False False True False 864 False False False False False False False False False False True False 865 False False False False False False False False False False True False 866 False False False False False False False False False False True False 867 False False False False False False False False False False False False 868 False False False False False True False False False False True False 869 False False False False False False False False False False True False 870 False False False False False False False False False False True False 871 False False False False False False False False False False False False 872 False False False False False False False False False False False False 873 False False False False False False False False False False True False 874 False False False False False False False False False False True False 875 False False False False False False False False False False True False 876 False False False False False False False False False False True False 877 False False False False False False False False False False True False 878 False False False False False True False False False False True False 879 False False False False False False False False False False False False 880 False False False False False False False False False False True False 881 False False False False False False False False False False True False 882 False False False False False False False False False False True False 
883 False False False False False False False False False False True False 884 False False False False False False False False False False True False 885 False False False False False False False False False False True False 886 False False False False False False False False False False True False 887 False False False False False False False False False False False False 888 False False False False False True False False False False True False 889 False False False False False False False False False False False False 890 False False False False False False False False False False True False
891 rows × 12 columns
# Boolean Series marking the rows whose Age is missing.
data['Age'].isna()
0 False
1 False
2 False
3 False
4 False
5 True
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 False
16 False
17 True
18 False
19 True
20 False
21 False
22 False
23 False
24 False
25 False
26 True
27 False
28 True
29 True
...
861 False
862 False
863 True
864 False
865 False
866 False
867 False
868 True
869 False
870 False
871 False
872 False
873 False
874 False
875 False
876 False
877 False
878 True
879 False
880 False
881 False
882 False
883 False
884 False
885 False
886 False
887 False
888 True
889 False
890 False
Name: Age, Length: 891, dtype: bool
# Distinct values present in the Parch column, returned as a NumPy array.
data[ 'Parch' ] . unique( )
array([0, 1, 2, 5, 3, 4, 6], dtype=int64)
# Frequency of each Parch value, most common first; dropna=False makes NaN
# (if any were present) count as its own entry instead of being ignored.
data[ 'Parch' ] . value_counts( dropna= False )
0 678
1 118
2 80
5 5
3 5
4 4
6 1
Name: Parch, dtype: int64
# The whole table as a 2-D NumPy array; because the columns have mixed types
# the resulting array uses dtype=object.
data. values
array([[1, 0, 3, ..., 7.25, nan, 'S'],
[2, 1, 1, ..., 71.2833, 'C85', 'C'],
[3, 1, 3, ..., 7.925, nan, 'S'],
...,
[889, 0, 3, ..., 23.45, nan, 'S'],
[890, 1, 1, ..., 30.0, 'C148', 'C'],
[891, 0, 3, ..., 7.75, nan, 'Q']], dtype=object)
# First four rows of the table.
data.iloc[:4]
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
# Last four rows of the table.
data.iloc[-4:]
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked 887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.00 B42 S 888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.45 NaN S 889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.00 C148 C 890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.75 NaN Q
3.清洗数据
# Preview: missing ages replaced by the constant 0.
# The result is not assigned, so `data` itself is left unchanged.
data['Age'].fillna(0)
0 22.0
1 38.0
2 26.0
3 35.0
4 35.0
5 0.0
6 54.0
7 2.0
8 27.0
9 14.0
10 4.0
11 58.0
12 20.0
13 39.0
14 14.0
15 55.0
16 2.0
17 0.0
18 31.0
19 0.0
20 35.0
21 34.0
22 15.0
23 28.0
24 8.0
25 38.0
26 0.0
27 19.0
28 0.0
29 0.0
...
861 21.0
862 48.0
863 0.0
864 24.0
865 42.0
866 27.0
867 31.0
868 0.0
869 4.0
870 26.0
871 47.0
872 33.0
873 47.0
874 28.0
875 15.0
876 20.0
877 19.0
878 0.0
879 56.0
880 25.0
881 33.0
882 22.0
883 28.0
884 25.0
885 39.0
886 27.0
887 19.0
888 0.0
889 26.0
890 32.0
Name: Age, Length: 891, dtype: float64
# Preview: forward fill - each missing age takes the last valid value above it.
# Series.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases. The result is not assigned, so `data`
# itself is left unchanged.
data['Age'].ffill()
0 22.0
1 38.0
2 26.0
3 35.0
4 35.0
5 35.0
6 54.0
7 2.0
8 27.0
9 14.0
10 4.0
11 58.0
12 20.0
13 39.0
14 14.0
15 55.0
16 2.0
17 2.0
18 31.0
19 31.0
20 35.0
21 34.0
22 15.0
23 28.0
24 8.0
25 38.0
26 38.0
27 19.0
28 19.0
29 19.0
...
861 21.0
862 48.0
863 48.0
864 24.0
865 42.0
866 27.0
867 31.0
868 31.0
869 4.0
870 26.0
871 47.0
872 33.0
873 47.0
874 28.0
875 15.0
876 20.0
877 19.0
878 19.0
879 56.0
880 25.0
881 33.0
882 22.0
883 28.0
884 25.0
885 39.0
886 27.0
887 19.0
888 19.0
889 26.0
890 32.0
Name: Age, Length: 891, dtype: float64
# Preview: backward fill - each missing age takes the next valid value below it.
# Series.bfill() replaces fillna(method='bfill'), whose `method` keyword is
# deprecated in recent pandas releases. The result is not assigned, so `data`
# itself is left unchanged.
data['Age'].bfill()
0 22.0
1 38.0
2 26.0
3 35.0
4 35.0
5 54.0
6 54.0
7 2.0
8 27.0
9 14.0
10 4.0
11 58.0
12 20.0
13 39.0
14 14.0
15 55.0
16 2.0
17 31.0
18 31.0
19 35.0
20 35.0
21 34.0
22 15.0
23 28.0
24 8.0
25 38.0
26 19.0
27 19.0
28 40.0
29 40.0
...
861 21.0
862 48.0
863 24.0
864 24.0
865 42.0
866 27.0
867 31.0
868 4.0
869 4.0
870 26.0
871 47.0
872 33.0
873 47.0
874 28.0
875 15.0
876 20.0
877 19.0
878 56.0
879 56.0
880 25.0
881 33.0
882 22.0
883 28.0
884 25.0
885 39.0
886 27.0
887 19.0
888 26.0
889 26.0
890 32.0
Name: Age, Length: 891, dtype: float64
# Show the column again: the fill calls above only returned new Series and were
# never assigned back, so the NaN entries are still present here.
data[ 'Age' ]
0 22.0
1 38.0
2 26.0
3 35.0
4 35.0
5 NaN
6 54.0
7 2.0
8 27.0
9 14.0
10 4.0
11 58.0
12 20.0
13 39.0
14 14.0
15 55.0
16 2.0
17 NaN
18 31.0
19 NaN
20 35.0
21 34.0
22 15.0
23 28.0
24 8.0
25 38.0
26 NaN
27 19.0
28 NaN
29 NaN
...
861 21.0
862 48.0
863 NaN
864 24.0
865 42.0
866 27.0
867 31.0
868 NaN
869 4.0
870 26.0
871 47.0
872 33.0
873 47.0
874 28.0
875 15.0
876 20.0
877 19.0
878 NaN
879 56.0
880 25.0
881 33.0
882 22.0
883 28.0
884 25.0
885 39.0
886 27.0
887 19.0
888 NaN
889 26.0
890 32.0
Name: Age, Length: 891, dtype: float64
# Replace missing ages with the mean of the observed ages and store the result.
# The filled Series is assigned back explicitly rather than calling
# fillna(..., inplace=True) on the column selection: that chained in-place call
# emits a FutureWarning in pandas 2.x and no longer updates `data` once
# Copy-on-Write is active.
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['Age']
0 22.000000
1 38.000000
2 26.000000
3 35.000000
4 35.000000
5 29.699118
6 54.000000
7 2.000000
8 27.000000
9 14.000000
10 4.000000
11 58.000000
12 20.000000
13 39.000000
14 14.000000
15 55.000000
16 2.000000
17 29.699118
18 31.000000
19 29.699118
20 35.000000
21 34.000000
22 15.000000
23 28.000000
24 8.000000
25 38.000000
26 29.699118
27 19.000000
28 29.699118
29 29.699118
...
861 21.000000
862 48.000000
863 29.699118
864 24.000000
865 42.000000
866 27.000000
867 31.000000
868 29.699118
869 4.000000
870 26.000000
871 47.000000
872 33.000000
873 47.000000
874 28.000000
875 15.000000
876 20.000000
877 19.000000
878 29.699118
879 56.000000
880 25.000000
881 33.000000
882 22.000000
883 28.000000
884 25.000000
885 39.000000
886 27.000000
887 19.000000
888 29.699118
889 26.000000
890 32.000000
Name: Age, Length: 891, dtype: float64
# Quick check of str.strip(): only leading/trailing whitespace is removed,
# the space inside the string is kept.
' a bb' . strip( )
'a bb'
# Trim leading/trailing whitespace from every passenger name.
# The vectorised .str accessor is used instead of map(str.strip): it passes
# missing values through as NaN rather than raising TypeError, and it matches
# the .str.lower() call used for the next cleaning step.
data['Name'] = data['Name'].str.strip()
data['Name']
0 Braund, Mr. Owen Harris
1 Cumings, Mrs. John Bradley (Florence Briggs Th...
2 Heikkinen, Miss. Laina
3 Futrelle, Mrs. Jacques Heath (Lily May Peel)
4 Allen, Mr. William Henry
5 Moran, Mr. James
6 McCarthy, Mr. Timothy J
7 Palsson, Master. Gosta Leonard
8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
9 Nasser, Mrs. Nicholas (Adele Achem)
10 Sandstrom, Miss. Marguerite Rut
11 Bonnell, Miss. Elizabeth
12 Saundercock, Mr. William Henry
13 Andersson, Mr. Anders Johan
14 Vestrom, Miss. Hulda Amanda Adolfina
15 Hewlett, Mrs. (Mary D Kingcome)
16 Rice, Master. Eugene
17 Williams, Mr. Charles Eugene
18 Vander Planke, Mrs. Julius (Emelia Maria Vande...
19 Masselmani, Mrs. Fatima
20 Fynney, Mr. Joseph J
21 Beesley, Mr. Lawrence
22 McGowan, Miss. Anna "Annie"
23 Sloper, Mr. William Thompson
24 Palsson, Miss. Torborg Danira
25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
26 Emir, Mr. Farred Chehab
27 Fortune, Mr. Charles Alexander
28 O'Dwyer, Miss. Ellen "Nellie"
29 Todoroff, Mr. Lalio
...
861 Giles, Mr. Frederick Edward
862 Swift, Mrs. Frederick Joel (Margaret Welles Ba...
863 Sage, Miss. Dorothy Edith "Dolly"
864 Gill, Mr. John William
865 Bystrom, Mrs. (Karolina)
866 Duran y More, Miss. Asuncion
867 Roebling, Mr. Washington Augustus II
868 van Melkebeke, Mr. Philemon
869 Johnson, Master. Harold Theodor
870 Balkic, Mr. Cerin
871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
872 Carlsson, Mr. Frans Olof
873 Vander Cruyssen, Mr. Victor
874 Abelson, Mrs. Samuel (Hannah Wizosky)
875 Najib, Miss. Adele Kiamie "Jane"
876 Gustafsson, Mr. Alfred Ossian
877 Petroff, Mr. Nedelio
878 Laleff, Mr. Kristo
879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
880 Shelley, Mrs. William (Imanita Parrish Hall)
881 Markun, Mr. Johann
882 Dahlberg, Miss. Gerda Ulrika
883 Banfield, Mr. Frederick James
884 Sutehall, Mr. Henry Jr
885 Rice, Mrs. William (Margaret Norton)
886 Montvila, Rev. Juozas
887 Graham, Miss. Margaret Edith
888 Johnston, Miss. Catherine Helen "Carrie"
889 Behr, Mr. Karl Howell
890 Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object
# Normalise the names to lower case and store the result back in the column.
data[ 'Name' ] = data[ 'Name' ] . str . lower( )
# Display the updated column.
data[ 'Name' ]
0 braund, mr. owen harris
1 cumings, mrs. john bradley (florence briggs th...
2 heikkinen, miss. laina
3 futrelle, mrs. jacques heath (lily may peel)
4 allen, mr. william henry
5 moran, mr. james
6 mccarthy, mr. timothy j
7 palsson, master. gosta leonard
8 johnson, mrs. oscar w (elisabeth vilhelmina berg)
9 nasser, mrs. nicholas (adele achem)
10 sandstrom, miss. marguerite rut
11 bonnell, miss. elizabeth
12 saundercock, mr. william henry
13 andersson, mr. anders johan
14 vestrom, miss. hulda amanda adolfina
15 hewlett, mrs. (mary d kingcome)
16 rice, master. eugene
17 williams, mr. charles eugene
18 vander planke, mrs. julius (emelia maria vande...
19 masselmani, mrs. fatima
20 fynney, mr. joseph j
21 beesley, mr. lawrence
22 mcgowan, miss. anna "annie"
23 sloper, mr. william thompson
24 palsson, miss. torborg danira
25 asplund, mrs. carl oscar (selma augusta emilia...
26 emir, mr. farred chehab
27 fortune, mr. charles alexander
28 o'dwyer, miss. ellen "nellie"
29 todoroff, mr. lalio
...
861 giles, mr. frederick edward
862 swift, mrs. frederick joel (margaret welles ba...
863 sage, miss. dorothy edith "dolly"
864 gill, mr. john william
865 bystrom, mrs. (karolina)
866 duran y more, miss. asuncion
867 roebling, mr. washington augustus ii
868 van melkebeke, mr. philemon
869 johnson, master. harold theodor
870 balkic, mr. cerin
871 beckwith, mrs. richard leonard (sallie monypeny)
872 carlsson, mr. frans olof
873 vander cruyssen, mr. victor
874 abelson, mrs. samuel (hannah wizosky)
875 najib, miss. adele kiamie "jane"
876 gustafsson, mr. alfred ossian
877 petroff, mr. nedelio
878 laleff, mr. kristo
879 potter, mrs. thomas jr (lily alexenia wilson)
880 shelley, mrs. william (imanita parrish hall)
881 markun, mr. johann
882 dahlberg, miss. gerda ulrika
883 banfield, mr. frederick james
884 sutehall, mr. henry jr
885 rice, mrs. william (margaret norton)
886 montvila, rev. juozas
887 graham, miss. margaret edith
888 johnston, miss. catherine helen "carrie"
889 behr, mr. karl howell
890 dooley, mr. patrick
Name: Name, Length: 891, dtype: object
# Current dtype of the Survived column before any conversion.
data[ 'Survived' ] . dtype
dtype('int64')
# Preview the column converted to strings. astype returns a new Series and the
# result is not assigned, so the column in `data` keeps its original dtype.
data[ 'Survived' ] . astype( 'str' )
0 0
1 1
2 1
3 1
4 0
5 0
6 0
7 0
8 1
9 1
10 1
11 1
12 0
13 0
14 0
15 1
16 0
17 1
18 0
19 1
20 0
21 1
22 1
23 1
24 0
25 1
26 0
27 0
28 1
29 0
..
861 0
862 1
863 0
864 0
865 1
866 1
867 0
868 0
869 1
870 0
871 1
872 0
873 0
874 1
875 1
876 0
877 0
878 0
879 1
880 1
881 0
882 0
883 0
884 0
885 0
886 0
887 1
888 0
889 1
890 0
Name: Survived, Length: 891, dtype: object
# Preview the table with two columns relabelled (Sex -> 性别, Name -> 姓名).
# The stray trailing space in the new 'Sex' label was removed so the column can
# be selected by its visible name. rename() returns a new DataFrame and the
# result is not assigned, so `data` keeps its original column names.
data.rename(columns={'Sex': '性别', 'Name': '姓名'}).head()
PassengerId Survived Pclass 姓名 性别 Age SibSp Parch Ticket Fare Cabin Embarked 0 1 0 3 braund, mr. owen harris male 22.0 1 0 A/5 21171 7.2500 NaN S 1 2 1 1 cumings, mrs. john bradley (florence briggs th... female 38.0 1 0 PC 17599 71.2833 C85 C 2 3 1 3 heikkinen, miss. laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 3 4 1 1 futrelle, mrs. jacques heath (lily may peel) female 35.0 1 0 113803 53.1000 C123 S 4 5 0 3 allen, mr. william henry male 35.0 0 0 373450 8.0500 NaN S
# Column labels of `data`; still the original names because the rename above
# was only previewed, not assigned.
data. columns
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
# Distinct Sex values, keeping the first row on which each value occurs.
data['Sex'].drop_duplicates(keep='first')
0 male
1 female
Name: Sex, dtype: object
# Distinct Sex values again, this time keeping the last row on which each value occurs.
data[ 'Sex' ] . drop_duplicates( keep= 'last' )
888 female
890 male
Name: Sex, dtype: object
# Preview the Survived column with every 0 relabelled as 'died'.
# The result is not assigned, so `data` is left unchanged.
data['Survived'].replace(to_replace=0, value='died')
0 died
1 1
2 1
3 1
4 died
5 died
6 died
7 died
8 1
9 1
10 1
11 1
12 died
13 died
14 died
15 1
16 died
17 1
18 died
19 1
20 died
21 1
22 1
23 1
24 died
25 1
26 died
27 died
28 1
29 died
...
861 died
862 1
863 died
864 died
865 1
866 1
867 died
868 died
869 1
870 died
871 1
872 died
873 died
874 1
875 1
876 died
877 died
878 died
879 1
880 1
881 died
882 died
883 died
884 died
885 died
886 died
887 1
888 died
889 1
890 died
Name: Survived, Length: 891, dtype: object
4.数据合并
# Small hand-made lookup table used to demonstrate the join types below.
# Three of its PassengerId values (9, 19, 29) exist in the main table; 39999 does not.
data1 = pd.DataFrame(
    dict(
        PassengerId=[9, 19, 29, 39999],
        Sur=[0, 1, 0, 1],
        Name=['xiaoming', 'xiaohong', 'xiaozhang', 'xiaozhao'],
        time=[10.0, 22.0, 30.0, 40.0],
    )
)
data1
PassengerId Sur Name time 0 9 0 xiaoming 10.0 1 19 1 xiaohong 22.0 2 29 0 xiaozhang 30.0 3 39999 1 xiaozhao 40.0
# Inner join on PassengerId: keep only the ids present in both tables.
# Both tables have a Name column, so the result carries Name_x / Name_y.
data_inner = data.merge(data1, on='PassengerId', how='inner')
data_inner
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 0 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0 xiaoming 10.0 1 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1 xiaohong 22.0 2 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0 xiaozhang 30.0
# Left join on PassengerId: keep every row of `data`; columns coming from
# `data1` are NaN where no matching id exists.
data_left = data.merge(data1, on='PassengerId', how='left')
data_left.iloc[:30]
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 0 1 0 3 braund, mr. owen harris male 22.000000 1 0 A/5 21171 7.2500 NaN S NaN NaN NaN 1 2 1 1 cumings, mrs. john bradley (florence briggs th... female 38.000000 1 0 PC 17599 71.2833 C85 C NaN NaN NaN 2 3 1 3 heikkinen, miss. laina female 26.000000 0 0 STON/O2. 3101282 7.9250 NaN S NaN NaN NaN 3 4 1 1 futrelle, mrs. jacques heath (lily may peel) female 35.000000 1 0 113803 53.1000 C123 S NaN NaN NaN 4 5 0 3 allen, mr. william henry male 35.000000 0 0 373450 8.0500 NaN S NaN NaN NaN 5 6 0 3 moran, mr. james male 29.699118 0 0 330877 8.4583 NaN Q NaN NaN NaN 6 7 0 1 mccarthy, mr. timothy j male 54.000000 0 0 17463 51.8625 E46 S NaN NaN NaN 7 8 0 3 palsson, master. gosta leonard male 2.000000 3 1 349909 21.0750 NaN S NaN NaN NaN 8 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0.0 xiaoming 10.0 9 10 1 2 nasser, mrs. nicholas (adele achem) female 14.000000 1 0 237736 30.0708 NaN C NaN NaN NaN 10 11 1 3 sandstrom, miss. marguerite rut female 4.000000 1 1 PP 9549 16.7000 G6 S NaN NaN NaN 11 12 1 1 bonnell, miss. elizabeth female 58.000000 0 0 113783 26.5500 C103 S NaN NaN NaN 12 13 0 3 saundercock, mr. william henry male 20.000000 0 0 A/5. 2151 8.0500 NaN S NaN NaN NaN 13 14 0 3 andersson, mr. anders johan male 39.000000 1 5 347082 31.2750 NaN S NaN NaN NaN 14 15 0 3 vestrom, miss. hulda amanda adolfina female 14.000000 0 0 350406 7.8542 NaN S NaN NaN NaN 15 16 1 2 hewlett, mrs. (mary d kingcome) female 55.000000 0 0 248706 16.0000 NaN S NaN NaN NaN 16 17 0 3 rice, master. eugene male 2.000000 4 1 382652 29.1250 NaN Q NaN NaN NaN 17 18 1 2 williams, mr. charles eugene male 29.699118 0 0 244373 13.0000 NaN S NaN NaN NaN 18 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1.0 xiaohong 22.0 19 20 1 3 masselmani, mrs. 
fatima female 29.699118 0 0 2649 7.2250 NaN C NaN NaN NaN 20 21 0 2 fynney, mr. joseph j male 35.000000 0 0 239865 26.0000 NaN S NaN NaN NaN 21 22 1 2 beesley, mr. lawrence male 34.000000 0 0 248698 13.0000 D56 S NaN NaN NaN 22 23 1 3 mcgowan, miss. anna "annie" female 15.000000 0 0 330923 8.0292 NaN Q NaN NaN NaN 23 24 1 1 sloper, mr. william thompson male 28.000000 0 0 113788 35.5000 A6 S NaN NaN NaN 24 25 0 3 palsson, miss. torborg danira female 8.000000 3 1 349909 21.0750 NaN S NaN NaN NaN 25 26 1 3 asplund, mrs. carl oscar (selma augusta emilia... female 38.000000 1 5 347077 31.3875 NaN S NaN NaN NaN 26 27 0 3 emir, mr. farred chehab male 29.699118 0 0 2631 7.2250 NaN C NaN NaN NaN 27 28 0 1 fortune, mr. charles alexander male 19.000000 3 2 19950 263.0000 C23 C25 C27 S NaN NaN NaN 28 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0.0 xiaozhang 30.0 29 30 0 3 todoroff, mr. lalio male 29.699118 0 0 349216 7.8958 NaN S NaN NaN NaN
# Right join on PassengerId: keep every row of `data1`; columns coming from
# `data` are NaN for ids that are not in the main table.
data_right = data.merge(data1, on='PassengerId', how='right')
data_right
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 0 9 1.0 3.0 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0.0 2.0 347742 11.1333 NaN S 0 xiaoming 10.0 1 19 0.0 3.0 vander planke, mrs. julius (emelia maria vande... female 31.000000 1.0 0.0 345763 18.0000 NaN S 1 xiaohong 22.0 2 29 1.0 3.0 o'dwyer, miss. ellen "nellie" female 29.699118 0.0 0.0 330959 7.8792 NaN Q 0 xiaozhang 30.0 3 39999 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1 xiaozhao 40.0
# Outer join on PassengerId: union of the ids from both tables, with NaN
# filling whichever side has no match.
data_outer = data.merge(data1, on='PassengerId', how='outer')
data_outer.iloc[:20]
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 0 1 0.0 3.0 braund, mr. owen harris male 22.000000 1.0 0.0 A/5 21171 7.2500 NaN S NaN NaN NaN 1 2 1.0 1.0 cumings, mrs. john bradley (florence briggs th... female 38.000000 1.0 0.0 PC 17599 71.2833 C85 C NaN NaN NaN 2 3 1.0 3.0 heikkinen, miss. laina female 26.000000 0.0 0.0 STON/O2. 3101282 7.9250 NaN S NaN NaN NaN 3 4 1.0 1.0 futrelle, mrs. jacques heath (lily may peel) female 35.000000 1.0 0.0 113803 53.1000 C123 S NaN NaN NaN 4 5 0.0 3.0 allen, mr. william henry male 35.000000 0.0 0.0 373450 8.0500 NaN S NaN NaN NaN 5 6 0.0 3.0 moran, mr. james male 29.699118 0.0 0.0 330877 8.4583 NaN Q NaN NaN NaN 6 7 0.0 1.0 mccarthy, mr. timothy j male 54.000000 0.0 0.0 17463 51.8625 E46 S NaN NaN NaN 7 8 0.0 3.0 palsson, master. gosta leonard male 2.000000 3.0 1.0 349909 21.0750 NaN S NaN NaN NaN 8 9 1.0 3.0 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0.0 2.0 347742 11.1333 NaN S 0.0 xiaoming 10.0 9 10 1.0 2.0 nasser, mrs. nicholas (adele achem) female 14.000000 1.0 0.0 237736 30.0708 NaN C NaN NaN NaN 10 11 1.0 3.0 sandstrom, miss. marguerite rut female 4.000000 1.0 1.0 PP 9549 16.7000 G6 S NaN NaN NaN 11 12 1.0 1.0 bonnell, miss. elizabeth female 58.000000 0.0 0.0 113783 26.5500 C103 S NaN NaN NaN 12 13 0.0 3.0 saundercock, mr. william henry male 20.000000 0.0 0.0 A/5. 2151 8.0500 NaN S NaN NaN NaN 13 14 0.0 3.0 andersson, mr. anders johan male 39.000000 1.0 5.0 347082 31.2750 NaN S NaN NaN NaN 14 15 0.0 3.0 vestrom, miss. hulda amanda adolfina female 14.000000 0.0 0.0 350406 7.8542 NaN S NaN NaN NaN 15 16 1.0 2.0 hewlett, mrs. (mary d kingcome) female 55.000000 0.0 0.0 248706 16.0000 NaN S NaN NaN NaN 16 17 0.0 3.0 rice, master. eugene male 2.000000 4.0 1.0 382652 29.1250 NaN Q NaN NaN NaN 17 18 1.0 2.0 williams, mr. charles eugene male 29.699118 0.0 0.0 244373 13.0000 NaN S NaN NaN NaN 18 19 0.0 3.0 vander planke, mrs. julius (emelia maria vande... 
female 31.000000 1.0 0.0 345763 18.0000 NaN S 1.0 xiaohong 22.0 19 20 1.0 3.0 masselmani, mrs. fatima female 29.699118 0.0 0.0 2649 7.2250 NaN C NaN NaN NaN
# Last twenty rows of the outer join.
data_outer.iloc[-20:]
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 872 873 0.0 1.0 carlsson, mr. frans olof male 33.000000 0.0 0.0 695 5.0000 B51 B53 B55 S NaN NaN NaN 873 874 0.0 3.0 vander cruyssen, mr. victor male 47.000000 0.0 0.0 345765 9.0000 NaN S NaN NaN NaN 874 875 1.0 2.0 abelson, mrs. samuel (hannah wizosky) female 28.000000 1.0 0.0 P/PP 3381 24.0000 NaN C NaN NaN NaN 875 876 1.0 3.0 najib, miss. adele kiamie "jane" female 15.000000 0.0 0.0 2667 7.2250 NaN C NaN NaN NaN 876 877 0.0 3.0 gustafsson, mr. alfred ossian male 20.000000 0.0 0.0 7534 9.8458 NaN S NaN NaN NaN 877 878 0.0 3.0 petroff, mr. nedelio male 19.000000 0.0 0.0 349212 7.8958 NaN S NaN NaN NaN 878 879 0.0 3.0 laleff, mr. kristo male 29.699118 0.0 0.0 349217 7.8958 NaN S NaN NaN NaN 879 880 1.0 1.0 potter, mrs. thomas jr (lily alexenia wilson) female 56.000000 0.0 1.0 11767 83.1583 C50 C NaN NaN NaN 880 881 1.0 2.0 shelley, mrs. william (imanita parrish hall) female 25.000000 0.0 1.0 230433 26.0000 NaN S NaN NaN NaN 881 882 0.0 3.0 markun, mr. johann male 33.000000 0.0 0.0 349257 7.8958 NaN S NaN NaN NaN 882 883 0.0 3.0 dahlberg, miss. gerda ulrika female 22.000000 0.0 0.0 7552 10.5167 NaN S NaN NaN NaN 883 884 0.0 2.0 banfield, mr. frederick james male 28.000000 0.0 0.0 C.A./SOTON 34068 10.5000 NaN S NaN NaN NaN 884 885 0.0 3.0 sutehall, mr. henry jr male 25.000000 0.0 0.0 SOTON/OQ 392076 7.0500 NaN S NaN NaN NaN 885 886 0.0 3.0 rice, mrs. william (margaret norton) female 39.000000 0.0 5.0 382652 29.1250 NaN Q NaN NaN NaN 886 887 0.0 2.0 montvila, rev. juozas male 27.000000 0.0 0.0 211536 13.0000 NaN S NaN NaN NaN 887 888 1.0 1.0 graham, miss. margaret edith female 19.000000 0.0 0.0 112053 30.0000 B42 S NaN NaN NaN 888 889 0.0 3.0 johnston, miss. catherine helen "carrie" female 29.699118 1.0 2.0 W./C. 6607 23.4500 NaN S NaN NaN NaN 889 890 1.0 1.0 behr, mr. karl howell male 26.000000 0.0 0.0 111369 30.0000 C148 C NaN NaN NaN 890 891 0.0 3.0 dooley, mr. 
patrick male 32.000000 0.0 0.0 370376 7.7500 NaN Q NaN NaN NaN 891 39999 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.0 xiaozhao 40.0
# Second small table whose columns do not overlap with data1's.
# NOTE(review): 'famle' looks like a typo for 'female'; kept as-is so the
# recorded output below still matches - confirm before changing.
data2 = pd.DataFrame({'id': [1, 2, 3],
                      'Sex': ['famle', 'male', 'male']})
# Stack the two tables row-wise. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in 2.0. Columns missing from one input
# are filled with NaN and the original row labels are kept (0..3 then 0..2).
# Column order follows the inputs here, whereas the recorded output shows them
# sorted alphabetically.
result = pd.concat([data1, data2])
result
Name PassengerId Sex Sur id time 0 xiaoming 9.0 NaN 0.0 NaN 10.0 1 xiaohong 19.0 NaN 1.0 NaN 22.0 2 xiaozhang 29.0 NaN 0.0 NaN 30.0 3 xiaozhao 39999.0 NaN 1.0 NaN 40.0 0 NaN NaN famle NaN 1.0 NaN 1 NaN NaN male NaN 2.0 NaN 2 NaN NaN male NaN 3.0 NaN
# Preview the table with the 'time' column used as the row index.
# set_index returns a new DataFrame; the result is not assigned, so data_inner is unchanged.
data_inner. set_index( 'time' )
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 10.0 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0 xiaoming 22.0 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1 xiaohong 30.0 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0 xiaozhang
# Preview the table with a fresh default integer index; drop=True discards the
# old index instead of inserting it as a column. The result is not assigned.
data_inner. reset_index( drop= True )
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 0 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0 xiaoming 10.0 1 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1 xiaohong 22.0 2 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0 xiaozhang 30.0
# Preview the rows ordered by Fare, cheapest first. The result is not assigned.
data_inner.sort_values('Fare')
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 2 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0 xiaozhang 30.0 0 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0 xiaoming 10.0 1 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1 xiaohong 22.0
# Show the rows ordered by their index labels.
data_inner.sort_index(axis=0)
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time 0 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0 xiaoming 10.0 1 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1 xiaohong 22.0 2 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0 xiaozhang 30.0
# Add a 'level' column: 'good' where the fare is above 10.0, 'oh' otherwise.
fare_above_ten = data_inner['Fare'] > 10.0
data_inner['level'] = np.where(fare_above_ten, 'good', 'oh')
data_inner
PassengerId Survived Pclass Name_x Sex Age SibSp Parch Ticket Fare Cabin Embarked Sur Name_y time level 0 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S 0 xiaoming 10.0 good 1 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S 1 xiaohong 22.0 good 2 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q 0 xiaozhang 30.0 oh
# Set tip = 1 for male passengers aged 10 or under; rows that do not match
# the mask are not assigned.
is_young_male = (data['Age'] <= 10.0) & (data['Sex'] == 'male')
data.loc[is_young_male, 'tip'] = 1
data.head(n=50)
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 0 1 0 3 braund, mr. owen harris male 22.000000 1 0 A/5 21171 7.2500 NaN S NaN 1 2 1 1 cumings, mrs. john bradley (florence briggs th... female 38.000000 1 0 PC 17599 71.2833 C85 C NaN 2 3 1 3 heikkinen, miss. laina female 26.000000 0 0 STON/O2. 3101282 7.9250 NaN S NaN 3 4 1 1 futrelle, mrs. jacques heath (lily may peel) female 35.000000 1 0 113803 53.1000 C123 S NaN 4 5 0 3 allen, mr. william henry male 35.000000 0 0 373450 8.0500 NaN S NaN 5 6 0 3 moran, mr. james male 29.699118 0 0 330877 8.4583 NaN Q NaN 6 7 0 1 mccarthy, mr. timothy j male 54.000000 0 0 17463 51.8625 E46 S NaN 7 8 0 3 palsson, master. gosta leonard male 2.000000 3 1 349909 21.0750 NaN S 1.0 8 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S NaN 9 10 1 2 nasser, mrs. nicholas (adele achem) female 14.000000 1 0 237736 30.0708 NaN C NaN 10 11 1 3 sandstrom, miss. marguerite rut female 4.000000 1 1 PP 9549 16.7000 G6 S NaN 11 12 1 1 bonnell, miss. elizabeth female 58.000000 0 0 113783 26.5500 C103 S NaN 12 13 0 3 saundercock, mr. william henry male 20.000000 0 0 A/5. 2151 8.0500 NaN S NaN 13 14 0 3 andersson, mr. anders johan male 39.000000 1 5 347082 31.2750 NaN S NaN 14 15 0 3 vestrom, miss. hulda amanda adolfina female 14.000000 0 0 350406 7.8542 NaN S NaN 15 16 1 2 hewlett, mrs. (mary d kingcome) female 55.000000 0 0 248706 16.0000 NaN S NaN 16 17 0 3 rice, master. eugene male 2.000000 4 1 382652 29.1250 NaN Q 1.0 17 18 1 2 williams, mr. charles eugene male 29.699118 0 0 244373 13.0000 NaN S NaN 18 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S NaN 19 20 1 3 masselmani, mrs. fatima female 29.699118 0 0 2649 7.2250 NaN C NaN 20 21 0 2 fynney, mr. joseph j male 35.000000 0 0 239865 26.0000 NaN S NaN 21 22 1 2 beesley, mr. lawrence male 34.000000 0 0 248698 13.0000 D56 S NaN 22 23 1 3 mcgowan, miss. 
anna "annie" female 15.000000 0 0 330923 8.0292 NaN Q NaN 23 24 1 1 sloper, mr. william thompson male 28.000000 0 0 113788 35.5000 A6 S NaN 24 25 0 3 palsson, miss. torborg danira female 8.000000 3 1 349909 21.0750 NaN S NaN 25 26 1 3 asplund, mrs. carl oscar (selma augusta emilia... female 38.000000 1 5 347077 31.3875 NaN S NaN 26 27 0 3 emir, mr. farred chehab male 29.699118 0 0 2631 7.2250 NaN C NaN 27 28 0 1 fortune, mr. charles alexander male 19.000000 3 2 19950 263.0000 C23 C25 C27 S NaN 28 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q NaN 29 30 0 3 todoroff, mr. lalio male 29.699118 0 0 349216 7.8958 NaN S NaN 30 31 0 1 uruchurtu, don. manuel e male 40.000000 0 0 PC 17601 27.7208 NaN C NaN 31 32 1 1 spencer, mrs. william augustus (marie eugenie) female 29.699118 1 0 PC 17569 146.5208 B78 C NaN 32 33 1 3 glynn, miss. mary agatha female 29.699118 0 0 335677 7.7500 NaN Q NaN 33 34 0 2 wheadon, mr. edward h male 66.000000 0 0 C.A. 24579 10.5000 NaN S NaN 34 35 0 1 meyer, mr. edgar joseph male 28.000000 1 0 PC 17604 82.1708 NaN C NaN 35 36 0 1 holverson, mr. alexander oskar male 42.000000 1 0 113789 52.0000 NaN S NaN 36 37 1 3 mamee, mr. hanna male 29.699118 0 0 2677 7.2292 NaN C NaN 37 38 0 3 cann, mr. ernest charles male 21.000000 0 0 A./5. 2152 8.0500 NaN S NaN 38 39 0 3 vander planke, miss. augusta maria female 18.000000 2 0 345764 18.0000 NaN S NaN 39 40 1 3 nicola-yarred, miss. jamila female 14.000000 1 0 2651 11.2417 NaN C NaN 40 41 0 3 ahlin, mrs. johan (johanna persdotter larsson) female 40.000000 1 0 7546 9.4750 NaN S NaN 41 42 0 2 turpin, mrs. william john robert (dorothy ann ... female 27.000000 1 0 11668 21.0000 NaN S NaN 42 43 0 3 kraeff, mr. theodor male 29.699118 0 0 349253 7.8958 NaN C NaN 43 44 1 2 laroche, miss. simonne marie anne andree female 3.000000 1 2 SC/Paris 2123 41.5792 NaN C NaN 44 45 1 3 devaney, miss. margaret delia female 19.000000 0 0 330958 7.8792 NaN Q NaN 45 46 0 3 rogers, mr. 
william john male 29.699118 0 0 S.C./A.4. 23567 8.0500 NaN S NaN 46 47 0 3 lennon, mr. denis male 29.699118 1 0 370371 15.5000 NaN Q NaN 47 48 1 3 o'driscoll, miss. bridget female 29.699118 0 0 14311 7.7500 NaN Q NaN 48 49 0 3 samaan, mr. youssef male 29.699118 2 0 2662 21.6792 NaN C NaN 49 50 0 3 arnold-franchi, mrs. josef (josefine franchi) female 18.000000 1 0 349237 17.8000 NaN S NaN
# Split every passenger name on '.' and lay the pieces out as three columns,
# aligned with data's index. Names with fewer than three pieces get None in
# the trailing column(s).
# NOTE(review): the first column ends up holding "surname, title" and the
# second the given names, which does not match the 名/姓 labels -- confirm
# the intended naming before relying on these columns.
name_pieces = [full_name.split('.') for full_name in data['Name']]
data_namesplit = pd.DataFrame(
    name_pieces,
    columns=['名', '姓', 's'],
    index=data.index,
)
data_namesplit
名 姓 s 0 braund, mr owen harris None 1 cumings, mrs john bradley (florence briggs thayer) None 2 heikkinen, miss laina None 3 futrelle, mrs jacques heath (lily may peel) None 4 allen, mr william henry None 5 moran, mr james None 6 mccarthy, mr timothy j None 7 palsson, master gosta leonard None 8 johnson, mrs oscar w (elisabeth vilhelmina berg) None 9 nasser, mrs nicholas (adele achem) None 10 sandstrom, miss marguerite rut None 11 bonnell, miss elizabeth None 12 saundercock, mr william henry None 13 andersson, mr anders johan None 14 vestrom, miss hulda amanda adolfina None 15 hewlett, mrs (mary d kingcome) None 16 rice, master eugene None 17 williams, mr charles eugene None 18 vander planke, mrs julius (emelia maria vandemoortele) None 19 masselmani, mrs fatima None 20 fynney, mr joseph j None 21 beesley, mr lawrence None 22 mcgowan, miss anna "annie" None 23 sloper, mr william thompson None 24 palsson, miss torborg danira None 25 asplund, mrs carl oscar (selma augusta emilia johansson) None 26 emir, mr farred chehab None 27 fortune, mr charles alexander None 28 o'dwyer, miss ellen "nellie" None 29 todoroff, mr lalio None ... ... ... ... 
861 giles, mr frederick edward None 862 swift, mrs frederick joel (margaret welles barron) None 863 sage, miss dorothy edith "dolly" None 864 gill, mr john william None 865 bystrom, mrs (karolina) None 866 duran y more, miss asuncion None 867 roebling, mr washington augustus ii None 868 van melkebeke, mr philemon None 869 johnson, master harold theodor None 870 balkic, mr cerin None 871 beckwith, mrs richard leonard (sallie monypeny) None 872 carlsson, mr frans olof None 873 vander cruyssen, mr victor None 874 abelson, mrs samuel (hannah wizosky) None 875 najib, miss adele kiamie "jane" None 876 gustafsson, mr alfred ossian None 877 petroff, mr nedelio None 878 laleff, mr kristo None 879 potter, mrs thomas jr (lily alexenia wilson) None 880 shelley, mrs william (imanita parrish hall) None 881 markun, mr johann None 882 dahlberg, miss gerda ulrika None 883 banfield, mr frederick james None 884 sutehall, mr henry jr None 885 rice, mrs william (margaret norton) None 886 montvila, rev juozas None 887 graham, miss margaret edith None 888 johnston, miss catherine helen "carrie" None 889 behr, mr karl howell None 890 dooley, mr patrick None
891 rows × 3 columns
# Put the split-name columns in front of the original columns (side-by-side
# concatenation aligned on the shared index) and rebind `data` to the result.
data = pd.concat([data_namesplit, data], axis='columns')
data
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 0 braund, mr owen harris None 1 0 3 braund, mr. owen harris male 22.000000 1 0 A/5 21171 7.2500 NaN S NaN 1 cumings, mrs john bradley (florence briggs thayer) None 2 1 1 cumings, mrs. john bradley (florence briggs th... female 38.000000 1 0 PC 17599 71.2833 C85 C NaN 2 heikkinen, miss laina None 3 1 3 heikkinen, miss. laina female 26.000000 0 0 STON/O2. 3101282 7.9250 NaN S NaN 3 futrelle, mrs jacques heath (lily may peel) None 4 1 1 futrelle, mrs. jacques heath (lily may peel) female 35.000000 1 0 113803 53.1000 C123 S NaN 4 allen, mr william henry None 5 0 3 allen, mr. william henry male 35.000000 0 0 373450 8.0500 NaN S NaN 5 moran, mr james None 6 0 3 moran, mr. james male 29.699118 0 0 330877 8.4583 NaN Q NaN 6 mccarthy, mr timothy j None 7 0 1 mccarthy, mr. timothy j male 54.000000 0 0 17463 51.8625 E46 S NaN 7 palsson, master gosta leonard None 8 0 3 palsson, master. gosta leonard male 2.000000 3 1 349909 21.0750 NaN S 1.0 8 johnson, mrs oscar w (elisabeth vilhelmina berg) None 9 1 3 johnson, mrs. oscar w (elisabeth vilhelmina berg) female 27.000000 0 2 347742 11.1333 NaN S NaN 9 nasser, mrs nicholas (adele achem) None 10 1 2 nasser, mrs. nicholas (adele achem) female 14.000000 1 0 237736 30.0708 NaN C NaN 10 sandstrom, miss marguerite rut None 11 1 3 sandstrom, miss. marguerite rut female 4.000000 1 1 PP 9549 16.7000 G6 S NaN 11 bonnell, miss elizabeth None 12 1 1 bonnell, miss. elizabeth female 58.000000 0 0 113783 26.5500 C103 S NaN 12 saundercock, mr william henry None 13 0 3 saundercock, mr. william henry male 20.000000 0 0 A/5. 2151 8.0500 NaN S NaN 13 andersson, mr anders johan None 14 0 3 andersson, mr. anders johan male 39.000000 1 5 347082 31.2750 NaN S NaN 14 vestrom, miss hulda amanda adolfina None 15 0 3 vestrom, miss. hulda amanda adolfina female 14.000000 0 0 350406 7.8542 NaN S NaN 15 hewlett, mrs (mary d kingcome) None 16 1 2 hewlett, mrs. 
(mary d kingcome) female 55.000000 0 0 248706 16.0000 NaN S NaN 16 rice, master eugene None 17 0 3 rice, master. eugene male 2.000000 4 1 382652 29.1250 NaN Q 1.0 17 williams, mr charles eugene None 18 1 2 williams, mr. charles eugene male 29.699118 0 0 244373 13.0000 NaN S NaN 18 vander planke, mrs julius (emelia maria vandemoortele) None 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S NaN 19 masselmani, mrs fatima None 20 1 3 masselmani, mrs. fatima female 29.699118 0 0 2649 7.2250 NaN C NaN 20 fynney, mr joseph j None 21 0 2 fynney, mr. joseph j male 35.000000 0 0 239865 26.0000 NaN S NaN 21 beesley, mr lawrence None 22 1 2 beesley, mr. lawrence male 34.000000 0 0 248698 13.0000 D56 S NaN 22 mcgowan, miss anna "annie" None 23 1 3 mcgowan, miss. anna "annie" female 15.000000 0 0 330923 8.0292 NaN Q NaN 23 sloper, mr william thompson None 24 1 1 sloper, mr. william thompson male 28.000000 0 0 113788 35.5000 A6 S NaN 24 palsson, miss torborg danira None 25 0 3 palsson, miss. torborg danira female 8.000000 3 1 349909 21.0750 NaN S NaN 25 asplund, mrs carl oscar (selma augusta emilia johansson) None 26 1 3 asplund, mrs. carl oscar (selma augusta emilia... female 38.000000 1 5 347077 31.3875 NaN S NaN 26 emir, mr farred chehab None 27 0 3 emir, mr. farred chehab male 29.699118 0 0 2631 7.2250 NaN C NaN 27 fortune, mr charles alexander None 28 0 1 fortune, mr. charles alexander male 19.000000 3 2 19950 263.0000 C23 C25 C27 S NaN 28 o'dwyer, miss ellen "nellie" None 29 1 3 o'dwyer, miss. ellen "nellie" female 29.699118 0 0 330959 7.8792 NaN Q NaN 29 todoroff, mr lalio None 30 0 3 todoroff, mr. lalio male 29.699118 0 0 349216 7.8958 NaN S NaN ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 861 giles, mr frederick edward None 862 0 2 giles, mr. frederick edward male 21.000000 1 0 28134 11.5000 NaN S NaN 862 swift, mrs frederick joel (margaret welles barron) None 863 1 1 swift, mrs. 
frederick joel (margaret welles ba... female 48.000000 0 0 17466 25.9292 D17 S NaN 863 sage, miss dorothy edith "dolly" None 864 0 3 sage, miss. dorothy edith "dolly" female 29.699118 8 2 CA. 2343 69.5500 NaN S NaN 864 gill, mr john william None 865 0 2 gill, mr. john william male 24.000000 0 0 233866 13.0000 NaN S NaN 865 bystrom, mrs (karolina) None 866 1 2 bystrom, mrs. (karolina) female 42.000000 0 0 236852 13.0000 NaN S NaN 866 duran y more, miss asuncion None 867 1 2 duran y more, miss. asuncion female 27.000000 1 0 SC/PARIS 2149 13.8583 NaN C NaN 867 roebling, mr washington augustus ii None 868 0 1 roebling, mr. washington augustus ii male 31.000000 0 0 PC 17590 50.4958 A24 S NaN 868 van melkebeke, mr philemon None 869 0 3 van melkebeke, mr. philemon male 29.699118 0 0 345777 9.5000 NaN S NaN 869 johnson, master harold theodor None 870 1 3 johnson, master. harold theodor male 4.000000 1 1 347742 11.1333 NaN S 1.0 870 balkic, mr cerin None 871 0 3 balkic, mr. cerin male 26.000000 0 0 349248 7.8958 NaN S NaN 871 beckwith, mrs richard leonard (sallie monypeny) None 872 1 1 beckwith, mrs. richard leonard (sallie monypeny) female 47.000000 1 1 11751 52.5542 D35 S NaN 872 carlsson, mr frans olof None 873 0 1 carlsson, mr. frans olof male 33.000000 0 0 695 5.0000 B51 B53 B55 S NaN 873 vander cruyssen, mr victor None 874 0 3 vander cruyssen, mr. victor male 47.000000 0 0 345765 9.0000 NaN S NaN 874 abelson, mrs samuel (hannah wizosky) None 875 1 2 abelson, mrs. samuel (hannah wizosky) female 28.000000 1 0 P/PP 3381 24.0000 NaN C NaN 875 najib, miss adele kiamie "jane" None 876 1 3 najib, miss. adele kiamie "jane" female 15.000000 0 0 2667 7.2250 NaN C NaN 876 gustafsson, mr alfred ossian None 877 0 3 gustafsson, mr. alfred ossian male 20.000000 0 0 7534 9.8458 NaN S NaN 877 petroff, mr nedelio None 878 0 3 petroff, mr. nedelio male 19.000000 0 0 349212 7.8958 NaN S NaN 878 laleff, mr kristo None 879 0 3 laleff, mr. 
kristo male 29.699118 0 0 349217 7.8958 NaN S NaN 879 potter, mrs thomas jr (lily alexenia wilson) None 880 1 1 potter, mrs. thomas jr (lily alexenia wilson) female 56.000000 0 1 11767 83.1583 C50 C NaN 880 shelley, mrs william (imanita parrish hall) None 881 1 2 shelley, mrs. william (imanita parrish hall) female 25.000000 0 1 230433 26.0000 NaN S NaN 881 markun, mr johann None 882 0 3 markun, mr. johann male 33.000000 0 0 349257 7.8958 NaN S NaN 882 dahlberg, miss gerda ulrika None 883 0 3 dahlberg, miss. gerda ulrika female 22.000000 0 0 7552 10.5167 NaN S NaN 883 banfield, mr frederick james None 884 0 2 banfield, mr. frederick james male 28.000000 0 0 C.A./SOTON 34068 10.5000 NaN S NaN 884 sutehall, mr henry jr None 885 0 3 sutehall, mr. henry jr male 25.000000 0 0 SOTON/OQ 392076 7.0500 NaN S NaN 885 rice, mrs william (margaret norton) None 886 0 3 rice, mrs. william (margaret norton) female 39.000000 0 5 382652 29.1250 NaN Q NaN 886 montvila, rev juozas None 887 0 2 montvila, rev. juozas male 27.000000 0 0 211536 13.0000 NaN S NaN 887 graham, miss margaret edith None 888 1 1 graham, miss. margaret edith female 19.000000 0 0 112053 30.0000 B42 S NaN 888 johnston, miss catherine helen "carrie" None 889 0 3 johnston, miss. catherine helen "carrie" female 29.699118 1 2 W./C. 6607 23.4500 NaN S NaN 889 behr, mr karl howell None 890 1 1 behr, mr. karl howell male 26.000000 0 0 111369 30.0000 C148 C NaN 890 dooley, mr patrick None 891 0 3 dooley, mr. patrick male 32.000000 0 0 370376 7.7500 NaN Q NaN
891 rows × 16 columns
5.提取数据
# Label-based lookup: the single row whose index label is 3, as a Series.
data.loc[3, :]
名 futrelle, mrs
姓 jacques heath (lily may peel)
s None
PassengerId 4
Survived 1
Pclass 1
Name futrelle, mrs. jacques heath (lily may peel)
Sex female
Age 35
SibSp 1
Parch 0
Ticket 113803
Fare 53.1
Cabin C123
Embarked S
tip NaN
Name: 3, dtype: object
# Position-based slice: the first three rows.
data.iloc[:3]
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 0 braund, mr owen harris None 1 0 3 braund, mr. owen harris male 22.0 1 0 A/5 21171 7.2500 NaN S NaN 1 cumings, mrs john bradley (florence briggs thayer) None 2 1 1 cumings, mrs. john bradley (florence briggs th... female 38.0 1 0 PC 17599 71.2833 C85 C NaN 2 heikkinen, miss laina None 3 1 3 heikkinen, miss. laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S NaN
# Boolean-mask selection: rows where Age is below 3.
data[data['Age'].lt(3)]
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 7 palsson, master gosta leonard None 8 0 3 palsson, master. gosta leonard male 2.00 3 1 349909 21.0750 NaN S 1.0 16 rice, master eugene None 17 0 3 rice, master. eugene male 2.00 4 1 382652 29.1250 NaN Q 1.0 78 caldwell, master alden gates None 79 1 2 caldwell, master. alden gates male 0.83 0 2 248738 29.0000 NaN S 1.0 119 andersson, miss ellis anna maria None 120 0 3 andersson, miss. ellis anna maria female 2.00 4 2 347082 31.2750 NaN S NaN 164 panula, master eino viljami None 165 0 3 panula, master. eino viljami male 1.00 4 1 3101295 39.6875 NaN S 1.0 172 johnson, miss eleanor ileen None 173 1 3 johnson, miss. eleanor ileen female 1.00 1 1 347742 11.1333 NaN S NaN 183 becker, master richard f None 184 1 2 becker, master. richard f male 1.00 2 1 230136 39.0000 F4 S 1.0 205 strom, miss telma matilda None 206 0 3 strom, miss. telma matilda female 2.00 0 1 347054 10.4625 G6 S NaN 297 allison, miss helen loraine None 298 0 1 allison, miss. helen loraine female 2.00 1 2 113781 151.5500 C22 C26 S NaN 305 allison, master hudson trevor None 306 1 1 allison, master. hudson trevor male 0.92 1 2 113781 151.5500 C22 C26 S 1.0 340 navratil, master edmond roger None 341 1 2 navratil, master. edmond roger male 2.00 1 1 230080 26.0000 F2 S 1.0 381 nakid, miss maria ("mary") None 382 1 3 nakid, miss. maria ("mary") female 1.00 0 2 2653 15.7417 NaN C NaN 386 goodwin, master sidney leonard None 387 0 3 goodwin, master. sidney leonard male 1.00 5 2 CA 2144 46.9000 NaN S 1.0 469 baclini, miss helene barbara None 470 1 3 baclini, miss. helene barbara female 0.75 2 1 2666 19.2583 NaN C NaN 479 hirvonen, miss hildur e None 480 1 3 hirvonen, miss. hildur e female 2.00 0 1 3101298 12.2875 NaN S NaN 530 quick, miss phyllis may None 531 1 2 quick, miss. phyllis may female 2.00 1 1 26360 26.0000 NaN S NaN 642 skoog, miss margit elizabeth None 643 0 3 skoog, miss. 
margit elizabeth female 2.00 3 2 347088 27.9000 NaN S NaN 644 baclini, miss eugenie None 645 1 3 baclini, miss. eugenie female 0.75 2 1 2666 19.2583 NaN C NaN 755 hamalainen, master viljo None 756 1 2 hamalainen, master. viljo male 0.67 1 1 250649 14.5000 NaN S 1.0 788 dean, master bertram vere None 789 1 3 dean, master. bertram vere male 1.00 1 2 C.A. 2315 20.5750 NaN S 1.0 803 thomas, master assad alexander None 804 1 3 thomas, master. assad alexander male 0.42 0 1 2625 8.5167 NaN C 1.0 824 panula, master urho abraham None 825 0 3 panula, master. urho abraham male 2.00 4 1 3101295 39.6875 NaN S 1.0 827 mallet, master andre None 828 1 2 mallet, master. andre male 1.00 0 2 S.C./PARIS 2079 37.0042 NaN C 1.0 831 richards, master george sibley None 832 1 2 richards, master. george sibley male 0.83 1 1 29106 18.7500 NaN S 1.0
# Position-based slice: first three rows, first four columns.
data.iloc[0:3, 0:4]
名 姓 s PassengerId 0 braund, mr owen harris None 1 1 cumings, mrs john bradley (florence briggs thayer) None 2 2 heikkinen, miss laina None 3
# Position-based slice: rows at positions 1-2, column at position 3
# (kept as a one-column DataFrame because the column selector is a slice).
data.iloc[slice(1, 3), slice(3, 4)]
# Boolean Series: True where SibSp is 1 or 2.
data.SibSp.isin([1, 2])
0 True
1 True
2 False
3 True
4 False
5 False
6 False
7 False
8 False
9 True
10 True
11 False
12 False
13 True
14 False
15 False
16 False
17 False
18 True
19 False
20 False
21 False
22 False
23 False
24 False
25 True
26 False
27 False
28 False
29 False
...
861 True
862 False
863 False
864 False
865 False
866 True
867 False
868 False
869 True
870 False
871 True
872 False
873 False
874 True
875 False
876 False
877 False
878 False
879 False
880 False
881 False
882 False
883 False
884 False
885 False
886 False
887 False
888 True
889 False
890 False
Name: SibSp, Length: 891, dtype: bool
# Keep only the rows where SibSp is 1 or 2.
data[data['SibSp'].isin([1, 2])]
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 0 braund, mr owen harris None 1 0 3 braund, mr. owen harris male 22.000000 1 0 A/5 21171 7.2500 NaN S NaN 1 cumings, mrs john bradley (florence briggs thayer) None 2 1 1 cumings, mrs. john bradley (florence briggs th... female 38.000000 1 0 PC 17599 71.2833 C85 C NaN 3 futrelle, mrs jacques heath (lily may peel) None 4 1 1 futrelle, mrs. jacques heath (lily may peel) female 35.000000 1 0 113803 53.1000 C123 S NaN 9 nasser, mrs nicholas (adele achem) None 10 1 2 nasser, mrs. nicholas (adele achem) female 14.000000 1 0 237736 30.0708 NaN C NaN 10 sandstrom, miss marguerite rut None 11 1 3 sandstrom, miss. marguerite rut female 4.000000 1 1 PP 9549 16.7000 G6 S NaN 13 andersson, mr anders johan None 14 0 3 andersson, mr. anders johan male 39.000000 1 5 347082 31.2750 NaN S NaN 18 vander planke, mrs julius (emelia maria vandemoortele) None 19 0 3 vander planke, mrs. julius (emelia maria vande... female 31.000000 1 0 345763 18.0000 NaN S NaN 25 asplund, mrs carl oscar (selma augusta emilia johansson) None 26 1 3 asplund, mrs. carl oscar (selma augusta emilia... female 38.000000 1 5 347077 31.3875 NaN S NaN 31 spencer, mrs william augustus (marie eugenie) None 32 1 1 spencer, mrs. william augustus (marie eugenie) female 29.699118 1 0 PC 17569 146.5208 B78 C NaN 34 meyer, mr edgar joseph None 35 0 1 meyer, mr. edgar joseph male 28.000000 1 0 PC 17604 82.1708 NaN C NaN 35 holverson, mr alexander oskar None 36 0 1 holverson, mr. alexander oskar male 42.000000 1 0 113789 52.0000 NaN S NaN 38 vander planke, miss augusta maria None 39 0 3 vander planke, miss. augusta maria female 18.000000 2 0 345764 18.0000 NaN S NaN 39 nicola-yarred, miss jamila None 40 1 3 nicola-yarred, miss. jamila female 14.000000 1 0 2651 11.2417 NaN C NaN 40 ahlin, mrs johan (johanna persdotter larsson) None 41 0 3 ahlin, mrs. 
johan (johanna persdotter larsson) female 40.000000 1 0 7546 9.4750 NaN S NaN 41 turpin, mrs william john robert (dorothy ann wonnacott) None 42 0 2 turpin, mrs. william john robert (dorothy ann ... female 27.000000 1 0 11668 21.0000 NaN S NaN 43 laroche, miss simonne marie anne andree None 44 1 2 laroche, miss. simonne marie anne andree female 3.000000 1 2 SC/Paris 2123 41.5792 NaN C NaN 46 lennon, mr denis None 47 0 3 lennon, mr. denis male 29.699118 1 0 370371 15.5000 NaN Q NaN 48 samaan, mr youssef None 49 0 3 samaan, mr. youssef male 29.699118 2 0 2662 21.6792 NaN C NaN 49 arnold-franchi, mrs josef (josefine franchi) None 50 0 3 arnold-franchi, mrs. josef (josefine franchi) female 18.000000 1 0 349237 17.8000 NaN S NaN 52 harper, mrs henry sleeper (myna haxtun) None 53 1 1 harper, mrs. henry sleeper (myna haxtun) female 49.000000 1 0 PC 17572 76.7292 D33 C NaN 53 faunthorpe, mrs lizzie (elizabeth anne wilkinson) None 54 1 2 faunthorpe, mrs. lizzie (elizabeth anne wilkin... female 29.000000 1 0 2926 26.0000 NaN S NaN 58 west, miss constance mirium None 59 1 2 west, miss. constance mirium female 5.000000 1 2 C.A. 34651 27.7500 NaN S NaN 62 harris, mr henry birkhardt None 63 0 1 harris, mr. henry birkhardt male 45.000000 1 0 36973 83.4750 C83 S NaN 65 moubarek, master gerios None 66 1 3 moubarek, master. gerios male 29.699118 1 1 2661 15.2458 NaN C NaN 69 kink, mr vincenz None 70 0 3 kink, mr. vincenz male 26.000000 2 0 315151 8.6625 NaN S NaN 73 chronopoulos, mr apostolos None 74 0 3 chronopoulos, mr. apostolos male 26.000000 1 0 2680 14.4542 NaN C NaN 86 ford, mr william neal None 87 0 3 ford, mr. william neal male 16.000000 1 3 W./C. 6608 34.3750 NaN S NaN 92 chaffee, mr herbert fuller None 93 0 1 chaffee, mr. herbert fuller male 46.000000 1 0 W.E.P. 5734 61.1750 E31 S NaN 93 dean, mr bertram frank None 94 0 3 dean, mr. bertram frank male 26.000000 1 2 C.A. 2315 20.5750 NaN S NaN 99 kantor, mr sinai None 100 0 2 kantor, mr. 
sinai male 34.000000 1 0 244367 26.0000 NaN S NaN ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 750 wells, miss joan None 751 1 2 wells, miss. joan female 4.000000 1 1 29103 23.0000 NaN S NaN 754 herman, mrs samuel (jane laver) None 755 1 2 herman, mrs. samuel (jane laver) female 48.000000 1 2 220845 65.0000 NaN S NaN 755 hamalainen, master viljo None 756 1 2 hamalainen, master. viljo male 0.670000 1 1 250649 14.5000 NaN S 1.0 763 carter, mrs william ernest (lucile polk) None 764 1 1 carter, mrs. william ernest (lucile polk) female 36.000000 1 2 113760 120.0000 B96 B98 S NaN 765 hogeboom, mrs john c (anna andrews) None 766 1 1 hogeboom, mrs. john c (anna andrews) female 51.000000 1 0 13502 77.9583 D11 S NaN 768 moran, mr daniel j None 769 0 3 moran, mr. daniel j male 29.699118 1 0 371110 24.1500 NaN Q NaN 774 hocking, mrs elizabeth (eliza needs) None 775 1 2 hocking, mrs. elizabeth (eliza needs) female 54.000000 1 3 29105 23.0000 NaN S NaN 781 dick, mrs albert adrian (vera gillespie) None 782 1 1 dick, mrs. albert adrian (vera gillespie) female 17.000000 1 0 17474 57.0000 B20 S NaN 783 johnston, mr andrew g None 784 0 3 johnston, mr. andrew g male 29.699118 1 2 W./C. 6607 23.4500 NaN S NaN 788 dean, master bertram vere None 789 1 3 dean, master. bertram vere male 1.000000 1 2 C.A. 2315 20.5750 NaN S 1.0 799 van impe, mrs jean baptiste (rosalie paula govaert) None 800 0 3 van impe, mrs. jean baptiste (rosalie paula go... female 30.000000 1 1 345773 24.1500 NaN S NaN 801 collyer, mrs harvey (charlotte annie tate) None 802 1 2 collyer, mrs. harvey (charlotte annie tate) female 31.000000 1 1 C.A. 31921 26.2500 NaN S NaN 802 carter, master william thornton ii None 803 1 1 carter, master. william thornton ii male 11.000000 1 2 113760 120.0000 B96 B98 S NaN 809 chambers, mrs norman campbell (bertha griggs) None 810 1 1 chambers, mrs. 
norman campbell (bertha griggs) female 33.000000 1 0 113806 53.1000 E8 S NaN 817 mallet, mr albert None 818 0 2 mallet, mr. albert male 31.000000 1 1 S.C./PARIS 2079 37.0042 NaN C NaN 820 hays, mrs charles melville (clara jennings gregg) None 821 1 1 hays, mrs. charles melville (clara jennings gr... female 52.000000 1 1 12749 93.5000 B69 S NaN 830 yasbeck, mrs antoni (selini alexander) None 831 1 3 yasbeck, mrs. antoni (selini alexander) female 15.000000 1 0 2659 14.4542 NaN C NaN 831 richards, master george sibley None 832 1 2 richards, master. george sibley male 0.830000 1 1 29106 18.7500 NaN S 1.0 835 compton, miss sara rebecca None 836 1 1 compton, miss. sara rebecca female 39.000000 1 1 PC 17756 83.1583 E49 C NaN 849 goldenberg, mrs samuel l (edwiga grabowska) None 850 1 1 goldenberg, mrs. samuel l (edwiga grabowska) female 29.699118 1 0 17453 89.1042 C92 C NaN 852 boulos, miss nourelain None 853 0 3 boulos, miss. nourelain female 9.000000 1 1 2678 15.2458 NaN C NaN 854 carter, mrs ernest courtenay (lilian hughes) None 855 0 2 carter, mrs. ernest courtenay (lilian hughes) female 44.000000 1 0 244252 26.0000 NaN S NaN 856 wick, mrs george dennick (mary hitchcock) None 857 1 1 wick, mrs. george dennick (mary hitchcock) female 45.000000 1 1 36928 164.8667 NaN S NaN 860 hansen, mr claus peter None 861 0 3 hansen, mr. claus peter male 41.000000 2 0 350026 14.1083 NaN S NaN 861 giles, mr frederick edward None 862 0 2 giles, mr. frederick edward male 21.000000 1 0 28134 11.5000 NaN S NaN 866 duran y more, miss asuncion None 867 1 2 duran y more, miss. asuncion female 27.000000 1 0 SC/PARIS 2149 13.8583 NaN C NaN 869 johnson, master harold theodor None 870 1 3 johnson, master. harold theodor male 4.000000 1 1 347742 11.1333 NaN S 1.0 871 beckwith, mrs richard leonard (sallie monypeny) None 872 1 1 beckwith, mrs. richard leonard (sallie monypeny) female 47.000000 1 1 11751 52.5542 D35 S NaN 874 abelson, mrs samuel (hannah wizosky) None 875 1 2 abelson, mrs. 
samuel (hannah wizosky) female 28.000000 1 0 P/PP 3381 24.0000 NaN C NaN 888 johnston, miss catherine helen "carrie" None 889 0 3 johnston, miss. catherine helen "carrie" female 29.699118 1 2 W./C. 6607 23.4500 NaN S NaN
237 rows × 16 columns
6.筛选数据
# Rows satisfying both conditions: Age below 5 AND Sex equal to 'male'.
data[data['Age'].lt(5) & data['Sex'].eq('male')]
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 7 palsson, master gosta leonard None 8 0 3 palsson, master. gosta leonard male 2.00 3 1 349909 21.0750 NaN S 1.0 16 rice, master eugene None 17 0 3 rice, master. eugene male 2.00 4 1 382652 29.1250 NaN Q 1.0 63 skoog, master harald None 64 0 3 skoog, master. harald male 4.00 3 2 347088 27.9000 NaN S 1.0 78 caldwell, master alden gates None 79 1 2 caldwell, master. alden gates male 0.83 0 2 248738 29.0000 NaN S 1.0 164 panula, master eino viljami None 165 0 3 panula, master. eino viljami male 1.00 4 1 3101295 39.6875 NaN S 1.0 171 rice, master arthur None 172 0 3 rice, master. arthur male 4.00 4 1 382652 29.1250 NaN Q 1.0 183 becker, master richard f None 184 1 2 becker, master. richard f male 1.00 2 1 230136 39.0000 F4 S 1.0 193 navratil, master michel m None 194 1 2 navratil, master. michel m male 3.00 1 1 230080 26.0000 F2 S 1.0 261 asplund, master edvin rojj felix None 262 1 3 asplund, master. edvin rojj felix male 3.00 4 2 347077 31.3875 NaN S 1.0 305 allison, master hudson trevor None 306 1 1 allison, master. hudson trevor male 0.92 1 2 113781 151.5500 C22 C26 S 1.0 340 navratil, master edmond roger None 341 1 2 navratil, master. edmond roger male 2.00 1 1 230080 26.0000 F2 S 1.0 348 coutts, master william loch "william" None 349 1 3 coutts, master. william loch "william" male 3.00 1 1 C.A. 37671 15.9000 NaN S 1.0 386 goodwin, master sidney leonard None 387 0 3 goodwin, master. sidney leonard male 1.00 5 2 CA 2144 46.9000 NaN S 1.0 407 richards, master william rowe None 408 1 2 richards, master. william rowe male 3.00 1 1 29106 18.7500 NaN S 1.0 445 dodge, master washington None 446 1 1 dodge, master. washington male 4.00 0 2 33638 81.8583 A34 S 1.0 755 hamalainen, master viljo None 756 1 2 hamalainen, master. viljo male 0.67 1 1 250649 14.5000 NaN S 1.0 788 dean, master bertram vere None 789 1 3 dean, master. bertram vere male 1.00 1 2 C.A. 
2315 20.5750 NaN S 1.0 803 thomas, master assad alexander None 804 1 3 thomas, master. assad alexander male 0.42 0 1 2625 8.5167 NaN C 1.0 824 panula, master urho abraham None 825 0 3 panula, master. urho abraham male 2.00 4 1 3101295 39.6875 NaN S 1.0 827 mallet, master andre None 828 1 2 mallet, master. andre male 1.00 0 2 S.C./PARIS 2079 37.0042 NaN C 1.0 831 richards, master george sibley None 832 1 2 richards, master. george sibley male 0.83 1 1 29106 18.7500 NaN S 1.0 850 andersson, master sigvard harald elias None 851 0 3 andersson, master. sigvard harald elias male 4.00 4 2 347082 31.2750 NaN S 1.0 869 johnson, master harold theodor None 870 1 3 johnson, master. harold theodor male 4.00 1 1 347742 11.1333 NaN S 1.0
# Rows satisfying either condition: Age equal to 5 OR Age equal to 2.
data[data['Age'].eq(5) | data['Age'].eq(2)]
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 7 palsson, master gosta leonard None 8 0 3 palsson, master. gosta leonard male 2.0 3 1 349909 21.0750 NaN S 1.0 16 rice, master eugene None 17 0 3 rice, master. eugene male 2.0 4 1 382652 29.1250 NaN Q 1.0 58 west, miss constance mirium None 59 1 2 west, miss. constance mirium female 5.0 1 2 C.A. 34651 27.7500 NaN S NaN 119 andersson, miss ellis anna maria None 120 0 3 andersson, miss. ellis anna maria female 2.0 4 2 347082 31.2750 NaN S NaN 205 strom, miss telma matilda None 206 0 3 strom, miss. telma matilda female 2.0 0 1 347054 10.4625 G6 S NaN 233 asplund, miss lillian gertrud None 234 1 3 asplund, miss. lillian gertrud female 5.0 4 2 347077 31.3875 NaN S NaN 297 allison, miss helen loraine None 298 0 1 allison, miss. helen loraine female 2.0 1 2 113781 151.5500 C22 C26 S NaN 340 navratil, master edmond roger None 341 1 2 navratil, master. edmond roger male 2.0 1 1 230080 26.0000 F2 S 1.0 448 baclini, miss marie catherine None 449 1 3 baclini, miss. marie catherine female 5.0 2 1 2666 19.2583 NaN C NaN 479 hirvonen, miss hildur e None 480 1 3 hirvonen, miss. hildur e female 2.0 0 1 3101298 12.2875 NaN S NaN 530 quick, miss phyllis may None 531 1 2 quick, miss. phyllis may female 2.0 1 1 26360 26.0000 NaN S NaN 642 skoog, miss margit elizabeth None 643 0 3 skoog, miss. margit elizabeth female 2.0 3 2 347088 27.9000 NaN S NaN 777 emanuel, miss virginia ethel None 778 1 3 emanuel, miss. virginia ethel female 5.0 0 0 364516 12.4750 NaN S NaN 824 panula, master urho abraham None 825 0 3 panula, master. urho abraham male 2.0 4 1 3101295 39.6875 NaN S 1.0
# Number of non-null Parch entries among passengers aged exactly 5 or 2.
data.loc[data['Age'].isin([5, 2]), 'Parch'].count()
14
# Same selection as the boolean-mask version, expressed as a query string:
# comparing a column to a list with == keeps rows whose Age is in that list.
data.query(expr='Age==[5,2]')
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 7 palsson, master gosta leonard None 8 0 3 palsson, master. gosta leonard male 2.0 3 1 349909 21.0750 NaN S 1.0 16 rice, master eugene None 17 0 3 rice, master. eugene male 2.0 4 1 382652 29.1250 NaN Q 1.0 58 west, miss constance mirium None 59 1 2 west, miss. constance mirium female 5.0 1 2 C.A. 34651 27.7500 NaN S NaN 119 andersson, miss ellis anna maria None 120 0 3 andersson, miss. ellis anna maria female 2.0 4 2 347082 31.2750 NaN S NaN 205 strom, miss telma matilda None 206 0 3 strom, miss. telma matilda female 2.0 0 1 347054 10.4625 G6 S NaN 233 asplund, miss lillian gertrud None 234 1 3 asplund, miss. lillian gertrud female 5.0 4 2 347077 31.3875 NaN S NaN 297 allison, miss helen loraine None 298 0 1 allison, miss. helen loraine female 2.0 1 2 113781 151.5500 C22 C26 S NaN 340 navratil, master edmond roger None 341 1 2 navratil, master. edmond roger male 2.0 1 1 230080 26.0000 F2 S 1.0 448 baclini, miss marie catherine None 449 1 3 baclini, miss. marie catherine female 5.0 2 1 2666 19.2583 NaN C NaN 479 hirvonen, miss hildur e None 480 1 3 hirvonen, miss. hildur e female 2.0 0 1 3101298 12.2875 NaN S NaN 530 quick, miss phyllis may None 531 1 2 quick, miss. phyllis may female 2.0 1 1 26360 26.0000 NaN S NaN 642 skoog, miss margit elizabeth None 643 0 3 skoog, miss. margit elizabeth female 2.0 3 2 347088 27.9000 NaN S NaN 777 emanuel, miss virginia ethel None 778 1 3 emanuel, miss. virginia ethel female 5.0 0 0 364516 12.4750 NaN S NaN 824 panula, master urho abraham None 825 0 3 panula, master. urho abraham male 2.0 4 1 3101295 39.6875 NaN S 1.0
# Total SibSp over the passengers aged exactly 5 or 2.
data.query('Age==[5,2]')['SibSp'].sum()
28
7.汇总数据
# Per passenger class, count the non-null values in every other column.
data.groupby(by='Pclass').count()
名 姓 s PassengerId Survived Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip Pclass 1 216 216 1 216 216 216 216 216 216 216 216 216 176 214 2 2 184 184 0 184 184 184 184 184 184 184 184 184 16 184 9 3 491 491 0 491 491 491 491 491 491 491 491 491 12 491 22
# Non-null Sex entries per passenger class (a Series named 'Sex',
# indexed by Pclass).
data['Sex'].groupby(data['Pclass']).count()
Pclass
1 216
2 184
3 491
Name: Sex, dtype: int64
# Passenger head-count for every (Pclass, Sex) combination.
data.groupby(by=['Pclass', 'Sex'])['PassengerId'].count()
Pclass Sex
1 female 94
male 122
2 female 76
male 108
3 female 144
male 347
Name: PassengerId, dtype: int64
# Group size, total age and mean age for every (Pclass, Sex) combination.
# 'sum' and 'mean' are passed as string aliases rather than np.sum / np.mean:
# recent pandas releases emit a FutureWarning when NumPy reducers are handed
# to agg(), and the aliases give the same result. `len` is kept as a callable
# so the resulting column labels stay 'len', 'sum', 'mean'.
data.groupby(['Pclass', 'Sex'])['Age'].agg([len, 'sum', 'mean'])
len sum mean Pclass Sex 1 female 94.0 3209.292059 34.141405 male 122.0 4793.101471 39.287717 2 female 76.0 2184.898235 28.748661 male 108.0 3310.622059 30.653908 3 female 144.0 3465.862941 24.068493 male 347.0 9498.137059 27.372153
8.统计数据
# Draw 10 rows at random (a different draw on every run: no random_state is set).
data.sample(10)
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 70 jenkin, mr stephen curnow None 71 0 2 jenkin, mr. stephen curnow male 32.000000 0 0 C.A. 33111 10.5000 NaN S NaN 92 chaffee, mr herbert fuller None 93 0 1 chaffee, mr. herbert fuller male 46.000000 1 0 W.E.P. 5734 61.1750 E31 S NaN 700 astor, mrs john jacob (madeleine talmadge force) None 701 1 1 astor, mrs. john jacob (madeleine talmadge force) female 18.000000 1 0 PC 17757 227.5250 C62 C64 C NaN 626 kirkland, rev charles leonard None 627 0 2 kirkland, rev. charles leonard male 57.000000 0 0 219533 12.3500 NaN Q NaN 722 gillespie, mr william henry None 723 0 2 gillespie, mr. william henry male 34.000000 0 0 12233 13.0000 NaN S NaN 371 wiklund, mr jakob alfred None 372 0 3 wiklund, mr. jakob alfred male 18.000000 1 0 3101267 6.4958 NaN S NaN 248 beckwith, mr richard leonard None 249 1 1 beckwith, mr. richard leonard male 37.000000 1 1 11751 52.5542 D35 S NaN 795 otter, mr richard None 796 0 2 otter, mr. richard male 39.000000 0 0 28213 13.0000 NaN S NaN 567 palsson, mrs nils (alma cornelia berglund) None 568 0 3 palsson, mrs. nils (alma cornelia berglund) female 29.000000 0 4 349909 21.0750 NaN S NaN 837 sirota, mr maurice None 838 0 3 sirota, mr. maurice male 29.699118 0 0 392092 8.0500 NaN S NaN
# Draw 10 random rows without replacement: a row can be picked at most once.
data.sample(10, replace=False)
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 345 brown, miss amelia "mildred" None 346 1 2 brown, miss. amelia "mildred" female 24.000000 0 0 248733 13.0000 F33 S NaN 304 williams, mr howard hugh "harry" None 305 0 3 williams, mr. howard hugh "harry" male 29.699118 0 0 A/5 2466 8.0500 NaN S NaN 697 mullens, miss katherine "katie" None 698 1 3 mullens, miss. katherine "katie" female 29.699118 0 0 35852 7.7333 NaN Q NaN 493 artagaveytia, mr ramon None 494 0 1 artagaveytia, mr. ramon male 71.000000 0 0 PC 17609 49.5042 NaN C NaN 226 mellors, mr william john None 227 1 2 mellors, mr. william john male 19.000000 0 0 SW/PP 751 10.5000 NaN S NaN 506 quick, mrs frederick charles (jane richards) None 507 1 2 quick, mrs. frederick charles (jane richards) female 33.000000 0 2 26360 26.0000 NaN S NaN 806 andrews, mr thomas jr None 807 0 1 andrews, mr. thomas jr male 39.000000 0 0 112050 0.0000 A36 S NaN 335 denkoff, mr mitto None 336 0 3 denkoff, mr. mitto male 29.699118 0 0 349225 7.8958 NaN S NaN 560 morrow, mr thomas rowan None 561 0 3 morrow, mr. thomas rowan male 29.699118 0 0 372622 7.7500 NaN Q NaN 492 molson, mr harry markland None 493 0 1 molson, mr. harry markland male 55.000000 0 0 113787 30.5000 C30 S NaN
# Draw 10 random rows with replacement: the same row may appear more than once.
data.sample(10, replace=True)
名 姓 s PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked tip 703 gallagher, mr martin None 704 0 3 gallagher, mr. martin male 25.000000 0 0 36864 7.7417 NaN Q NaN 398 pain, dr alfred None 399 0 2 pain, dr. alfred male 23.000000 0 0 244278 10.5000 NaN S NaN 721 jensen, mr svend lauritz None 722 0 3 jensen, mr. svend lauritz male 17.000000 1 0 350048 7.0542 NaN S NaN 708 cleaver, miss alice None 709 1 1 cleaver, miss. alice female 22.000000 0 0 113781 151.5500 NaN S NaN 805 johansson, mr karl johan None 806 0 3 johansson, mr. karl johan male 31.000000 0 0 347063 7.7750 NaN S NaN 262 taussig, mr emil None 263 0 1 taussig, mr. emil male 52.000000 1 1 110413 79.6500 E67 S NaN 52 harper, mrs henry sleeper (myna haxtun) None 53 1 1 harper, mrs. henry sleeper (myna haxtun) female 49.000000 1 0 PC 17572 76.7292 D33 C NaN 659 newell, mr arthur webster None 660 0 1 newell, mr. arthur webster male 58.000000 0 2 35273 113.2750 D48 C NaN 692 lam, mr ali None 693 1 3 lam, mr. ali male 29.699118 0 0 1601 56.4958 NaN S NaN 597 johnson, mr alfred None 598 0 3 johnson, mr. alfred male 49.000000 0 0 LINE 0.0000 NaN S NaN
# Summary statistics rounded to 2 decimals, transposed so that each
# column of `data` becomes one row of the report.
data.describe().round(decimals=2).transpose()
count mean std min 25% 50% 75% max PassengerId 891.0 446.00 257.35 1.00 223.50 446.00 668.5 891.00 Survived 891.0 0.38 0.49 0.00 0.00 0.00 1.0 1.00 Pclass 891.0 2.31 0.84 1.00 2.00 3.00 3.0 3.00 Age 891.0 29.70 13.00 0.42 22.00 29.70 35.0 80.00 SibSp 891.0 0.52 1.10 0.00 0.00 0.00 1.0 8.00 Parch 891.0 0.38 0.81 0.00 0.00 0.00 0.0 6.00 Fare 891.0 32.20 49.69 0.00 7.91 14.45 31.0 512.33 tip 33.0 1.00 0.00 1.00 1.00 1.00 1.0 1.00
# Sample standard deviation of Age (ddof=1 is the pandas default, spelled out).
data['Age'].std(ddof=1)
13.002015226002884
# Covariance between Age and SibSp.
data['Age'].cov(other=data['SibSp'])
-3.3353450099140782
# Pairwise covariance matrix of the numeric columns.
# The numeric columns are selected explicitly: since pandas 2.0, cov() no
# longer drops non-numeric columns by default, so calling it on the full
# frame (which still holds string columns such as Name, Sex and Ticket)
# raises instead of silently skipping them.
# NOTE(review): select_dtypes('number') excludes bool columns; none are
# visible in this frame — confirm if columns are added later.
data.select_dtypes(include='number').cov()
PassengerId Survived Pclass Age SibSp Parch Fare tip PassengerId 66231.000000 -0.626966 -7.561798 111.113042 -16.325843 -0.342697 161.883369 0.0 Survived -0.626966 0.236772 -0.137703 -0.441656 -0.018954 0.032017 6.221787 0.0 Pclass -7.561798 -0.137703 0.699015 -3.601855 0.076599 0.012429 -22.830196 0.0 Age 111.113042 -0.441656 -3.601855 169.052400 -3.335345 -1.877987 59.162200 0.0 SibSp -16.325843 -0.018954 0.076599 -3.335345 1.216043 0.368739 8.748734 0.0 Parch -0.342697 0.032017 0.012429 -1.877987 0.368739 0.649728 8.661052 0.0 Fare 161.883369 6.221787 -22.830196 59.162200 8.748734 8.661052 2469.436846 0.0 tip 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
# Pearson correlation between Age and Survived ('pearson' is the default method).
data['Age'].corr(data['Survived'], method='pearson')
-0.06980851528714313
# Pairwise correlation matrix of the numeric columns.
# The numeric columns are selected explicitly: since pandas 2.0, corr() no
# longer drops non-numeric columns by default, so calling it on the full
# frame (which still holds string columns such as Name, Sex and Ticket)
# raises instead of silently skipping them.
# NOTE(review): select_dtypes('number') excludes bool columns; none are
# visible in this frame — confirm if columns are added later.
data.select_dtypes(include='number').corr()
PassengerId Survived Pclass Age SibSp Parch Fare tip PassengerId 1.000000 -0.005007 -0.035144 0.033207 -0.057527 -0.001652 0.012658 NaN Survived -0.005007 1.000000 -0.338481 -0.069809 -0.035322 0.081629 0.257307 NaN Pclass -0.035144 -0.338481 1.000000 -0.331339 0.083081 0.018443 -0.549500 NaN Age 0.033207 -0.069809 -0.331339 1.000000 -0.232625 -0.179191 0.091566 NaN SibSp -0.057527 -0.035322 0.083081 -0.232625 1.000000 0.414838 0.159651 NaN Parch -0.001652 0.081629 0.018443 -0.179191 0.414838 1.000000 0.216225 NaN Fare 0.012658 0.257307 -0.549500 0.091566 0.159651 0.216225 1.000000 NaN tip NaN NaN NaN NaN NaN NaN NaN NaN