DataWhale之task01-pandas基础

import numpy as np
import pandas as pd
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
example_1 = pd.Series(sdata)
example_1
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002, 2003],'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

example_2 = pd.DataFrame(data)
example_2
stateyearpop
0Ohio20001.5
1Ohio20011.7
2Ohio20023.6
3Nevada20012.4
4Nevada20022.9
5Nevada20033.2
train = pd.read_csv("train_chinese.csv")
train.head()
乘客ID是否幸存乘客等级(1/2/3等舱位)乘客姓名性别年龄堂兄弟/妹个数父母与小孩个数船票信息票价客舱登船港口
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS

查看某一列所有值

#查看“Cabin”这列的所有项
train["客舱"]
0       NaN
1       C85
2       NaN
3      C123
4       NaN
       ... 
886     NaN
887     B42
888     NaN
889    C148
890     NaN
Name: 客舱, Length: 891, dtype: object
train["客舱"].unique()
array([nan, 'C85', 'C123', 'E46', 'G6', 'C103', 'D56', 'A6',
       'C23 C25 C27', 'B78', 'D33', 'B30', 'C52', 'B28', 'C83', 'F33',
       'F G73', 'E31', 'A5', 'D10 D12', 'D26', 'C110', 'B58 B60', 'E101',
       'F E69', 'D47', 'B86', 'F2', 'C2', 'E33', 'B19', 'A7', 'C49', 'F4',
       'A32', 'B4', 'B80', 'A31', 'D36', 'D15', 'C93', 'C78', 'D35',
       'C87', 'B77', 'E67', 'B94', 'C125', 'C99', 'C118', 'D7', 'A19',
       'B49', 'D', 'C22 C26', 'C106', 'C65', 'E36', 'C54',
       'B57 B59 B63 B66', 'C7', 'E34', 'C32', 'B18', 'C124', 'C91', 'E40',
       'T', 'C128', 'D37', 'B35', 'E50', 'C82', 'B96 B98', 'E10', 'E44',
       'A34', 'C104', 'C111', 'C92', 'E38', 'D21', 'E12', 'E63', 'A14',
       'B37', 'C30', 'D20', 'B79', 'E25', 'D46', 'B73', 'C95', 'B38',
       'B39', 'B22', 'C86', 'C70', 'A16', 'C101', 'C68', 'A10', 'E68',
       'B41', 'A20', 'D19', 'D50', 'D9', 'A23', 'B50', 'A26', 'D48',
       'E58', 'C126', 'B71', 'B51 B53 B55', 'D49', 'B5', 'B20', 'F G63',
       'C62 C64', 'E24', 'C90', 'C45', 'E8', 'B101', 'D45', 'C46', 'D30',
       'E121', 'D11', 'E77', 'F38', 'B3', 'D6', 'B82 B84', 'D17', 'A36',
       'B102', 'B69', 'E49', 'C47', 'D28', 'E17', 'A24', 'C50', 'B42',
       'C148'], dtype=object)
train.客舱
0       NaN
1       C85
2       NaN
3      C123
4       NaN
       ... 
886     NaN
887     B42
888     NaN
889    C148
890     NaN
Name: 客舱, Length: 891, dtype: object
test_1 = pd.read_csv('test_1.csv')
test_1.head()
Unnamed: 0PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkeda
00103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS100
11211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C100
22313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS100
33411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S100
44503Allen, Mr. William Henrymale35.0003734508.0500NaNS100

用del删除列

del test_1['a']
test_1.head()
Unnamed: 0PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
00103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
11211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
22313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
33411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
44503Allen, Mr. William Henrymale35.0003734508.0500NaNS

删除多余列用drop

test_1.drop(columns=['a']).head()
Unnamed: 0PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
00103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
11211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
22313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
33411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
44503Allen, Mr. William Henrymale35.0003734508.0500NaNS
test_1.drop(['a'], axis=1).head()
Unnamed: 0PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
00103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
11211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
22313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
33411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
44503Allen, Mr. William Henrymale35.0003734508.0500NaNS
df = pd.read_csv("train.csv")
df.head()
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
#将['PassengerId','Name','Age','Ticket']这几个列元素隐藏,只观察其他几个列元素
df.drop(['PassengerId','Name','Age','Ticket'], axis=1).head()
SurvivedPclassSexSibSpParchFareCabinEmbarked
003male107.2500NaNS
111female1071.2833C85C
213female007.9250NaNS
311female1053.1000C123S
403male008.0500NaNS

筛选行

#年龄在10岁以下的乘客
df[df['Age']<10].head()
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
101113Sandstrom, Miss. Marguerite Rutfemale4.011PP 954916.7000G6S
161703Rice, Master. Eugenemale2.04138265229.1250NaNQ
242503Palsson, Miss. Torborg Danirafemale8.03134990921.0750NaNS
434412Laroche, Miss. Simonne Marie Anne Andreefemale3.012SC/Paris 212341.5792NaNC
#年龄在10岁以上和50岁以下
midage = df[(df['Age']>10) & (df['Age']<50)]
midage.head()
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
#重置索引,按自然数顺序排列
midage = midage.reset_index(drop=True)
midage.head()
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
#将midage的数据中的第100行的“Pclass”和“Sex”显示出来
midage.loc[[99],["Pclass","Sex"]]
PclassSex
992male
#index+列名索引
midage.loc[[100,105,108],["Pclass","Name","Sex"]]
PclassNameSex
1002Byles, Rev. Thomas Roussel Davidsmale
1053Cribb, Mr. John Hatfieldmale
1083Calic, Mr. Jovomale
# iloc 整数索引
midage.iloc[[100,105,108],[2,3,4]]
PclassNameSex
1002Byles, Rev. Thomas Roussel Davidsmale
1053Cribb, Mr. John Hatfieldmale
1083Calic, Mr. Jovomale
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值