# Column-oriented toy dataset: every key is a column name and every list
# holds that column's 14 values in row order.
data = {
    # Categorical attribute with three levels: "A", "B" and "C".
    '属性1': [
        "A", "A", "A", "A", "A",
        "B", "B", "B", "B",
        "C", "C", "C", "C", "C",
    ],
    # Numeric attribute; the literals are a mix of float and int.
    '属性2': [
        70.0, 90.0, 85, 95, 70,
        90, 78, 65, 75,
        80, 70, 80, 80, 96,
    ],
    # Two-valued attribute stored as the strings "真" / "假".
    '属性3': [
        "真", "真", "假", "假", "假",
        "真", "假", "真", "假",
        "真", "真", "假", "假", "假",
    ],
    # Label column holding the strings "类1" / "类2".
    '类': [
        "类1", "类2", "类2", "类2", "类1",
        "类1", "类1", "类1", "类1",
        "类2", "类2", "类1", "类1", "类1",
    ],
}

# Materialize the mapping as a DataFrame with one column per key.
data1 = pd.DataFrame(data)
>>> data[data.columns[0:2]]
name sex
1 lisa f
2 joy f
3 tom m
转化为列表: 对 DataFrame 调用 list() 只会得到列名, 可见多行数据不能直接转为二维列表
>>> list(data[data.columns[0:2]])
['name', 'sex']
查询一个数据
使用loc
选取一个数据
>>> data.loc[1,"name"]
'lisa'
查询多个数据,使用loc[rowIndex, column_name]
选取多个数据, 指定范围的行; loc 的切片包含两端, 1 到 3 的行都会选取
>>> data.loc[1:3,["name","age"]]
name age
1 lisa 22
2 joy 22
3 tom 21
指定行
>>> data.loc[[1,3],["name","age"]]
name age
1 lisa 22
3 tom 21
使用下标选取数据iloc[rowIndex, column_index], 下标从0 开始
选取一个
>>> data.iloc[0,0]
'lisa'
选取多个
多行选取
>>> data.iloc[0:2,0]
1 lisa
2 joy
Name: name, dtype:object
>>> data.iloc[[0,2],0]
1 lisa
3 tom
Name: name, dtype:object
多列选取
>>> data.iloc[0,0:2]
name lisa
sex f
Name:1, dtype:object
>>> data.iloc[0,[0,2]]
name lisa
age 22
Name:1, dtype:object
多行多列选取与上面类似
条件查询
单条件
>>> data.loc[data["Sex"]=="m",["姓名","Age"]]
姓名 Age
c tom 21
d Python 20
多条件: 先按一个条件筛选, 再在筛选结果上按另一个条件继续筛选
>>> data.loc[data.loc[:,"Sex"]=="m",["姓名","Age"]]
姓名 Age
c tom 21
d Python 20
>>> a = data.loc[data.loc[:,"Sex"]=="m",["姓名","Age"]]
>>> type(a)
<class 'pandas.core.frame.DataFrame'>
>>> a.loc[a["Age"]==20,["姓名","Age"]]
姓名 Age
d Python 20
>>> city = ["BeiJing","ShangHai","Gangzhou"]
>>> data.insert(3,"city", city)
>>> data
name sex age city
1 lisa f 22 BeiJing
2 joy f 22 ShangHai
3 tom m 21 Gangzhou
data[columnName] = [数据] 默认在最后一列增加
>>> job = ["Teacher","Driver","Doctor"]
>>> data["jobs"] = job
>>> data
name sex age city jobs
1 lisa f 22 BeiJing Teacher
2 joy f 22 ShangHai Driver
3 tom m 21 Gangzhou Doctor
>>> data.loc[:,"salary"] = ["1k","2k","3k"]
>>> data
name sex age city jobs salary
1 lisa f 22 BeiJing Teacher 1k
2 joy f 22 ShangHai Driver 2k
3 tom m 21 Gangzhou Doctor 3k
>>> data.loc[4,:] = ["tomcat","f",20,"ShenZhen","Programer","4k"]
>>> data
name sex age city jobs salary
1 lisa f 22 BeiJing Teacher 1k
2 joy f 22 ShangHai Driver 2k
3 tom m 21 GangZhou Doctor 3k
4 tomcat f 20 ShenZhen Programer 4k
>>> data.drop([1], axis=0, inplace=False)
name sex age city jobs salary
2 joy f 22 ShangHai Driver 2k
3 tom m 21 GangZhou Doctor 3k
4 tomcat f 20 ShenZhen Programer 4k
删除一列或多列,使用列表指定
>>> data.drop(["name"], axis=1, inplace=False)
sex age city jobs salary
1 f 22 BeiJing Teacher 1k
2 f 22 ShangHai Driver 2k
3 m 21 GangZhou Doctor 3k
4 f 20 ShenZhen Programer 4k
使用Python关键字 del
深Copy一个数据集
>>> data1 = data.copy(deep=True)
>>> data1
name sex age city jobs salary
1 lisa f 22 BeiJing Teacher 1k
2 joy f 22 ShangHai Driver 2k
3 tom m 21 GangZhou Doctor 3k
4 tomcat f 20 ShenZhen Programer 4k
删除, 直接对内存中的 data 本身进行操作(原地修改, 不返回新对象)
>>> del data["name"]
>>> data
sex age city jobs salary
1 f 22 BeiJing Teacher 1k
2 f 22 ShangHai Driver 2k
3 m 21 GangZhou Doctor 3k
4 f 20 ShenZhen Programer 4k
使用 pop 函数, 从 data 中原地删除指定的列, 并将被删除的列返回
>>> data.pop("age")
1 22
2 22
3 21
4 20
Name: age, dtype:object
>>> data
sex city jobs salary
1 f BeiJing Teacher 1k
2 f ShangHai Driver 2k
3 m GangZhou Doctor 3k
4 f ShenZhen Programer 4k
5. 修改数据
改行列
修改列名
>>> data.columns = ["姓名","性别","年龄"]
>>> data
姓名 性别 年龄
1 lisa f 22
2 joy f 22
3 tom m 21
>>> data.rename(columns={"姓名":"Name","性别":"Sex","年龄":"Age"}, inplace=True)
>>> data
Name Sex Age
1 lisa f 22
2 joy f 22
3 tom m 21
>>> data.rename({"Name":"姓名"}, axis=1, inplace=True)
>>> data
姓名 Sex Age
a lisa f 22
b joy f 22
c tom m 21
修改行名
>>> data.index = ["a","b","c"]
>>> data
Name Sex Age
a lisa f 22
b joy f 22
c tom m 21
>>> data.rename({"a":1}, axis=0, inplace=True)
>>> data
Name Sex Age
1 lisa f 22
b joy f 22
c tom m 21
>>> data.rename(index={1:"a"}, inplace=True)
>>> data
Name Sex Age
a lisa f 22
b joy f 22
c tom m 21
使用loc 和 iloc 定位一个数据后直接修改
单个修改,修改数据
>>> data.loc["a","Sex"] = "m"
>>> data
姓名 Sex Age
a lisa m 22
b joy f 22
c tom m 21
多个修改使用列表
>>> data.loc["a",["Sex","Age"]] = ["f",18]
>>> data
姓名 Sex Age
a lisa f 18
b joy f 22
c tom m 21