1.从Series中索引
# Build a float Series with string labels so that label-based and
# position-based selection can be compared side by side.
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
print(obj)
print(obj['a'])  # single label -> scalar value
print(obj[['b', 'a', 'c']])  # list of labels -> Series in the requested order
print(obj[2:4])  # integer slice is positional here: the end position is excluded
# Selecting by integer position goes through iloc: plain obj[[1, 3]] on a
# label-indexed Series treats the integers as positions, which is deprecated
# in recent pandas releases.
print(obj.iloc[[1, 3]])
print(obj[obj < 2])  # boolean mask keeps the entries where the condition holds
print(obj['b':'c'])  # label slice: the end label IS included
# A label slice can also be assigned to; this modifies obj in place.
obj['b':'c'] = 5
print(obj)
结果:
a 0.0
b 1.0
c 2.0
d 3.0
dtype: float64
0.0
b 1.0
a 0.0
c 2.0
dtype: float64
c 2.0
d 3.0
dtype: float64
b 1.0
d 3.0
dtype: float64
a 0.0
b 1.0
dtype: float64
b 1.0
c 2.0
dtype: float64
a 0.0
b 5.0
c 5.0
d 3.0
dtype: float64
2.从DataFrame中索引
# 4x4 integer frame: state names label the rows, number words label the columns.
row_labels = ['Ohio', 'Colorado', 'Utah', 'New York']
col_labels = ['one', 'two', 'three', 'four']
data = pd.DataFrame(np.arange(16).reshape(4, 4), index=row_labels, columns=col_labels)
print(data)
print(data['two'])  # a single column comes back as a Series
print(data[['three', 'one']])  # a list of column names -> DataFrame in that order
# Slicing inside [] applies to rows only; it cannot slice columns,
# so something like data['two':'four'] is not valid here.
print(data[2:4])
print(data[:2])
three_gt_5 = data['three'] > 5
print(data[three_gt_5])
# Combine conditions with the bitwise & operator; when written inline,
# each comparison has to be wrapped in parentheses.
four_gt_11 = data['four'] > 11
print(data[four_gt_11 & three_gt_5])
# Index the DataFrame with a boolean DataFrame of the same shape.
below_five = data < 5
print(below_five)
data[below_five] = 0
print(data)
结果:
one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
Ohio 1
Colorado 5
Utah 9
New York 13
Name: two, dtype: int32
three one
Ohio 2 0
Colorado 6 4
Utah 10 8
New York 14 12
one two three four
Utah 8 9 10 11
New York 12 13 14 15
one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
one two three four
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
one two three four
New York 12 13 14 15
one two three four
Ohio True True True True
Colorado True False False False
Utah False False False False
New York False False False False
one two three four
Ohio 0 0 0 0
Colorado 0 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
5.2.3.1 使用 loc 和 iloc 选择数据。loc 按轴标签(指定的标签)选取,iloc 按整数位置选取
通过标签选出单行多列的数据:
# Label-based: one row ('Utah') and several columns in the requested order.
utah_columns = ['three', 'one', 'four']
print(data.loc['Utah', utah_columns])
# Position-based: row 1 with columns 0, 3, 1.
print(data.iloc[1, [0, 3, 1]])
# Inside iloc[...] the first list selects rows, the second selects columns.
row_positions = [1, 2]
column_positions = [3, 0, 1]
print(data.iloc[row_positions, column_positions])
# Label slice on rows (up to 'Utah'), restricted to column 'two'.
print(data.loc[:'Utah', 'two'])
# Chained selection: take the first three columns by position, then filter
# the rows with a boolean mask built from column 'three'
# (data.three is attribute-style access to that column).
first_three_columns = data.iloc[:, :3]
print(first_three_columns[data.three > 5])
结果:
three 10
one 8
four 11
Name: Utah, dtype: int32
one 4
four 7
two 5
Name: Colorado, dtype: int32
four one two
Colorado 7 4 5
Utah 11 8 9
Ohio 1
Colorado 5
Utah 9
Name: two, dtype: int32
one two three
Colorado 4 5 6
Utah 8 9 10
New York 12 13 14
总结:
DataFrame 索引选项:
# Recap of the DataFrame indexing options, applied to the existing `data` frame.
print(data)
print(data['two'])  # one column
print(data[['three', 'one']])  # two columns, in the requested order
# Slicing inside [] applies to rows only; it cannot slice columns,
# so something like data['two':'four'] is not valid here.
print(data[2:4])
print(data[:2])  # row slice
three_gt_5 = data['three'] > 5
print(data[three_gt_5])  # rows where the condition on column 'three' holds
# Combine conditions with the bitwise & operator; when written inline,
# each comparison has to be wrapped in parentheses.
four_gt_11 = data['four'] > 11
print(data[four_gt_11 & three_gt_5])
# Index the DataFrame with a boolean DataFrame of the same shape.
below_five = data < 5
print(below_five)
data[below_five] = 0
print(data)
print(data.loc['Utah'])  # one row, selected by label
utah_columns = ['three', 'one', 'four']
print(data.loc['Utah', utah_columns])  # one row, chosen columns
print(data.loc[:'Utah', 'two'])  # label slice on rows, single column
print(data.iloc[1, [0, 3, 1]])  # row position 1, chosen column positions
# Inside iloc[...] the first list selects rows, the second selects columns.
row_positions = [1, 2]
column_positions = [3, 0, 1]
print(data.iloc[row_positions, column_positions])
# Chained selection: take the first three columns by position, then filter
# the rows with a boolean mask built from column 'three'
# (data.three is attribute-style access to that column).
first_three_columns = data.iloc[:, :3]
print(first_three_columns[data.three > 5])