# 系列课题:pandas.DataFrame操作(二)索引、切片
# Imports needed to run this example.
import numpy as np
import pandas as pd

# 3x3 demo frame: default integer row labels 0..2 and columns 'a', 'b', 'c'.
df = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=['a', 'b', 'c'],
)
# 原始:
"""
>>>df
a b c
0 1 2 3
1 4 5 6
2 7 8 9
"""
# 行索引:
"""
>>>df.loc[0]
a 1
b 2
c 3
>>>df.iloc[1]
a 4
b 5
c 6
"""
# 行切片:
"""
>>>df.loc[[0,2]] # 第0、第2行
a b c
0 1 2 3
2 7 8 9
>>>df.loc[:1] # 前2行(loc按标签切片,包含结束标签1)
a b c
0 1 2 3
1 4 5 6
>>>df.iloc[:2] # 前2行(iloc按位置切片,不包含结束位置2)
a b c
0 1 2 3
1 4 5 6
"""
# 行、列索引
"""
>>>df.loc[0, 'b'] # 某一行,某一列
2
>>>df.loc[[0,1], 'b'] # 多行,某一列
0 2
1 5
>>>df.loc[[0,1], ['b', 'c']] # 行、列挑选
b c
0 2 3
1 5 6
>>>df.loc[:1, :'b'] # 行、列切片
a b
0 1 2
1 4 5
>>>df.iloc[1,1] # 行、列索引
5
>>>df.iloc[:2, :2] # 行、列切片
a b
0 1 2
1 4 5
>>>df.iloc[[0,2], [0,2]] # 行、列挑选
a c
0 1 3
2 7 9
"""
# 列索引
"""
>>>df['b'] # 某一列
0 2
1 5
2 8
>>>df[['b', 'c']] # 多列
b c
0 2 3
1 5 6
2 8 9
"""
# 条件索引
"""
>>>df.iloc[lambda x: x.index % 2 == 0, [0,2]] # 按条件挑选行(索引为偶数的行),并挑选第0、第2列
a c
0 1 3
2 7 9
"""
# Imports are repeated here so this example can be run on its own.
import datetime

import pandas as pd

# Sample lot records; every row is stamped with the time it was created.
df = pd.DataFrame(
    [
        ["AP9788", 4, "0004", datetime.datetime.now()],
        ["AP9768", 2, "0001", datetime.datetime.now()],
        ["AP9998", 7, "0546B", datetime.datetime.now()],
        ["AP1722", 5, "0196A", datetime.datetime.now()],
    ],
    columns=["LOT_ID", "Wafer_No", "Product_ID", "Date"],
)

# Build one boolean mask per condition.
# ``str.contains(..., regex=False)`` is a literal substring test, and
# ``na=False`` makes rows with a missing LOT_ID not match.  The previous form,
# ``str.find(...) != -1``, selected such rows because NaN != -1 is True.
a = df["LOT_ID"].str.contains("AP1", regex=False, na=False)
# Further conditions can be built the same way and OR-ed together, e.g.:
# b = df["LOT_ID"].str.contains("set", regex=False, na=False)
# c = df["LOT_ID"].str.contains("2x", regex=False, na=False)
# d = df["LOT_ID"].str.contains("of 2", regex=False, na=False)
# pattern = a | b | c | d  # combined mask
pattern = a

# Apply the filter: keep the rows where the mask is True, with all columns.
# The bare expression is only displayed in a REPL/notebook; wrap it in
# print(...) when running this as a script.
df.loc[pattern, :]
# 输出如下:
"""
LOT_ID Wafer_No Product_ID Date
3 AP1722 5 0196A 2021-06-08 17:04:08.685707
"""