pandas 中的loc和iloc

最新推荐文章于 2022-10-28 23:59:44 发布

_zzh

最新推荐文章于 2022-10-28 23:59:44 发布

阅读量331

点赞数

本文链接：https://blog.csdn.net/king_26852/article/details/94890400

版权

以下示例摘自 pandas 官方文档（索引与数据选取部分）：

dates = pd.date_range('1/1/2000', periods=8)
df = pd.DataFrame(np.random.randn(8, 4),index=dates, columns=['A', 'B', 'C', 'D'])
                  A         B         C         D
2000-01-01  0.469112 -0.282863 -1.509059 -1.135632
2000-01-02  1.212112 -0.173215  0.119209 -1.044236
2000-01-03 -0.861849 -2.104569 -0.494929  1.071804
2000-01-04  0.721555 -0.706771 -1.039575  0.271860
2000-01-05 -0.424972  0.567020  0.276232 -1.087401
2000-01-06 -0.673690  0.113648 -1.478427  0.524988
2000-01-07  0.404705  0.577046 -1.715002 -1.039268
2000-01-08 -0.370647 -1.157892 -1.344312  0.844885
s = df['A']
s[dates[5]]
也可以通过列名列表交换两列的值：
df[['A','B']] = df[['B', 'A']]
使用 .loc 赋值时，pandas 会先按列名对齐，直接赋值不会交换两列；需要用 to_numpy() 传入原始值才能真正交换：
df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy()
dfa['A'] = list(range(len(dfa.index)))
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})
x.iloc[1] = {'x': 9, 'y': 99}
s[::2]
s[::-1]
s2 = s.copy()
s2[:5] = 0
df1.loc['d':, 'A':'C']
df1.loc['a'] > 0
df1.loc[:, df1.loc['a'] > 0]
df1 = pd.DataFrame(np.random.randn(6, 4),index=list('abcdef'),columns=list('ABCD'))
df1.loc['a'] > 0
df1.loc[:, df1.loc['a'] > 0]
s.sort_index()
s.sort_index().loc[1:6]
df1.iloc[[1, 3, 5], [1, 3]]
df1.iloc[1:3, :]
df1.loc[lambda df: df.A > 0, :]   # 索引器也可以是函数（callable）
df1.loc[:, lambda df: ['A', 'B']]
df1.iloc[:, lambda df: [0, 1]]
df1[lambda df: df.columns[0]]
df1.A.loc[lambda s: s > 0]
dfd = pd.DataFrame({'A': [1, 2, 3],'B': [4, 5, 6]},index=list('abc'))
dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])]
dfd.iloc[[0, 2], dfd.columns.get_loc('A')]
s.sample(n=3)
s.sample(frac=0.5)
s[(s < -1) | (s > 0.5)]
s[~(s < 0)]
df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'],'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'],'c': np.random.randn(7)})
criterion = df2['a'].map(lambda x: x.startswith('t'))
df2[criterion]
df2[[x.startswith('t') for x in df2['a']]]
df2[criterion & (df2['b'] == 'x')]
df2.loc[criterion & (df2['b'] == 'x'), 'b':'c']
s.isin([2, 4, 6])
s[s.isin([2, 4, 6])]
s[s.index.isin([2, 4, 6])]
s.reindex([2, 4, 6])
s_mi = pd.Series(np.arange(6),index=pd.MultiIndex.from_product([[0, 1], ['a', 'b', 'c']]))
s_mi.iloc[s_mi.index.isin([(1, 'a'), (2, 'b'), (0, 'c')])]
s_mi.iloc[s_mi.index.isin(['a', 'c', 'e'], level=1)]
df = pd.DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'],'ids2': ['a', 'n', 'c', 'n']})
values = ['a', 'b', 1, 3]
如果要按列分别匹配，可以传入字典（键为列名，值为该列要匹配的值列表）：
values = {'ids': ['a', 'b'], 'vals': [1, 3]}
df.isin(values)
values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]}
row_mask = df.isin(values).all(1)
return only the selected rows (e.g. s[s > 0]):
return a Series of the same shape as the original:
s.where(s > 0)
df[df < 0]
df.where(df < 0, -df)
df.where(df < 0, -df) == np.where(df < 0, df, -df)
df2[df2[1:4] > 0] = 3
df2.where(df2 > 0, df2['A'], axis='index')
df.apply(lambda x, y: x.where(x > 0, y), y=df['A'])
df3.where(lambda x: x > 4, lambda x: x + 10)
df[(df.a < df.b) & (df.b < df.c)]
df.query('(a < b) & (b < c)')
df.query('index < b < c')
df.query('index > 2')
df.query('ilevel_0 == "red"')
expr = '0.0 <= a <= c <= 0.5'
map(lambda frame: frame.query(expr), [df, df2])
df.query('(a < b) & (b < c)')
df[(df.a < df.b) & (df.b < df.c)]
df.query('a in b')
df[df.a.isin(df.b)]
df.query('a not in b')
df[~df.a.isin(df.b)]
df.query('a in b and c < d')
df[df.b.isin(df.a) & (df.c < df.d)]
df.query('a in b + c + d')
df.query('b == ["a", "b", "c"]')
df[df.b.isin(["a", "b", "c"])]
df.query('c == [1, 2]')
df.query('c != [1, 2]')
df.query('[1, 2] in c')
df.query('[1, 2] not in c')
df[df.c.isin([1, 2])]
df.query('~bools')
df.query('not bools')
df.query('not bools') == df[~df.bools]
shorter = df.query('a < b < c and (not bools) or bools > 2')
longer = df[(df.a < df.b) & (df.b < df.c) & (~df.bools) | (df.bools > 2)]
df2.duplicated(['a', 'b'])
df2.drop_duplicates(['a', 'b'])
df3.index.duplicated()
df3[~df3.index.duplicated()]
df3[~df3.index.duplicated(keep='last')]
df3[~df3.index.duplicated(keep=False)]
s.get('x', default=-1)
dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D'])
dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D'])
index = pd.Index(['e', 'd', 'a', 'b'], name='something')
index = pd.Index(list(range(5)), name='rows')
columns = pd.Index(['A', 'B', 'C'], name='cols')
df = pd.DataFrame(np.random.randn(5, 3), index=index, columns=columns)
ind = pd.Index([1, 2, 3])
ind.rename("apple")
ind.set_names(["apple"], inplace=True)
ind.name = "bob"
index = pd.MultiIndex.from_product([range(3), ['one', 'two']], names=['first', 'second'])
index.levels[1]
index.set_levels(["a", "b"], level=1)
a = pd.Index(['c', 'b', 'a'])
b = pd.Index(['c', 'e', 'd'])
a | b
a & b
a.difference(b)
Also available is the symmetric_difference (^) operation, which returns elements that appear in either idx1 or idx2, but not in both. This is equivalent to the Index created by idx1.difference(idx2).union(idx2.difference(idx1)), with duplicates dropped.
idx1 = pd.Index([1, 2, 3, 4])
idx2 = pd.Index([2, 3, 4, 5])
idx1.symmetric_difference(idx2)
idx1 ^ idx2
idx1.fillna(2)
indexed1 = data.set_index('c')
indexed2 = data.set_index(['a', 'b'])
frame = data.set_index('c', drop=False)
frame = frame.set_index(['a', 'b'], append=True)
data.set_index('c', drop=False)
data.set_index(['a', 'b'], inplace=True)
data.reset_index()
frame.reset_index(level=1)
dfmi.loc[:, ('one', 'second')] = value
dfmi.loc.__setitem__((slice(None), ('one', 'second')), value)
dfmi['one']['second'] = value
dfmi.__getitem__('one').__setitem__('second', value)

_zzh

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
pandas 中的loc和iloc

选择自pandas文档dates = pd.date_range('1/1/2000', periods=8)df = pd.DataFrame(np.random.randn(8, 4),index=dates, columns=['A', 'B', 'C', 'D']) A B C D2000-01...
复制链接

扫一扫