1.查询数据库
pd.read_sql(sql, engine)
2.分组
for key, val in dn.groupby(by='code'):
3.移动平均
val['close'].rolling(200).mean() # 200分钟移动平均数
4.向下移位
val['close2'].shift(60)
5.删除nan
.dropna(inplace=True)
6.关于<class 'pandas.core.indexes.multi.MultiIndex'>的处理 这是一种多索引 取的时候比较麻烦
result_frame = result.to_frame()
print(result_frame)
print(type(result_frame.index))
print(result_frame.index.levels)
print(result_frame.index.labels[1])
print(result_frame.index.names)
详情查看pandas.MultiIndex — pandas 1.5.3 documentation
7 pandas 赋值
pd.at['btc'] = 2
8 pandas append
new_list = list()
new_dataframe = pd.DataFrame()
for x, y in outer_pd.groupby('ss'):
new_list.append(y)
new_dataframe = new_dataframe.append(new_list)
# 可以使用先插入到list 再最后一次append 到该DataFrame中
# TODO append 在之后的版本已被弃用 推荐以下写法
df1 = pd.DataFrame([['a', 1], ['b', 2]],
columns=['letter', 'number'])
df2 = pd.DataFrame([['a', 3], ['b', 4]],
columns=['letter', 'number'])
df3 = pd.DataFrame([['a', 5], ['b', 6]],
columns=['letter', 'number'])
sort_list = [df1, df2, df3]
new_df = sort_list[0].copy()
for sl in sort_list[1:]:
new_df = pd.concat([new_df, sl], ignore_index=True)
print(new_df)
9 pandas astype
df['price'] = df['price'].astype(float)
10 MultiIndex 获取某一个索引中每一级的标签:
MultiIndex(levels=[[2019-07-01 18:00:00, 2019-07-01 21:00:00, 2019-07-02 00:00:00, 2019-07-02 03:00:00, 2019-07-02 06:00:00, 2019-07-02 09:00:00], ['ALL_USDT', 'BCH', 'BSV', 'BTC', 'EOS', 'ETC', 'ETH', 'LTC', 'USDT', 'XRP']],
labels=[[0, 1, 2, 3, 4, 5], [0, 0, 0, 0, 0, 0]],
names=['dateTime', 'currency'])
print(m_index4.labels[0])
print(m_index4.labels[1])
11 pandas 字符串比较
# 后缀包含-USDT
usdt_df.index.str.endswith('-USDT', na=False)
usdt_df.index.str.contains('-USDT$', na=False)
# 不包括USDT
usdt_df.index.str.contains('^((?!USDT).)*$', na=False, regex=True)
12 pandas 转为[{row}] 形式:
list(df.T.to_dict().values())
13 日期索引生成
def time_group(t, unit='h'):
if isinstance(t, datetime.datetime):
if unit == 'h':
return t.replace(minute=0, second=0, microsecond=0)
elif unit == 'd':
return t.replace(hour=0, minute=0, second=0, microsecond=0)
if isinstance(t, str):
if unit == 'h':
return datetime.datetime.strptime(t[:13] + ':00:00', '%Y-%m-%d %H:%M:%S')
elif unit == 'd':
return datetime.datetime.strptime(t[:10] + ' 00:00:00', '%Y-%m-%d %H:%M:%S')
print(type(t))
log.warning(type(t))
return t
dn = pd.DataFrame(new_list)
print(dn)
start_time_str = dn['hour'].loc[dn['hour'].index[0]]
end_time_str = dn['hour'].loc[dn['hour'].index[-1]]
print(start_time_str, end_time_str)
se = pd.date_range(start_time_str, end_time_str, freq='H')
se_str = pd.Series(se).apply(time_group)
print(se_str)
print(se)
dn.set_index('hour', inplace=True)
dc = dn.reindex(se_str)
print(dc)
14 pandas 删除列
dc.drop(['batch_tag', 'crawl_at'], axis=1, inplace=True)
15 pandas 填充0
dc.fillna(0, inplace=True)
16 pandas 修改列名称
# 将名为index的列修改为last_share_time
dc.rename({'index': 'last_share_time'}, axis=1, inplace=True)