#_*_ coding: utf-8 _*_
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Sample frame for the demos below: two label columns ('A', 'B') plus two
# columns of standard-normal noise ('C', 'D'), eight rows in total.
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
})
# Draw 'C' before 'D' so the random stream is consumed in the same order as
# when both are built inside a single dict literal.
df['C'] = np.random.randn(len(df))
df['D'] = np.random.randn(len(df))
# Grouping: print per-group summary statistics, first keyed by 'A' alone and
# then by the ('A', 'B') pair.
for _group_keys in ('A', ['A', 'B']):
    print(df.groupby(_group_keys).describe())
# Sample output of the second call (values are random, so every run differs;
# pandas elides the middle columns with "..."):
#                C                      ...         D
#            count      mean       std  ...       50%       75%       max
# A   B                                 ...
# bar one      1.0  1.635945       NaN  ... -2.127976 -2.127976 -2.127976
#     three    1.0 -1.323161       NaN  ... -1.473265 -1.473265 -1.473265
#     two      1.0 -0.273423       NaN  ... -0.635216 -0.635216 -0.635216
# foo one      2.0  0.413473  0.268983  ...  0.443146  0.558410  0.673674
#     three    1.0 -1.413352       NaN  ... -0.902088 -0.902088 -0.902088
#     two      2.0 -0.081993  1.494454  ... -0.481973 -0.429782 -0.377591
# Pivot tables: one row per value of 'A', one column per value of 'B', shown
# separately for 'C' and 'D'. No aggfunc is given, so pandas' default
# aggregation is applied to each cell.
_pivoted = pd.pivot_table(
    data=df,
    values=['C', 'D'],
    index='A',
    columns='B',
)
print(_pivoted)
# Sample output (values are random, so every run differs):
#             C                             D
# B         one     three       two       one     three       two
# A
# bar  1.635945 -1.323161 -0.273423 -2.127976 -1.473265 -0.635216
# foo  0.413473 -1.413352 -0.081993  0.443146 -0.902088 -0.481973
# Time series
# Start with 100 samples spaced 10 seconds apart (1000 s in total); summing
# them into 5-minute bins leaves 4 rows.
# The lowercase aliases 's' and 'min' are used on purpose: the uppercase 'S'
# spelling is deprecated in recent pandas releases and emits a FutureWarning.
rng = pd.date_range('1/1/2021', periods=100, freq='10s')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
tp = ts.resample('5min').sum()
print(tp)
# Sample output (values are random; the "Freq"/"dtype" footer shown here was
# recorded on one particular setup and may differ by pandas version/platform):
# 2021-01-01 00:00:00    6402
# 2021-01-01 00:05:00    6853
# 2021-01-01 00:10:00    7344
# 2021-01-01 00:15:00    3633
# Freq: 5T, dtype: int32
# Time zones
# Both calls attach a zone to the naive index (the wall-clock times stay the
# same); neither converts between zones.
# NOTE(review): if an actual conversion was intended, the usual form is
# tp.tz_localize('UTC').tz_convert('US/Eastern') -- confirm with the author.
print(tp.tz_localize('UTC'))
print(tp.tz_localize('US/Eastern'))
# Visualization (example left commented out)
# ts = df.cumsum()
# df.plot()
# plt.show()
# Data input / output
# Round-trip the frame through a CSV file and then an Excel workbook.
#
# set_index is given the column *name*: passing the Series df['A'] would keep
# column 'A' and also copy it into the index, so the CSV would carry two 'A'
# headers and read_csv would hand back a duplicate 'A.1' column.
df = df.set_index('A')
df.to_csv('foo.csv')
# No index_col here, so 'A' comes back as an ordinary column.
df = pd.read_csv('foo.csv')
# index=False keeps the default integer index out of the sheet; otherwise it
# would be read back below as a junk 'Unnamed: 0' column.
df.to_excel('foo.xlsx', sheet_name='Sheet1', index=False)
t = pd.read_excel(
    'foo.xlsx',
    sheet_name='Sheet1',
    index_col='A',
    na_values=['NA'],
)
# Source article: "Python pandas primer (2)"
# (blog page footer: latest recommended article published 2024-04-22 10:13:44)