# pandas入门2

import numpy as np
import pandas as pd

# Sorting by index labels
obj = pd.Series(range(4), index=list('dabc'))
print(obj)
# A Series has only one axis, so sort_index() orders it by its index labels.
obj_by_label = obj.sort_index()
print(obj_by_label)

frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)
print(frame)
# A DataFrame can be sorted along either axis:
#   no arguments            -> sort the row labels
#   axis=1                  -> sort the column labels (ascending by default)
#   axis=1, ascending=False -> sort the column labels in descending order
for sort_kwargs in ({}, {'axis': 1}, {'axis': 1, 'ascending': False}):
    print(frame.sort_index(**sort_kwargs))

# Sorting by values
# Series.order() no longer exists in pandas (the original attempt failed with
# "no attribute order"); Series.sort_values() is its replacement.
obj = pd.Series([4, 7, -3, 2])
print(obj)
obj_by_value = obj.sort_values()
print(obj_by_value)

frame = pd.DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
print(frame)
# DataFrame.sort_index(by=...) has been removed from pandas;
# DataFrame.sort_values(by=...) is the supported way to sort rows by column values.
frame_by_b = frame.sort_values(by='b')  # order rows by column 'b'
print(frame_by_b)
# With several keys, 'a' is the primary key and 'b' breaks ties.
frame_by_a_then_b = frame.sort_values(by=['a', 'b'])
print(frame_by_a_then_b)

# Ranking
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
print(obj)
# Three ways of ranking the same values:
#   default (method='average')      -> tied values share the mean of their ranks
#   method='first'                  -> ties are ranked in order of appearance
#   ascending=False, method='max'   -> descending ranks; ties take the group's highest rank
for rank_kwargs in (
    {},
    {'method': 'first'},
    {'ascending': False, 'method': 'max'},
):
    print(obj.rank(**rank_kwargs))

frame = pd.DataFrame({
    'b': [4.3, 7, -3, 2],
    'a': [0, 1, 0, 1],
    'c': [-2, 5, 8, -2.5],
})
print(frame)
# Rank the values within each row (i.e. across the columns).
row_ranks = frame.rank(axis='columns')
print(row_ranks)

# Axis indexes with duplicate labels
obj = pd.Series(range(5), index=['a', 'd', 'a', 'b', 'c'])
print(obj)
print(obj.index.is_unique)  # False: the label 'a' appears twice
print(obj['a'])  # a duplicated label selects every matching entry (a Series)
print(obj['c'])  # a unique label selects a single scalar value

frame = pd.DataFrame(np.arange(8).reshape(4, 2), index=['a', 'a', 'b', 'c'])
print(frame)
# The .ix indexer was removed from pandas; .loc is the label-based replacement.
rows_a = frame.loc['a']
print(rows_a)

# Summary and descriptive statistics
df = pd.DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    index=list('abcd'),
    columns=['one', 'two'],
)
print(df)

column_totals = df.sum()  # one total per column
print(column_totals)
row_totals = df.sum(axis='columns')  # one total per row; NA values are skipped by default
print(row_totals)
strict_row_totals = df.sum(axis='columns', skipna=False)  # keep NA values in the sum
print(strict_row_totals)
strict_row_means = df.mean(axis='columns', skipna=False)  # per-row mean, NA values kept
print(strict_row_means)
print(df.idxmax())  # index label of each column's maximum
print(df.cumsum())  # cumulative sum down each column
print(df.describe())  # several summary statistics in one call

obj = pd.Series(['a', 'a', 'b', 'c'] * 4)
print(obj)
text_summary = obj.describe()  # describe() on the string-valued Series
print(text_summary)

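# Correlation and covariance -- kept commented out: the Yahoo data may be unavailable, and `web` is not imported in the visible part of this file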
# all_data = {}
# for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
#     all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')
# price = pd.DataFrame({tic: data['Adj Close'] for tic,data in all_data.items()})
# print(price)

# Unique values, value counts, and membership
obj = pd.Series(list('adacdd'))
distinct_values = obj.unique()
print(distinct_values)
frequencies = obj.value_counts()
print(frequencies)
# NOTE(review): the earlier attempt `obj.value_counts(obj.values, sort=False)` did not
# work and stays disabled; `obj.value_counts(sort=False)` is presumably the intended call.
# print(obj.value_counts(obj.values, sort=False))
wanted = ['b', 'c']
mask = obj.isin(wanted)  # boolean mask: True where the value is one of `wanted`