# In short, this is another exercise in using the pandas package.
# Notes:
# 1. dropping columns with drop (basic);
# 2. merging DataFrames with merge.
# This calculation method yields the most up-to-date single-quarter financial statements.
import pandas as pd
######################
def _build_field_table_map():
    """Map every public field name to the ``pit_financials`` table declaring it.

    Walks the public attributes of ``pit_financials`` (a name supplied by the
    surrounding environment, not defined in this file) and, for each table,
    its public attributes.  If a field name occurs in several tables, the
    table visited last wins.
    """
    field_to_table = {}
    for table_name, table in pit_financials.__dict__.items():
        if table_name.startswith('_'):
            continue
        for field in table.__dict__:
            if not field.startswith('_'):
                field_to_table[field] = table_name
    return field_to_table


def _latest_record(frame):
    """Return the row of *frame* with the greatest ``info_date``.

    The ``info_date`` and ``if_adjusted`` columns are removed from the result.
    A new frame is returned; *frame* itself is left untouched.
    """
    newest = frame.sort_values('info_date', ascending=False).head(1)
    return newest.drop(columns=['info_date', 'if_adjusted'])


def get_quarterly_financials(data_points, quarters, max_info_date, order_book_ids):
    """Collect single-quarter values for several fields, quarters and securities.

    For each data point, the most recently disclosed record (largest
    ``info_date``) is taken per quarter and security:

    * fields that ``pit_financials`` places in ``balance_sheet`` are returned
      as reported;
    * all other fields are returned as reported for a quarter ending in ``1``,
      and otherwise as the difference between the quarter's value and the
      previous quarter's value (presumably because the reported figures are
      year-to-date cumulative -- confirm against the data source).

    Securities with no data for a quarter (or, when a difference is needed,
    for the previous quarter) are skipped.

    Args:
        data_points: iterable of field names to fetch.
        quarters: iterable of quarter strings such as ``'2018q2'``; the first
            four characters are the year and the last character is the
            quarter number.
        max_info_date: passed through unchanged to ``get_pit_financials``.
        order_book_ids: iterable of security identifiers.

    Returns:
        A DataFrame.  The rows of the first data point that yields any data
        form the base; each later data point is outer-merged onto it by
        index.  Empty if nothing was found.

    Note:
        ``get_pit_financials`` is supplied by the surrounding environment.  It
        is expected to return a DataFrame containing ``info_date``,
        ``if_adjusted`` and a column named after the requested data point.
    """
    field_to_table = _build_field_table_map()
    databulk = pd.DataFrame()

    for data_point in data_points:
        is_balance_sheet = field_to_table.get(data_point) == 'balance_sheet'
        # Gather the per-security rows in a list and concatenate once:
        # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
        frames = []

        for quarter in quarters:
            # prev_quarter stays None when the value is used as reported
            # (balance-sheet field, or first quarter of the year).
            prev_quarter = None
            if not is_balance_sheet:
                quarter_no = int(quarter[-1])
                if quarter_no != 1:
                    prev_quarter = quarter[:4] + 'q' + str(quarter_no - 1)

            for order_book_id in order_book_ids:
                current = get_pit_financials(data_point, quarter, max_info_date, order_book_id)
                if current.empty:
                    continue

                if prev_quarter is None:
                    frames.append(_latest_record(current))
                    continue

                previous = get_pit_financials(data_point, prev_quarter, max_info_date, order_book_id)
                if previous.empty:
                    # Without the previous quarter no difference can be formed.
                    continue

                latest = _latest_record(current)
                latest_previous = _latest_record(previous)
                latest[data_point] = (
                    latest[data_point].values[0] - latest_previous[data_point].values[0]
                )
                frames.append(latest)

        per_point = pd.concat(frames) if frames else pd.DataFrame()

        if databulk.empty:
            databulk = per_point
        else:
            # NOTE(review): the merge aligns on the row index exactly as
            # returned by get_pit_financials; confirm that index uniquely
            # identifies a (security, quarter) pair, otherwise the outer
            # merge will multiply rows.
            databulk = databulk.merge(per_point, how='outer', left_index=True, right_index=True)

    return databulk
#################
# Example run: three fields (the first two are differenced per quarter unless
# they are balance-sheet fields) for two securities over 2018 Q1 and Q2.
get_quarterly_financials(
    data_points=['operating_revenue', 'net_profit', 'total_assets'],
    quarters=['2018q1', '2018q2'],
    max_info_date=None,
    order_book_ids=['000001.XSHE', '000048.XSHE'],
)