10.5.3 数据处理
本节介绍的函数用于对提取到的财务数据进行处理和整理,以便进一步开展定量分析并生成报告。
(1)函数merge_subsets_yearly的主要功能是将子集数据合并到总集中,以便获得更全面的数据集。在执行合并时,函数会根据必须包含的条件来确定是否需要包含某些特定日期的数据。最终,总集中将包含所有子集数据,并且按照日期进行排序。
def merge_subsets_yearly(superset, subsets, must_include=None):
    """Merge yearly subset series into ``superset`` in place.

    Every series is a dict holding two parallel lists under the keys
    ``"dates"`` and ``"values"``.  Dates that ``superset`` already contains
    are never modified; for every other eligible date the values reported by
    the subsets are summed and the total is appended to ``superset``.
    Afterwards ``superset`` is re-sorted by date.

    Args:
        superset: Series to extend; mutated in place.
        subsets: Sequence of series whose values are summed per date.
        must_include: Optional tuple of indexes into ``subsets``.  When
            given, only dates of the first indexed subset that are present
            in every indexed subset are added (all subsets, indexed or not,
            still contribute their value for those dates).  When ``None``,
            every date found in any subset is eligible.

    Returns:
        None.  When ``must_include`` is given and produces no new date, the
        function returns early and ``superset`` is left untouched (it is not
        re-sorted either).

    Raises:
        TypeError: If ``must_include`` is neither ``None`` nor a tuple.
    """
    existing_dates = superset["dates"]
    new_dates = []
    new_values = []

    if must_include is None:
        for subset in subsets:
            for i, date in enumerate(subset["dates"]):
                if date in existing_dates:
                    continue
                if date in new_dates:
                    # Another subset already reported this date: accumulate.
                    new_values[new_dates.index(date)] += subset["values"][i]
                else:
                    # First subset reporting this date.
                    new_dates.append(date)
                    new_values.append(subset["values"][i])
    else:
        if not isinstance(must_include, tuple):
            raise TypeError("must_include必须是元组")

        # Candidate dates come from the first required subset; a candidate
        # survives only if every required subset reports it as well.
        reference_dates = subsets[must_include[0]]["dates"]
        required_subsets = [subsets[m] for m in must_include]
        new_dates = [
            date
            for date in reference_dates
            if date not in existing_dates
            and all(date in required["dates"] for required in required_subsets)
        ]
        if not new_dates:
            return

        # Start every new date at zero, then add each subset's contribution.
        new_values = [0] * len(new_dates)
        for subset in subsets:
            for i, date in enumerate(subset["dates"]):
                if date in new_dates:
                    new_values[new_dates.index(date)] += subset["values"][i]

    existing_dates.extend(new_dates)
    superset["values"].extend(new_values)

    # Keep dates and values aligned while ordering the series by date.
    ordered = sorted(zip(superset["dates"], superset["values"]))
    superset["dates"] = [date for date, _ in ordered]
    superset["values"] = [value for _, value in ordered]
(2)函数merge_subsets_most_recent的功能是将多个子集中的最新值合并成一个总集,用于合并最新的财务数据。
def merge_subsets_most_recent(superset, subsets):
    """Fold the most recent subset values into ``superset`` in place.

    ``superset`` and every subset are dicts with a ``"date"`` entry (``None``
    when no value is known) and a ``"value"`` entry.  The superset is
    replaced only when it has no date, or when at least one subset carries a
    strictly later date.  In that case it takes the latest subset date and
    the sum of the values of all subsets reported on exactly that date.

    Args:
        superset: Dict to update; mutated in place.
        subsets: Sequence of dicts to merge.

    Returns:
        The same ``superset`` object (updated or not), so callers that
        assign the result do not end up with ``None``.
    """
    current = superset["date"]
    needs_update = any(
        current is None or (s["date"] is not None and s["date"] > current)
        for s in subsets
    )
    if not needs_update:
        return superset

    known_dates = [s["date"] for s in subsets if s["date"] is not None]
    # Neither the superset nor any subset carries a value: nothing to merge.
    if not known_dates:
        return superset

    latest = max(known_dates)
    superset["date"] = latest
    superset["value"] = sum(s["value"] for s in subsets if s["date"] == latest)
    return superset
(3)函数extract_shares的功能是从财务文件中提取股份信息,并处理不同的股份度量单位,以确保数据的一致性。另外,它还处理了一些异常情况,例如财务文件中股份单位错误的问题。
def extract_shares(doc, quarter_of_annual_report, years_diff):
    """Extract the most recent and the yearly share counts from a filing.

    Three sources are combined: the ``dei`` cover-page measure
    ``EntityCommonStockSharesOutstanding``, ``CommonStockSharesOutstanding``
    and ``WeightedAverageNumberOfSharesOutstandingBasic``.  Values that are
    less than 1% of the largest yearly share count are assumed to have been
    reported in the wrong unit and are multiplied by 1000.

    Args:
        doc: Filing document, passed through to the extraction helpers.
        quarter_of_annual_report: Forwarded to the extraction helpers.
        years_diff: Forwarded to the extraction helpers.

    Returns:
        A dict with ``"mr_shares"`` (most recent ``date``/``value`` dict) and
        ``"shares"`` (yearly ``dates``/``values`` series).

    Raises:
        NoSharesException: If no yearly share count is available, or if the
            largest yearly share count is zero (the rescaling ratio would be
            undefined).
    """
    debug = False

    df = build_financial_df(doc, "EntityCommonStockSharesOutstanding", unit="shares", tax="dei")
    if debug and df is not None:
        print(df.to_markdown())

    try:
        most_recent_shares = get_most_recent_value_from_df(df)
    except Exception:
        # Best effort: the cover-page measure may be missing from the filing.
        most_recent_shares = {"date": None, "value": 0}

    mr_common_shares, _, yearly_common_shares = get_values_from_measures(
        doc, ["CommonStockSharesOutstanding"], instant=True,
        quarter_of_annual_report=quarter_of_annual_report,
        years_diff=years_diff, get_ttm=False,
        get_most_recent=True, debug=debug, unit="shares")

    mr_average_shares, _, yearly_average_shares = get_values_from_measures(
        doc, ["WeightedAverageNumberOfSharesOutstandingBasic"], instant=False,
        quarter_of_annual_report=quarter_of_annual_report,
        years_diff=years_diff, get_ttm=False,
        get_most_recent=True, debug=debug, unit="shares")

    merge_subsets_most_recent(most_recent_shares, [mr_common_shares])
    merge_subsets_most_recent(most_recent_shares, [mr_average_shares])

    # Common shares are completed with the weighted average in every case.
    merge_subsets_yearly(yearly_common_shares, [yearly_average_shares])
    try:
        yearly_shares = get_yearly_values_from_df(
            df, instant=True, quarter_of_annual_report=quarter_of_annual_report,
            years_diff=years_diff)
        merge_subsets_yearly(yearly_shares, [yearly_common_shares])
    except Exception:
        # No usable cover-page series: fall back to the common-share series.
        yearly_shares = yearly_common_shares

    # In some filings the company reports shares with a wrong unit of measure
    # (million shares instead of thousand shares).
    try:
        max_num_shares = max(yearly_shares["values"])
    except Exception as err:
        raise NoSharesException() from err
    if max_num_shares == 0:
        # All yearly counts are zero; dividing by the maximum would fail.
        raise NoSharesException()

    yearly_shares["values"] = [
        x * 1000 if x / max_num_shares < 0.01 else x
        for x in yearly_shares["values"]
    ]
    if most_recent_shares["value"] / max_num_shares < 0.01:
        most_recent_shares["value"] *= 1000

    return {
        "mr_shares": most_recent_shares,
        "shares": yearly_shares,
    }
(4)函数extract_income_statement的功能是从财务文件中提取利润表(损益表)信息,并返回一系列指标,包括总收入、毛利润、研发费用、息税前利润、净利润等。它还处理了一些特殊情况,例如利息支出在不同财务文件中可能以不同的度量项(XBRL标签)披露的问题。
def extract_income_statement(doc):
    """Extract income-statement figures from a filing document.

    Each figure is looked up through ``get_values_from_measures`` using a
    list of candidate XBRL tags.  Figures that may be reported in parts
    (R&D, depreciation and amortization, interest expenses) are completed by
    merging the partial series into the main one.

    Args:
        doc: Filing document, passed through to the extraction helpers.

    Returns:
        A dict with the trailing-twelve-month entries ``ttm_revenue``,
        ``ttm_gross_profit``, ``ttm_ebit``, ``ttm_net_income`` and
        ``ttm_interest_expenses``, the yearly series ``revenue``,
        ``gross_profit``, ``rd``, ``ebit``, ``depreciation`` and
        ``net_income``, plus ``last_annual_report_date`` and
        ``last_annual_report_fy`` taken from the revenue measures.
    """

    def ttm_and_yearly(measures):
        # Fetch the TTM value and the yearly series for the given tags.
        _, ttm, yearly = get_values_from_measures(doc, measures, get_most_recent=False, debug=False)
        return ttm, yearly

    def yearly_only(measures):
        # Fetch only the yearly series for the given tags.
        _, _, yearly = get_values_from_measures(
            doc, measures, get_ttm=False, get_most_recent=False, debug=False)
        return yearly

    def ttm_only(measures):
        # Fetch only the TTM value for the given tags.
        _, ttm, _ = get_values_from_measures(
            doc, measures, get_most_recent=False, get_yearly=False, debug=False)
        return ttm

    #### Revenue ####
    revenue_measures = [
        "Revenues",
        "RevenueFromContractWithCustomerExcludingAssessedTax",
        "RevenueFromContractWithCustomerIncludingAssessedTax",
        "SalesRevenueNet"
    ]
    ttm_revenue, yearly_revenue = ttm_and_yearly(revenue_measures)

    # Keep the latest annual report date (and its fiscal year) found across
    # all revenue measures.
    last_annual_report_date = None
    last_annual_report_fy = None
    for measure in revenue_measures:
        df = build_financial_df(doc, measure)
        if df is None or df.empty or "frame" not in df.columns:
            continue
        annual_rd, annual_fy = get_last_annual_report_date_and_fy(df)
        if last_annual_report_date is None or (
                annual_rd is not None and annual_rd > last_annual_report_date):
            last_annual_report_date = annual_rd
            last_annual_report_fy = annual_fy

    #### R and D ####
    yearly_rd = yearly_only(["ResearchAndDevelopmentExpense"])
    yearly_rd_not_inprocess = yearly_only(["ResearchAndDevelopmentExpenseExcludingAcquiredInProcessCost"])
    yearly_rd_inprocess = yearly_only(["ResearchAndDevelopmentInProcess"])
    merge_subsets_yearly(yearly_rd, [yearly_rd_not_inprocess, yearly_rd_inprocess])

    #### Net Income ####
    ttm_net_income, yearly_net_income = ttm_and_yearly([
        "NetIncomeLoss",
        "NetIncomeLossAvailableToCommonStockholdersBasic",
        "NetIncomeLossAvailableToCommonStockholdersDiluted",
        "ComprehensiveIncomeNetOfTax",
        "IncomeLossFromContinuingOperations",
        # including minority interest
        "ProfitLoss",
        "IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest",
        "IncomeLossFromSubsidiariesNetOfTax"
    ])

    #### Interest Expenses ####
    ttm_interest_expenses = ttm_only([
        "InterestExpense",
        "InterestAndDebtExpense",
        "InterestPaid",
        "InterestPaidNet",
        "InterestCostsIncurred"])

    # The TTM result is a dict, so the emptiness check must look at its
    # "value" entry; comparing the dict itself to 0 is never true.
    if ttm_interest_expenses["value"] == 0:
        ttm_ie_borrowings = ttm_only(["InterestExpenseBorrowings"])

        if ttm_ie_borrowings["value"] == 0:
            ttm_ie_debt = ttm_only(["InterestExpenseDebt",
                                    "InterestExpenseDebtExcludingAmortization"])

            if ttm_ie_debt["value"] == 0:
                # Rebuild debt interest from its long- and short-term parts.
                ttm_ie_debt_lt = ttm_only(["InterestExpenseLongTermDebt"])
                ttm_ie_debt_st = ttm_only(["InterestExpenseShortTermBorrowings"])
                merge_subsets_most_recent(ttm_ie_debt, [ttm_ie_debt_lt, ttm_ie_debt_st])

            ttm_ie_deposits = ttm_only(["InterestExpenseDeposits"])
            ttm_ie_others = ttm_only(["InterestExpenseOther"])
            ttm_ie_related = ttm_only(["InterestExpenseRelatedParty"])

            # The merge works in place; its return value must not replace
            # ttm_ie_borrowings.
            merge_subsets_most_recent(
                ttm_ie_borrowings,
                [ttm_ie_debt, ttm_ie_deposits, ttm_ie_others, ttm_ie_related])

        ttm_interest_expenses = ttm_ie_borrowings

    #### Gross Profit ####
    # XBRL element names contain no spaces.
    ttm_gross_profit, yearly_gross_profit = ttm_and_yearly(["GrossProfit"])

    #### Depreciation ####
    yearly_depreciation_amortization = yearly_only([
        "DepreciationDepletionAndAmortization",
        "DepreciationAmortizationAndAccretionNet"])
    yearly_depreciation = yearly_only(["Depreciation"])

    yearly_amortization_fincost_disc = yearly_only(["AmortizationOfFinancingCostsAndDiscounts"])
    yearly_amortization_disc = yearly_only(["AmortizationOfDebtDiscountPremium"])
    yearly_amortization_fincost = yearly_only(["AmortizationOfFinancingCosts"])
    merge_subsets_yearly(yearly_amortization_fincost_disc,
                         [yearly_amortization_disc, yearly_amortization_fincost])

    yearly_amortization_charges = yearly_only(["AmortizationOfDeferredCharges"])
    yearly_amortization_comm = yearly_only(["AmortizationOfDeferredSalesCommissions"])
    yearly_amortization_intan = yearly_only(["AmortizationOfIntangibleAssets"])

    yearly_amortization = {"dates": [], "values": []}
    merge_subsets_yearly(yearly_amortization,
                         [yearly_amortization_fincost_disc, yearly_amortization_charges,
                          yearly_amortization_comm, yearly_amortization_intan])
    merge_subsets_yearly(yearly_depreciation_amortization,
                         [yearly_depreciation, yearly_amortization])

    #### EBIT ####
    ttm_ebit, yearly_ebit = ttm_and_yearly([
        "OperatingIncomeLoss",
        "IncomeLossFromContinuingOperationsBeforeInterestExpenseInterestIncomeIncomeTaxesExtraordinaryItemsNoncontrollingInterestsNet",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesForeign",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesDomestic",
    ])

    return {
        "ttm_revenue": ttm_revenue,
        "ttm_gross_profit": ttm_gross_profit,
        "ttm_ebit": ttm_ebit,
        "ttm_net_income": ttm_net_income,
        "ttm_interest_expenses": ttm_interest_expenses,
        "revenue": yearly_revenue,
        "gross_profit": yearly_gross_profit,
        "rd": yearly_rd,
        "ebit": yearly_ebit,
        "depreciation": yearly_depreciation_amortization,
        "net_income": yearly_net_income,
        "last_annual_report_date": last_annual_report_date,
        "last_annual_report_fy": last_annual_report_fy
    }
(5)编写函数extract_balance_sheet_current_assets,从财务文件中提取资产负债表中的流动资产信息。通过提取和整合这些信息,可以帮助分析者了解公司的流动资产结构,评估其财务健康状况,并进行进一步的财务分析和预测。
def extract_balance_sheet_current_assets(doc, quarter_of_annual_report, years_diff):
    """Extract current-asset figures of the balance sheet from a filing.

    For cash, inventory, other current assets, receivables and securities
    the function fetches a main figure plus its possible components through
    ``get_values_from_measures`` and merges the components into the main
    most-recent value and yearly series.

    Args:
        doc: Filing document, passed through to the extraction helpers.
        quarter_of_annual_report: Forwarded to ``get_values_from_measures``.
        years_diff: Forwarded to ``get_values_from_measures``.

    Returns:
        A dict with a most-recent entry (``mr_*``) and a yearly series for
        each of ``cash``, ``inventory``, ``other_assets``, ``receivables``
        and ``securities``.
    """

    def fetch(measures, **overrides):
        # Fetch the most recent value and the yearly series for the given
        # tags as point-in-time (instant) figures.
        most_recent, _, yearly = get_values_from_measures(
            doc, measures, instant=True, quarter_of_annual_report=quarter_of_annual_report,
            years_diff=years_diff, get_ttm=False, debug=False, **overrides)
        return most_recent, yearly

    #### Cash ####
    most_recent_cash_and_restricted, yearly_cash_and_restricted = fetch(
        ["CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents"])
    most_recent_cash, yearly_cash = fetch(["CashAndCashEquivalentsAtCarryingValue", "Cash"])
    most_recent_restrictedcash, yearly_restrictedcash = fetch([
        "RestrictedCashAndCashEquivalentsAtCarryingValue",
        "RestrictedCashAndCashEquivalents",
        "RestrictedCash",
        "RestrictedCashAndInvestmentsCurrent",
        "RestrictedCashCurrent"
    ])

    # Plain cash is mandatory: a year is added only if plain cash reports it.
    merge_subsets_yearly(yearly_cash_and_restricted, [yearly_cash, yearly_restrictedcash],
                         must_include=(0,))

    # Use plain cash when the combined figure is missing or older, and add
    # restricted cash only if it refers to the same date.
    combined_date = most_recent_cash_and_restricted["date"]
    cash_date = most_recent_cash["date"]
    if combined_date is None or (cash_date is not None and cash_date > combined_date):
        most_recent_cash_and_restricted["date"] = cash_date
        most_recent_cash_and_restricted["value"] = most_recent_cash["value"]
        if most_recent_restrictedcash["date"] == cash_date:
            most_recent_cash_and_restricted["value"] += most_recent_restrictedcash["value"]

    #### Inventory ####
    most_recent_inventory, yearly_inventory = fetch([
        "InventoryNet",
        "InventoryGross",
        "FIFOInventoryAmount",
        "InventoryLIFOReserve",
        "LIFOInventoryAmount",
    ])
    most_recent_inventory_retail, yearly_inventory_retail = fetch([
        "RetailRelatedInventory",
        "RetailRelatedInventoryMerchandise"
    ])
    most_recent_inventory_energy, yearly_inventory_energy = fetch(["EnergyRelatedInventory"])
    most_recent_inventory_utilities, yearly_inventory_utilities = fetch(["PublicUtilitiesInventory"])
    most_recent_inventory_re, yearly_inventory_re = fetch(["InventoryRealEstate"])
    most_recent_inventory_airline, yearly_inventory_airline = fetch(["AirlineRelatedInventory"])

    merge_subsets_most_recent(most_recent_inventory,
                              [most_recent_inventory_retail, most_recent_inventory_airline,
                               most_recent_inventory_energy, most_recent_inventory_re,
                               most_recent_inventory_utilities])
    merge_subsets_yearly(yearly_inventory,
                         [yearly_inventory_retail, yearly_inventory_airline, yearly_inventory_energy,
                          yearly_inventory_re, yearly_inventory_utilities])

    #### Other Assets ####
    most_recent_other_current_assets, yearly_other_current_assets = fetch([
        "OtherAssetsCurrent",
        "OtherAssetsMiscellaneousCurrent",
        "PrepaidExpenseAndOtherAssetsCurrent",
        "OtherAssetsFairValueDisclosure",
        "OtherAssetsMiscellaneous",
        "PrepaidExpenseAndOtherAssets"
    ])
    most_recent_prepaid_exp, yearly_prepaid_exp = fetch(["PrepaidExpenseCurrent"])
    most_recent_prepaid_ins, yearly_prepaid_ins = fetch(["PrepaidInsurance"])
    # NOTE(review): yearly values are not requested for prepaid taxes
    # (get_yearly=False), yet the yearly result is merged below — confirm
    # this is intended.
    most_recent_prepaid_tax, yearly_prepaid_tax = fetch(
        ["PrepaidTaxes",
         "IncomeTaxesReceivable",
         "IncomeTaxReceivable"],
        get_yearly=False)

    merge_subsets_yearly(yearly_other_current_assets,
                         [yearly_prepaid_exp, yearly_prepaid_ins, yearly_prepaid_tax])
    merge_subsets_most_recent(most_recent_other_current_assets,
                              [most_recent_prepaid_exp, most_recent_prepaid_ins, most_recent_prepaid_tax])

    #### Receivables ####
    most_recent_receivables, yearly_receivables = fetch([
        "AccountsAndOtherReceivablesNetCurrent",
        "AccountsNotesAndLoansReceivableNetCurrent",
        "ReceivablesNetCurrent",
        "NontradeReceivablesCurrent"])
    most_recent_ar, yearly_ar = fetch([
        "AccountsReceivableNetCurrent",
        "AccountsReceivableNet",
        "AccountsReceivableGrossCurrent",
        "AccountsReceivableGross"])
    most_recent_loans_rec, yearly_loans_rec = fetch([
        "LoansAndLeasesReceivableNetReportedAmount",
        "LoansAndLeasesReceivableNetOfDeferredIncome",
        "LoansReceivableFairValueDisclosure",
        "LoansAndLeasesReceivableGrossCarryingAmount"])
    most_recent_notes_rec, yearly_notes_rec = fetch([
        "NotesReceivableNet",
        "NotesReceivableFairValueDisclosure",
        "NotesReceivableGross"])

    merge_subsets_yearly(yearly_receivables, [yearly_ar, yearly_loans_rec, yearly_notes_rec])
    merge_subsets_most_recent(most_recent_receivables,
                              [most_recent_ar, most_recent_loans_rec, most_recent_notes_rec])

    #### Securities ####
    # The comma between the two tags matters: adjacent string literals are
    # concatenated into a single (nonexistent) tag.
    most_recent_securities, yearly_securities = fetch([
        "MarketableSecurities",
        "AvailableForSaleSecurities"])
    most_recent_debtsecurities, yearly_debtsecurities = fetch(
        ["AvailableForSaleSecuritiesDebtSecurities"])
    most_recent_equitysecurities, yearly_equitysecurities = fetch(
        ["AvailableForSaleSecuritiesEquitySecurities"])

    merge_subsets_yearly(yearly_securities, [yearly_debtsecurities, yearly_equitysecurities])
    merge_subsets_most_recent(most_recent_securities,
                              [most_recent_debtsecurities, most_recent_equitysecurities])

    most_recent_derivatives, yearly_derivatives = fetch([
        "DerivativeAssets",
        "DerivativeAssetsCurrent"])
    most_recent_held_securities, yearly_held_securities = fetch([
        "HeldToMaturitySecurities",
        "HeldToMaturitySecuritiesFairValue",
        "HeldToMaturitySecuritiesCurrent",
    ])
    most_recent_non_curr_sec, yearly_non_curr_sec = fetch([
        "AvailableForSaleSecuritiesNoncurrent",
        "AvailableForSaleSecuritiesDebtSecuritiesNoncurrent",
    ])
    most_recent_marksecurities_cur, yearly_marksecurities_cur = fetch([
        "MarketableSecuritiesCurrent",
        "AvailableForSaleSecuritiesDebtSecuritiesCurrent"])
    most_recent_st_inv, yearly_st_inv = fetch(["ShortTermInvestments"])
    most_recent_mm, yearly_mm = fetch(["MoneyMarketFundsAtCarryingValue"])

    merge_subsets_yearly(yearly_securities,
                         [yearly_derivatives, yearly_held_securities, yearly_non_curr_sec,
                          yearly_marksecurities_cur, yearly_st_inv, yearly_mm])
    merge_subsets_most_recent(most_recent_securities,
                              [most_recent_derivatives, most_recent_held_securities,
                               most_recent_non_curr_sec, most_recent_marksecurities_cur,
                               most_recent_st_inv, most_recent_mm])

    return {
        "mr_cash": most_recent_cash_and_restricted,
        "cash": yearly_cash_and_restricted,
        "mr_inventory": most_recent_inventory,
        "inventory": yearly_inventory,
        "mr_other_assets": most_recent_other_current_assets,
        "other_assets": yearly_other_current_assets,
        "mr_receivables": most_recent_receivables,
        "receivables": yearly_receivables,
        "mr_securities": most_recent_securities,
        "securities": yearly_securities
    }
上述代码的功能如下:
- 提取现金和受限制现金信息,并与年度数据合并。
- 提取库存信息,包括不同类型的库存(如零售、能源相关、公用事业等),并与年度数据合并。
- 提取其他流动资产信息,如预付费用、其他杂项资产等,并与年度数据合并。
- 提取应收账款信息,包括账款、贷款、票据等,并与年度数据合并。
- 提取证券信息,包括可供交易证券、持有至到期投资、衍生工具等,并与年度数据合并。
(6)编写函数extract_balance_sheet_noncurrent_assets,用于从财务文件中提取资产负债表中的非流动资产信息,包括:
- 股权投资信息,包括权益法投资等。
- 其他金融资产信息,如长期预付费用、受限制的现金等。
- 不动产、厂房及设备(PP&E)信息。
- 投资性房地产信息,包括房地产投资及土地等。
- 税务优惠信息,如未确认的税收优惠和应收非流动所得税等。
通过提取这些数据,可以帮助分析者了解公司长期资产的结构和规模,评估其长期投资策略和未来发展潜力,以及了解其税务风险和优惠情况。
def extract_balance_sheet_noncurrent_assets(doc, quarter_of_annual_report, years_diff):
    """Extract the most recent non-current asset figures from a filing.

    Only most-recent values are requested (no TTM and no yearly series), so
    every merge below uses ``merge_subsets_most_recent``.

    Args:
        doc: Filing document, passed through to ``get_values_from_measures``.
        quarter_of_annual_report: Forwarded to ``get_values_from_measures``.
        years_diff: Forwarded to ``get_values_from_measures``.

    Returns:
        A dict with the most recent ``date``/``value`` entries for
        ``mr_equity_investments``, ``mr_other_financial_assets``, ``mr_ppe``,
        ``mr_investment_property`` and ``mr_tax_benefits``.
    """

    def latest(tags):
        # Fetch only the most recent value for the given candidate tags.
        newest, _, _ = get_values_from_measures(
            doc, tags, instant=True, quarter_of_annual_report=quarter_of_annual_report,
            years_diff=years_diff, get_ttm=False, get_yearly=False, debug=False)
        return newest

    #### Equity Investments ####
    equity_investments = latest([
        "EquityMethodInvestmentAggregateCost",
        "EquityMethodInvestments",
        "InvestmentOwnedAtCost",
        "Investments",
        "InvestmentsInAffiliatesSubsidiariesAssociatesAndJointVentures",
    ])
    equity_at_fair_value = latest([
        "EquityMethodInvestmentsFairValueDisclosure",
        "InvestmentOwnedAtFairValue",
        "InvestmentsFairValueDisclosure",
    ])
    equity_without_fair_value = latest(
        ["EquitySecuritiesWithoutReadilyDeterminableFairValueAmount", ])
    merge_subsets_most_recent(equity_investments,
                              [equity_at_fair_value, equity_without_fair_value])

    noncurrent_securities = latest(["MarketableSecuritiesNoncurrent"])
    merge_subsets_most_recent(equity_investments, [noncurrent_securities])

    #### Other financial assets ####
    noncurrent_prepaid = latest([
        "PrepaidExpenseNoncurrent",
        "PrepaidExpenseOtherNoncurrent",
    ])
    noncurrent_restricted_cash = latest([
        "RestrictedCashAndCashEquivalentsNoncurrent",
        "RestrictedCashAndInvestmentsNoncurrent",
        "RestrictedCashNoncurrent"
    ])
    noncurrent_derivatives = latest(["DerivativeAssetsNoncurrent", ])
    escrow_deposit = latest(["EscrowDeposit"])

    # There is no main measure here: start from an empty entry and fold the
    # components into it.
    other_financial_assets = {"date":None, "value":0}
    merge_subsets_most_recent(other_financial_assets,
                              [noncurrent_prepaid, noncurrent_restricted_cash,
                               noncurrent_derivatives, escrow_deposit])

    #### PP&E ####
    ppe = latest([
        "PropertyPlantAndEquipmentNet",
        "PropertyPlantAndEquipmentAndFinanceLeaseRightOfUseAssetAfterAccumulatedDepreciationAndAmortization"
    ])

    #### Investment property ####
    investment_property = latest([
        "RealEstateInvestments",
        "RealEstateInvestmentPropertyNet",
        "RealEstateInvestmentPropertyAtCost",
        "RealEstateHeldforsale"
    ])
    buildings = latest(["InvestmentBuildingAndBuildingImprovements"])
    land = latest([
        "LandAndLandImprovements",
        "Land",
    ])
    merge_subsets_most_recent(investment_property, [buildings, land])

    #### Tax Benefits ####
    tax_benefits = latest([
        "UnrecognizedTaxBenefits",
        "UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate",
        "IncomeTaxesReceivableNoncurrent",
    ])

    return {
        "mr_equity_investments": equity_investments,
        "mr_other_financial_assets": other_financial_assets,
        "mr_ppe": ppe,
        "mr_investment_property": investment_property,
        "mr_tax_benefits": tax_benefits
    }