(26-5-02)基于OpenAI和LangChain的上市公司估值系统:定量分析(Quantitative Analysis)(2)数据处理

10.5.3  数据处理

接下来介绍的函数的功能是对提取到的财务数据进行处理和整理,以便进一步进行定量分析和生成报告。

(1)函数merge_subsets_yearly的主要功能是将子集数据合并到总集中,以便获得更全面的数据集。在执行合并时,函数会根据必须包含的条件来确定是否需要包含某些特定日期的数据。最终,总集中将包含所有子集数据,并且按照日期进行排序。

def merge_subsets_yearly(superset, subsets, must_include=None):
    """Merge yearly subset series into ``superset`` in place.

    Every series is a dict with two parallel lists, ``"dates"`` and
    ``"values"``. Dates already present in ``superset`` are never touched;
    only missing dates are filled in, each with the sum of the values the
    subsets report for that date.

    Args:
        superset: Series to extend. Its ``"dates"`` and ``"values"`` lists are
            replaced by new lists sorted by date.
        subsets: List of series whose values are summed per date.
        must_include: ``None`` or a tuple of indexes into ``subsets``. When
            given, a date is only added if it appears in every subset selected
            by these indexes; candidate dates are taken from the first
            selected subset.

    Returns:
        None. ``superset`` is modified in place.

    Raises:
        TypeError: If ``must_include`` is neither ``None`` nor a tuple.
    """
    known_dates = set(superset["dates"])
    # Maps each date to be added to the running sum of the subset values.
    # dict preserves insertion order, so dates are appended in discovery order.
    pending = {}

    if must_include is None:
        for subset in subsets:
            for i, date in enumerate(subset["dates"]):
                if date in known_dates:
                    continue
                value = subset["values"][i]
                if date in pending:
                    # Another subset already contributed to this date.
                    pending[date] += value
                else:
                    pending[date] = value
    else:
        if not isinstance(must_include, tuple):
            raise TypeError("must_include必须是元组")

        # Candidate dates come from the first required subset; a candidate is
        # kept only if every required subset also reports it.
        candidate_dates = subsets[must_include[0]]["dates"]
        pending = {
            date: 0
            for date in candidate_dates
            if date not in known_dates
            and all(date in subsets[m]["dates"] for m in must_include)
        }

        # Nothing to add: leave superset exactly as it was (not re-sorted).
        if not pending:
            return

        for subset in subsets:
            for i, date in enumerate(subset["dates"]):
                if date in pending:
                    pending[date] += subset["values"][i]

    superset["dates"].extend(pending)
    superset["values"].extend(pending.values())

    # Keep dates and values aligned while ordering the series by date.
    ordered = sorted(zip(superset["dates"], superset["values"]))
    superset["dates"] = [date for date, _ in ordered]
    superset["values"] = [value for _, value in ordered]

(2)函数merge_subsets_most_recent的功能是将多个子集中的最新值合并成一个总集,用于合并最新的财务数据。

def merge_subsets_most_recent(superset, subsets):
    """Refresh ``superset`` in place from the newest dated subsets.

    Each argument item is a dict with a ``"date"`` (possibly ``None``) and a
    ``"value"``. If ``superset`` has no date, or any subset carries a later
    date, ``superset`` takes the latest subset date and its value becomes the
    sum of the values of all subsets sharing that date. Otherwise nothing
    changes.

    Args:
        superset: Dict to update.
        subsets: List of dicts to take the most recent value from.

    Returns:
        None. ``superset`` is modified in place.
    """
    is_stale = any(
        superset["date"] is None
        or (sub["date"] is not None and sub["date"] > superset["date"])
        for sub in subsets
    )
    if not is_stale:
        return

    dated = [sub["date"] for sub in subsets if sub["date"] is not None]
    if not dated:
        # Neither the superset nor any subset carries a date: nothing to merge.
        return

    latest = max(dated)
    superset["date"] = latest
    superset["value"] = sum(sub["value"] for sub in subsets if sub["date"] == latest)

(3)函数extract_shares的功能是从财务文件中提取股份信息,并处理不同的股份度量单位,以确保数据的一致性。另外,它还处理了一些异常情况,例如财务文件中股份单位错误的问题。

def extract_shares(doc, quarter_of_annual_report, years_diff):
    """Extract the most recent and the yearly number of shares from a filing.

    Three sources are combined: the ``dei`` cover-page measure
    ``EntityCommonStockSharesOutstanding``, ``CommonStockSharesOutstanding``
    and ``WeightedAverageNumberOfSharesOutstandingBasic``. Values that are
    less than 1% of the largest yearly value are multiplied by 1000, on the
    assumption that they were reported with the wrong unit of measure.

    Args:
        doc: Filing data, passed through to the project helpers.
        quarter_of_annual_report: Passed through to the project helpers.
        years_diff: Passed through to the project helpers.

    Returns:
        dict with ``"mr_shares"`` (a ``{"date", "value"}`` dict) and
        ``"shares"`` (a ``{"dates", "values"}`` dict).

    Raises:
        NoSharesException: If no yearly share value is available.
    """
    df = build_financial_df(doc, "EntityCommonStockSharesOutstanding", unit="shares", tax="dei")

    debug = False

    if debug:
        print(df.to_markdown())

    # Best effort: fall back to an empty value if the cover-page measure
    # cannot be read.
    try:
        most_recent_shares = get_most_recent_value_from_df(df)
    except Exception:
        most_recent_shares = {"date": None, "value": 0}

    measures = ["CommonStockSharesOutstanding"]

    mr_common_shares, _, yearly_common_shares = get_values_from_measures(
        doc, measures, instant=True, quarter_of_annual_report=quarter_of_annual_report,
        years_diff=years_diff, get_ttm=False,
        get_most_recent=True, debug=debug, unit="shares")

    measures = ["WeightedAverageNumberOfSharesOutstandingBasic"]

    mr_average_shares, _, yearly_average_shares = get_values_from_measures(
        doc, measures, instant=False, quarter_of_annual_report=quarter_of_annual_report,
        years_diff=years_diff, get_ttm=False,
        get_most_recent=True, debug=debug, unit="shares")

    merge_subsets_most_recent(most_recent_shares, [mr_common_shares])
    merge_subsets_most_recent(most_recent_shares, [mr_average_shares])

    # Common shares are completed with weighted-average shares in every case,
    # so do it once up front rather than in both the try and except paths.
    merge_subsets_yearly(yearly_common_shares, [yearly_average_shares])

    try:
        yearly_shares = get_yearly_values_from_df(
            df, instant=True, quarter_of_annual_report=quarter_of_annual_report,
            years_diff=years_diff)
        merge_subsets_yearly(yearly_shares, [yearly_common_shares])
    except Exception:
        # Best effort: without usable cover-page data, rely on the common
        # shares series alone.
        yearly_shares = yearly_common_shares

    # In some filings the company reports shares with a wrong unit of measure
    # (million shares instead of thousand shares).
    try:
        max_num_shares = max(yearly_shares["values"])
    except Exception as err:
        raise NoSharesException() from err

    # Skip the rescaling when the largest value is 0: the ratio is undefined
    # and dividing would raise ZeroDivisionError.
    if max_num_shares:
        yearly_shares["values"] = [
            x * 1000 if x / max_num_shares < 0.01 else x
            for x in yearly_shares["values"]
        ]
        if most_recent_shares["value"] / max_num_shares < 0.01:
            most_recent_shares["value"] *= 1000

    return {
        "mr_shares": most_recent_shares,
        "shares": yearly_shares,
    }

(4)函数extract_income_statement的功能是从财务文件中提取利润表(Income Statement)信息,并返回一系列指标,包括总收入、毛利润、研发费用、息税前利润(EBIT)、净利润等。它还处理了一些特殊情况,例如不同公司在财务文件中使用不同的度量项(XBRL标签)来报告利息支出的问题。

def extract_income_statement(doc):
    """Extract income-statement figures from a filing.

    For every figure a list of alternative measure names is passed to
    ``get_values_from_measures``; related partial measures are then merged
    with ``merge_subsets_yearly`` / ``merge_subsets_most_recent``.

    Args:
        doc: Filing data, passed through to the project helpers.

    Returns:
        dict with the trailing-twelve-month values (``ttm_revenue``,
        ``ttm_gross_profit``, ``ttm_ebit``, ``ttm_net_income``,
        ``ttm_interest_expenses``), the yearly series (``revenue``,
        ``gross_profit``, ``rd``, ``ebit``, ``depreciation``, ``net_income``)
        and ``last_annual_report_date`` / ``last_annual_report_fy``.
    """

    def _ttm_and_yearly(measures):
        # Fetch both the TTM value and the yearly series for the measures.
        _, ttm, yearly = get_values_from_measures(
            doc, measures, get_most_recent=False, debug=False)
        return ttm, yearly

    def _yearly_only(measures):
        # Fetch only the yearly series for the measures.
        _, _, yearly = get_values_from_measures(
            doc, measures, get_ttm=False, get_most_recent=False, debug=False)
        return yearly

    def _ttm_only(measures):
        # Fetch only the TTM value for the measures.
        _, ttm, _ = get_values_from_measures(
            doc, measures, get_most_recent=False, get_yearly=False, debug=False)
        return ttm

    #### Revenue ####
    revenue_measures = [
        "Revenues",
        "RevenueFromContractWithCustomerExcludingAssessedTax",
        "RevenueFromContractWithCustomerIncludingAssessedTax",
        "SalesRevenueNet",
    ]
    ttm_revenue, yearly_revenue = _ttm_and_yearly(revenue_measures)

    # Keep the latest annual-report date (and its fiscal year) found across
    # the revenue measures.
    last_annual_report_date = None
    last_annual_report_fy = None
    for m in revenue_measures:
        df = build_financial_df(doc, m)
        if df is not None and not df.empty and "frame" in df.columns:
            annual_rd, annual_fy = get_last_annual_report_date_and_fy(df)
            if last_annual_report_date is None or (
                    annual_rd is not None and annual_rd > last_annual_report_date):
                last_annual_report_date = annual_rd
                last_annual_report_fy = annual_fy

    #### R and D ####
    yearly_rd = _yearly_only(["ResearchAndDevelopmentExpense"])
    yearly_rd_not_inprocess = _yearly_only(
        ["ResearchAndDevelopmentExpenseExcludingAcquiredInProcessCost"])
    yearly_rd_inprocess = _yearly_only(["ResearchAndDevelopmentInProcess"])

    merge_subsets_yearly(yearly_rd, [yearly_rd_not_inprocess, yearly_rd_inprocess])

    #### Net Income ####
    ttm_net_income, yearly_net_income = _ttm_and_yearly([
        "NetIncomeLoss",
        "NetIncomeLossAvailableToCommonStockholdersBasic",
        "NetIncomeLossAvailableToCommonStockholdersDiluted",
        "ComprehensiveIncomeNetOfTax",
        "IncomeLossFromContinuingOperations",

        # including minority interest
        "ProfitLoss",
        "IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest",
        "IncomeLossFromSubsidiariesNetOfTax",
    ])

    #### Interest Expenses ####
    ttm_interest_expenses = _ttm_only([
        "InterestExpense",
        "InterestAndDebtExpense",
        "InterestPaid",
        "InterestPaidNet",
        "InterestCostsIncurred",
    ])

    # The TTM result is a {"date", "value"} dict, so the emptiness check must
    # look at its "value"; comparing the dict itself to 0 is never true.
    if ttm_interest_expenses["value"] == 0:

        ttm_ie_borrowings = _ttm_only(["InterestExpenseBorrowings"])

        if ttm_ie_borrowings["value"] == 0:

            ttm_ie_debt = _ttm_only([
                "InterestExpenseDebt",
                "InterestExpenseDebtExcludingAmortization",
            ])

            if ttm_ie_debt["value"] == 0:
                ttm_ie_debt_lt = _ttm_only(["InterestExpenseLongTermDebt"])
                ttm_ie_debt_st = _ttm_only(["InterestExpenseShortTermBorrowings"])

                merge_subsets_most_recent(ttm_ie_debt, [ttm_ie_debt_lt, ttm_ie_debt_st])

            ttm_ie_deposits = _ttm_only(["InterestExpenseDeposits"])
            ttm_ie_others = _ttm_only(["InterestExpenseOther"])
            ttm_ie_related = _ttm_only(["InterestExpenseRelatedParty"])

            # merge_subsets_most_recent updates its first argument in place
            # and returns None, so its result must not be assigned.
            merge_subsets_most_recent(
                ttm_ie_borrowings,
                [ttm_ie_debt, ttm_ie_deposits, ttm_ie_others, ttm_ie_related])

        ttm_interest_expenses = ttm_ie_borrowings

    #### Gross Profit ####
    # Measure names are concept names without spaces, like every other entry
    # in this function.
    ttm_gross_profit, yearly_gross_profit = _ttm_and_yearly(["GrossProfit"])

    #### Depreciation ####
    yearly_depreciation_amortization = _yearly_only([
        "DepreciationDepletionAndAmortization",
        "DepreciationAmortizationAndAccretionNet",
    ])
    yearly_depreciation = _yearly_only(["Depreciation"])

    yearly_amortization_fincost_disc = _yearly_only(["AmortizationOfFinancingCostsAndDiscounts"])
    yearly_amortization_disc = _yearly_only(["AmortizationOfDebtDiscountPremium"])
    yearly_amortization_fincost = _yearly_only(["AmortizationOfFinancingCosts"])

    merge_subsets_yearly(yearly_amortization_fincost_disc,
                         [yearly_amortization_disc, yearly_amortization_fincost])

    yearly_amortization_charges = _yearly_only(["AmortizationOfDeferredCharges"])
    yearly_amortization_comm = _yearly_only(["AmortizationOfDeferredSalesCommissions"])
    yearly_amortization_intan = _yearly_only(["AmortizationOfIntangibleAssets"])

    yearly_amortization = {"dates": [], "values": []}
    merge_subsets_yearly(yearly_amortization,
                         [yearly_amortization_fincost_disc, yearly_amortization_charges,
                          yearly_amortization_comm, yearly_amortization_intan])
    merge_subsets_yearly(yearly_depreciation_amortization,
                         [yearly_depreciation, yearly_amortization])

    #### EBIT ####
    ttm_ebit, yearly_ebit = _ttm_and_yearly([
        "OperatingIncomeLoss",
        "IncomeLossFromContinuingOperationsBeforeInterestExpenseInterestIncomeIncomeTaxesExtraordinaryItemsNoncontrollingInterestsNet",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesForeign",
        "IncomeLossFromContinuingOperationsBeforeIncomeTaxesDomestic",
    ])

    return {
        "ttm_revenue": ttm_revenue,
        "ttm_gross_profit": ttm_gross_profit,
        "ttm_ebit": ttm_ebit,
        "ttm_net_income": ttm_net_income,
        "ttm_interest_expenses": ttm_interest_expenses,
        "revenue": yearly_revenue,
        "gross_profit": yearly_gross_profit,
        "rd": yearly_rd,
        "ebit": yearly_ebit,
        "depreciation": yearly_depreciation_amortization,
        "net_income": yearly_net_income,
        "last_annual_report_date": last_annual_report_date,
        "last_annual_report_fy": last_annual_report_fy
    }

(5)编写函数extract_balance_sheet_current_assets,从财务文件中提取资产负债表中的流动资产信息。通过提取和整合这些信息,可以帮助分析者了解公司的流动资产结构,评估其财务健康状况,并进行进一步的财务分析和预测。

def extract_balance_sheet_current_assets(doc, quarter_of_annual_report, years_diff):
    """Extract current-asset figures of the balance sheet from a filing.

    For each asset class (cash, inventory, other current assets, receivables,
    securities) a main list of measures is queried first; more specific
    measures are then merged in with ``merge_subsets_most_recent`` and
    ``merge_subsets_yearly`` to fill the dates the main measures lack.

    Args:
        doc: Filing data, passed through to ``get_values_from_measures``.
        quarter_of_annual_report: Passed through to ``get_values_from_measures``.
        years_diff: Passed through to ``get_values_from_measures``.

    Returns:
        dict with, for each asset class, the most recent value (``mr_*`` keys)
        and the yearly series: ``cash``, ``inventory``, ``other_assets``,
        ``receivables`` and ``securities``.
    """

    def _instant(measures, **extra):
        # Query instant (point-in-time) measures; return (most_recent, yearly).
        most_recent, _, yearly = get_values_from_measures(
            doc, measures, instant=True, quarter_of_annual_report=quarter_of_annual_report,
            years_diff=years_diff, get_ttm=False, debug=False, **extra)
        return most_recent, yearly

    #### Cash ####
    most_recent_cash_and_restricted, yearly_cash_and_restricted = _instant(
        ["CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents"])

    most_recent_cash, yearly_cash = _instant(
        ["CashAndCashEquivalentsAtCarryingValue", "Cash"])

    most_recent_restrictedcash, yearly_restrictedcash = _instant([
        "RestrictedCashAndCashEquivalentsAtCarryingValue",
        "RestrictedCashAndCashEquivalents",
        "RestrictedCash",
        "RestrictedCashAndInvestmentsCurrent",
        "RestrictedCashCurrent",
    ])

    # Only add years for which plain cash (subset 0) is available; restricted
    # cash alone is not enough to create a new yearly entry.
    merge_subsets_yearly(yearly_cash_and_restricted, [yearly_cash, yearly_restrictedcash],
                         must_include=(0,))

    if most_recent_cash_and_restricted["date"] is None \
            or (most_recent_cash["date"] is not None
                and most_recent_cash["date"] > most_recent_cash_and_restricted["date"]):
        most_recent_cash_and_restricted["date"] = most_recent_cash["date"]
        most_recent_cash_and_restricted["value"] = most_recent_cash["value"]

        if most_recent_restrictedcash["date"] == most_recent_cash["date"]:
            most_recent_cash_and_restricted["value"] += most_recent_restrictedcash["value"]

    #### Inventory ####
    most_recent_inventory, yearly_inventory = _instant([
        "InventoryNet",
        "InventoryGross",
        "FIFOInventoryAmount",
        "InventoryLIFOReserve",
        "LIFOInventoryAmount",
    ])

    most_recent_inventory_retail, yearly_inventory_retail = _instant([
        "RetailRelatedInventory",
        "RetailRelatedInventoryMerchandise",
    ])
    most_recent_inventory_energy, yearly_inventory_energy = _instant(["EnergyRelatedInventory"])
    most_recent_inventory_utilities, yearly_inventory_utilities = _instant(["PublicUtilitiesInventory"])
    most_recent_inventory_re, yearly_inventory_re = _instant(["InventoryRealEstate"])
    most_recent_inventory_airline, yearly_inventory_airline = _instant(["AirlineRelatedInventory"])

    merge_subsets_most_recent(most_recent_inventory,
                              [most_recent_inventory_retail, most_recent_inventory_airline,
                               most_recent_inventory_energy, most_recent_inventory_re,
                               most_recent_inventory_utilities])
    merge_subsets_yearly(yearly_inventory,
                         [yearly_inventory_retail, yearly_inventory_airline, yearly_inventory_energy,
                          yearly_inventory_re, yearly_inventory_utilities])

    #### Other Assets ####
    most_recent_other_current_assets, yearly_other_current_assets = _instant([
        "OtherAssetsCurrent",
        "OtherAssetsMiscellaneousCurrent",
        "PrepaidExpenseAndOtherAssetsCurrent",
        "OtherAssetsFairValueDisclosure",
        "OtherAssetsMiscellaneous",
        "PrepaidExpenseAndOtherAssets",
    ])

    most_recent_prepaid_exp, yearly_prepaid_exp = _instant(["PrepaidExpenseCurrent"])
    most_recent_prepaid_ins, yearly_prepaid_ins = _instant(["PrepaidInsurance"])
    # NOTE(review): yearly data is explicitly not requested here, yet the
    # yearly result is merged below; presumably it comes back empty - confirm
    # against get_values_from_measures.
    most_recent_prepaid_tax, yearly_prepaid_tax = _instant(
        ["PrepaidTaxes",
         "IncomeTaxesReceivable",
         "IncomeTaxReceivable"],
        get_yearly=False)

    merge_subsets_yearly(yearly_other_current_assets,
                         [yearly_prepaid_exp, yearly_prepaid_ins, yearly_prepaid_tax])
    merge_subsets_most_recent(most_recent_other_current_assets,
                              [most_recent_prepaid_exp, most_recent_prepaid_ins, most_recent_prepaid_tax])

    #### Receivables ####
    most_recent_receivables, yearly_receivables = _instant([
        "AccountsAndOtherReceivablesNetCurrent",
        "AccountsNotesAndLoansReceivableNetCurrent",
        "ReceivablesNetCurrent",
        "NontradeReceivablesCurrent",
    ])

    most_recent_ar, yearly_ar = _instant([
        "AccountsReceivableNetCurrent",
        "AccountsReceivableNet",
        "AccountsReceivableGrossCurrent",
        "AccountsReceivableGross",
    ])

    most_recent_loans_rec, yearly_loans_rec = _instant([
        "LoansAndLeasesReceivableNetReportedAmount",
        "LoansAndLeasesReceivableNetOfDeferredIncome",
        "LoansReceivableFairValueDisclosure",
        "LoansAndLeasesReceivableGrossCarryingAmount",
    ])

    most_recent_notes_rec, yearly_notes_rec = _instant([
        "NotesReceivableNet",
        "NotesReceivableFairValueDisclosure",
        "NotesReceivableGross",
    ])

    merge_subsets_yearly(yearly_receivables, [yearly_ar, yearly_loans_rec, yearly_notes_rec])
    merge_subsets_most_recent(most_recent_receivables,
                              [most_recent_ar, most_recent_loans_rec, most_recent_notes_rec])

    #### Securities ####
    # These are two separate measures; without the comma between them Python
    # would concatenate the adjacent string literals into one bogus name.
    most_recent_securities, yearly_securities = _instant([
        "MarketableSecurities",
        "AvailableForSaleSecurities",
    ])

    most_recent_debtsecurities, yearly_debtsecurities = _instant(
        ["AvailableForSaleSecuritiesDebtSecurities"])
    most_recent_equitysecurities, yearly_equitysecurities = _instant(
        ["AvailableForSaleSecuritiesEquitySecurities"])

    merge_subsets_yearly(yearly_securities, [yearly_debtsecurities, yearly_equitysecurities])
    merge_subsets_most_recent(most_recent_securities,
                              [most_recent_debtsecurities, most_recent_equitysecurities])

    most_recent_derivatives, yearly_derivatives = _instant([
        "DerivativeAssets",
        "DerivativeAssetsCurrent",
    ])

    most_recent_held_securities, yearly_held_securities = _instant([
        "HeldToMaturitySecurities",
        "HeldToMaturitySecuritiesFairValue",
        "HeldToMaturitySecuritiesCurrent",
    ])

    most_recent_non_curr_sec, yearly_non_curr_sec = _instant([
        "AvailableForSaleSecuritiesNoncurrent",
        "AvailableForSaleSecuritiesDebtSecuritiesNoncurrent",
    ])

    most_recent_marksecurities_cur, yearly_marksecurities_cur = _instant([
        "MarketableSecuritiesCurrent",
        "AvailableForSaleSecuritiesDebtSecuritiesCurrent",
    ])

    most_recent_st_inv, yearly_st_inv = _instant(["ShortTermInvestments"])
    most_recent_mm, yearly_mm = _instant(["MoneyMarketFundsAtCarryingValue"])

    merge_subsets_yearly(yearly_securities,
                         [yearly_derivatives, yearly_held_securities, yearly_non_curr_sec,
                          yearly_marksecurities_cur, yearly_st_inv, yearly_mm])
    merge_subsets_most_recent(most_recent_securities,
                              [most_recent_derivatives, most_recent_held_securities,
                               most_recent_non_curr_sec, most_recent_marksecurities_cur,
                               most_recent_st_inv, most_recent_mm])

    return {
        "mr_cash": most_recent_cash_and_restricted,
        "cash": yearly_cash_and_restricted,
        "mr_inventory": most_recent_inventory,
        "inventory": yearly_inventory,
        "mr_other_assets": most_recent_other_current_assets,
        "other_assets": yearly_other_current_assets,
        "mr_receivables": most_recent_receivables,
        "receivables": yearly_receivables,
        "mr_securities": most_recent_securities,
        "securities": yearly_securities
    }

上述代码的功能如下:

  1. 提取现金和受限制的现金,并与年度数据合并。
  2. 提取库存信息,包括不同类型的库存(如零售、能源相关、公用事业等),并与年度数据合并。
  3. 提取其他流动资产信息,如预付费用、其他杂项资产等,并与年度数据合并。
  4. 提取应收账款信息,包括账款、贷款、票据等,并与年度数据合并。
  5. 提取证券信息,包括可供出售证券、持有至到期投资、衍生工具等,并与年度数据合并。

(6)编写函数extract_balance_sheet_noncurrent_assets,用于从财务文件中提取资产负债表中的非流动资产信息,包括:

  1. 股权投资信息,包括权益法投资等。
  2. 其他金融资产信息,如长期预付费用、受限制的现金等。
  3. 不动产、厂房及设备(PP&E)信息。
  4. 投资性房地产信息,包括房地产投资及土地等。
  5. 税务优惠信息,如未确认的税收优惠和应收非流动所得税等。

通过提取这些数据,可以帮助分析者了解公司长期资产的结构和规模,评估其长期投资策略和未来发展潜力,以及了解其税务风险和优惠情况。

def extract_balance_sheet_noncurrent_assets(doc, quarter_of_annual_report, years_diff):
    """Extract the most recent non-current asset figures from a filing.

    Only most-recent values are requested (no TTM, no yearly series). Related
    measures are combined with ``merge_subsets_most_recent``.

    Args:
        doc: Filing data, passed through to ``get_values_from_measures``.
        quarter_of_annual_report: Passed through to ``get_values_from_measures``.
        years_diff: Passed through to ``get_values_from_measures``.

    Returns:
        dict with the keys ``mr_equity_investments``,
        ``mr_other_financial_assets``, ``mr_ppe``, ``mr_investment_property``
        and ``mr_tax_benefits``.
    """

    def _latest(*tags):
        # Query instant measures and keep only the most recent value.
        latest, _, _ = get_values_from_measures(
            doc, list(tags), instant=True, quarter_of_annual_report=quarter_of_annual_report,
            years_diff=years_diff, get_ttm=False, get_yearly=False, debug=False)
        return latest

    #### Equity Investments ####
    most_recent_equity_investments = _latest(
        "EquityMethodInvestmentAggregateCost",
        "EquityMethodInvestments",
        "InvestmentOwnedAtCost",
        "Investments",
        "InvestmentsInAffiliatesSubsidiariesAssociatesAndJointVentures",
    )
    equity_at_fair_value = _latest(
        "EquityMethodInvestmentsFairValueDisclosure",
        "InvestmentOwnedAtFairValue",
        "InvestmentsFairValueDisclosure",
    )
    equity_without_fair_value = _latest(
        "EquitySecuritiesWithoutReadilyDeterminableFairValueAmount",
    )
    merge_subsets_most_recent(most_recent_equity_investments,
                              [equity_at_fair_value, equity_without_fair_value])

    noncurrent_securities = _latest("MarketableSecuritiesNoncurrent")
    merge_subsets_most_recent(most_recent_equity_investments, [noncurrent_securities])

    #### Other financial assets ####
    noncurrent_prepaid = _latest(
        "PrepaidExpenseNoncurrent",
        "PrepaidExpenseOtherNoncurrent",
    )
    noncurrent_restricted_cash = _latest(
        "RestrictedCashAndCashEquivalentsNoncurrent",
        "RestrictedCashAndInvestmentsNoncurrent",
        "RestrictedCashNoncurrent",
    )
    noncurrent_derivatives = _latest("DerivativeAssetsNoncurrent")
    escrow_deposit = _latest("EscrowDeposit")

    # No main measure for this class: start empty and take the newest parts.
    most_recent_other_financial_assets = {"date":None, "value":0}
    merge_subsets_most_recent(
        most_recent_other_financial_assets,
        [noncurrent_prepaid, noncurrent_restricted_cash, noncurrent_derivatives, escrow_deposit])

    #### PP&E ####
    most_recent_ppe = _latest(
        "PropertyPlantAndEquipmentNet",
        "PropertyPlantAndEquipmentAndFinanceLeaseRightOfUseAssetAfterAccumulatedDepreciationAndAmortization",
    )

    #### Investment property ####
    most_recent_property = _latest(
        "RealEstateInvestments",
        "RealEstateInvestmentPropertyNet",
        "RealEstateInvestmentPropertyAtCost",
        "RealEstateHeldforsale",
    )
    investment_buildings = _latest("InvestmentBuildingAndBuildingImprovements")
    land = _latest(
        "LandAndLandImprovements",
        "Land",
    )
    merge_subsets_most_recent(most_recent_property, [investment_buildings, land])

    #### Tax Benefits ####
    most_recent_tax_benefit = _latest(
        "UnrecognizedTaxBenefits",
        "UnrecognizedTaxBenefitsThatWouldImpactEffectiveTaxRate",
        "IncomeTaxesReceivableNoncurrent",
    )

    return {
        "mr_equity_investments": most_recent_equity_investments,
        "mr_other_financial_assets": most_recent_other_financial_assets,
        "mr_ppe": most_recent_ppe,
        "mr_investment_property": most_recent_property,
        "mr_tax_benefits": most_recent_tax_benefit
    }

未完待续

  • 11
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

码农三叔

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值