【无标题】

最新推荐文章于 2024-07-15 15:37:59 发布

白了又了白之小白

最新推荐文章于 2024-07-15 15:37:59 发布

阅读量42

点赞数

文章标签： python

本文链接：https://blog.csdn.net/m0_37579507/article/details/132279477

版权

# -*- coding: utf-8 -*-

from urllib import request
import ssl

# NOTE(review): swaps the default HTTPS context factory for the unverified one,
# so HTTPS requests made through urllib skip certificate verification.
ssl._create_default_https_context = ssl._create_unverified_context
from scrapy.selector import Selector
from scrapy.http import HtmlResponse
import json
import os
import time

def main():
    """Download the balance sheets of a fixed list of stocks.

    Calls ``scrapy_finance_indices`` once per stock code with the Sina
    Finance balance-sheet URL template.  Year offsets 0..5 are requested,
    i.e. the current year and the five years before it.  Existing result
    files are kept because ``overwrite`` is False.
    """
    # A tuple instead of a set: the stocks are always fetched in this order.
    codes = (
        "000977",
        "002230",
        "603019",
        "300457",
        "002049",
    )
    syear, eyear = 0, 6
    detail_dir = './'
    overwrite = False

    # These do not depend on the stock code, so they are built once.
    detail_url = "https://money.finance.sina.com.cn/corp/go.php/vFD_BalanceSheet/stockid/%s/ctrl/%s/displaytype/4.phtml"
    tr_xpath = '//*[@id="BalanceSheetNewTable0"]/tbody/tr'
    data_sign = 'balance_sheet'

    for code in codes:
        scrapy_finance_indices(code, detail_url, syear, eyear, tr_xpath, data_sign, detail_dir, overwrite)

def scrapy_finance_indices(code, detail_url, syear, eyear, tr_xpath, data_sign, detail_dir, overwrite):
    """Scrape the yearly report tables of one stock and save them as JSON.

    Args:
        code: Stock code substituted into ``detail_url``.
        detail_url: URL template with two ``%s`` slots (stock code, year).
        syear: First year offset (inclusive), counted back from the current year.
        eyear: Last year offset (exclusive).
        tr_xpath: XPath selecting the rows of the report table.
        data_sign: Prefix of the output file name.
        detail_dir: Directory of the output file.
        overwrite: When False, an existing output file is kept and nothing
            is downloaded.

    Returns:
        False when scraping is skipped (the result file already exists, or
        the year range is empty); True otherwise.
    """
    res_file = "%s/%s_%s.json" % (detail_dir, data_sign, code)
    if not overwrite and os.path.isfile(res_file):
        print("skip scrapy, result exist in:" + res_file)
        return False

    if syear >= eyear:
        print("skip scrapy, no vaild year range (%d, %d)" % (syear, eyear))
        return False

    this_year = int(time.strftime('%Y', time.localtime()))
    date_dict = {}

    for offset in range(syear, eyear):
        page_url = detail_url % (code, str(this_year - offset))
        # The context manager closes the connection even if reading fails.
        with request.urlopen(page_url, timeout=30) as resp:
            print("request url:" + resp.geturl())
            rt_code = resp.getcode()
            content = resp.read() if rt_code == 200 else None

        if content is None:
            print("error return code: %s" % rt_code)
            continue

        # Pass the real page URL, not the unformatted template.
        response = HtmlResponse(url=page_url, body=content)
        rows = (
            tr_sel.xpath('td//text()').extract()
            for tr_sel in Selector(response=response).xpath(tr_xpath)
        )
        _merge_report_rows(rows, date_dict)

        # Written after every year, so the file always holds what was fetched so far.
        write_file(date_dict, res_file)

    return True


def _merge_report_rows(rows, date_dict):
    """Fold the cell texts of one report table into ``date_dict``.

    ``rows`` yields one list of cell texts per table row.  A one-cell row is
    a section title.  The first multi-cell row supplies the report dates.
    Every later multi-cell row is stored as
    ``date_dict[date][section][row title] = value``, or directly under
    ``date_dict[date]`` while no section title has been seen.
    """
    header_title = None
    report_dates = None
    for cells in rows:
        if not cells:
            continue

        title_key = cells[0]
        if len(cells) == 1:
            header_title = title_key
            continue

        values = cells[1:]
        if not report_dates:
            report_dates = values
            continue

        # zip() tolerates a row with more cells than there are report dates.
        for report_date, value in zip(report_dates, values):
            info_dict = date_dict.setdefault(report_date, {})
            target = info_dict.setdefault(header_title, {}) if header_title else info_dict
            target[title_key] = value.strip()

def write_file(info_dict, res_file):
    """Write ``info_dict`` to ``res_file`` as a single line of JSON.

    Nothing is written when ``info_dict`` is empty.  I/O failures are
    printed rather than raised.

    Args:
        info_dict: JSON-serializable mapping to store.
        res_file: Path of the file to (over)write, encoded as UTF-8.
    """
    if not info_dict:
        return
    try:
        # ``with`` closes the handle even when the write itself fails.
        with open(res_file, 'w', encoding='utf-8') as fb:
            fb.write(json.dumps(info_dict) + '\n')
    except IOError as err:
        print('IO Error:', err)

# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()

11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

import pandas as pd
from sklearn.linear_model import LinearRegression
import datetime

# Module-level list: get_risk_score_by_json appends one DataFrame per report
# date to it, across all calls.
clear_data = []

def get_risk_score_by_json(json_filename, next_date):
    """Predict a company's risk score for ``next_date`` from its balance sheets.

    For every report date in the JSON file, the debt-to-asset ratio, current
    ratio and quick ratio are combined into
    ``Risk_Score = 0.5 * debt ratio + 0.3 * current ratio + 0.2 * quick ratio``.
    A linear regression of the score against the report timestamp is then
    evaluated at ``next_date``.

    Side effect: every per-date DataFrame is appended to the module-level
    ``clear_data`` list.

    Args:
        json_filename: Path of a ``{date: {section: {item: value}}}`` JSON file.
        next_date: Date to predict for, in ``%Y-%m-%d`` format.

    Returns:
        The array returned by ``LinearRegression.predict`` for the single
        requested date.

    Raises:
        ValueError: If the file holds no report dates, or ``next_date`` does
            not match ``%Y-%m-%d``.
    """
    raw_data = pd.read_json(json_filename, encoding='utf-8')

    def amount(report, section, item):
        # Figures are stored as text that may contain ',' separators.
        return float(report[section][item].replace(',', ''))

    frames = []
    for date, report in raw_data.items():
        # '资产总计' and '负债合计' are looked up under the non-current sections,
        # which is where the stored JSON keeps them.
        data = pd.DataFrame({
            '日期': [date],
            '流动资产合计': [amount(report, '流动资产', '流动资产合计')],
            '非流动资产合计': [amount(report, '非流动资产', '非流动资产合计')],
            '资产总计': [amount(report, '非流动资产', '资产总计')],
            '流动负债合计': [amount(report, '流动负债', '流动负债合计')],
            '非流动负债合计': [amount(report, '非流动负债', '非流动负债合计')],
            '负债合计': [amount(report, '非流动负债', '负债合计')],
            '存货': [amount(report, '流动资产', '存货')],
        })
        clear_data.append(data)

        # Debt-to-asset ratio
        data['资产负债率'] = data['负债合计'] / data['资产总计']
        # Current ratio
        data['流动比率'] = data['流动资产合计'] / data['流动负债合计']
        # Quick ratio
        data['速动比率'] = (data['流动资产合计'] - data['存货']) / data['流动负债合计']

        data['Risk_Score'] = data['资产负债率'] * 0.5 + data['流动比率'] * 0.3 + data['速动比率'] * 0.2

        # Numeric form of the report date.
        # NOTE(review): assumes pd.read_json turned the date-like column
        # labels into Timestamps - confirm against the stored files.
        data['日期n'] = data['日期'].apply(lambda x: x.timestamp())
        frames.append(data)

    if not frames:
        raise ValueError("no balance sheet data in " + json_filename)

    # Concatenate once instead of growing a DataFrame inside the loop.
    data_df = pd.concat(frames)

    x_train = data_df[['日期n']]
    y_train = data_df['Risk_Score']

    # Convert through pd.Timestamp so the prediction date is on the same
    # clock as the training dates: naive pandas Timestamps count from UTC,
    # while naive datetime.timestamp() uses the local time zone.
    parsed_next_date = datetime.datetime.strptime(next_date, '%Y-%m-%d')
    x_test = pd.DataFrame({'日期n': [pd.Timestamp(parsed_next_date).timestamp()]})

    model = LinearRegression()
    model.fit(x_train, y_train)

    next_date_score = model.predict(x_test)
    return next_date_score

def main():
    """Print the tracked companies ordered by predicted risk score, lowest first.

    For each stock code the score is predicted for 2023-6-30 from
    ``balance_sheet_<code>.json`` in the current directory.  Each company is
    printed as ``<name>(<code>) <score>``.
    """
    codes = {
        "000977": ["浪潮信息", 0],
        "002230": ["科大讯飞", 0],
        "603019": ["中科曙光", 0],
        "300457": ["赢合科技", 0],
        "002049": ["紫光国微", 0],
    }
    corporate_risk_score = {}
    for code, items in codes.items():
        label = items[0] + "(" + code + ")"
        # The prediction comes back as a one-element array.
        corporate_risk_score[label] = get_risk_score_by_json('balance_sheet_' + code + '.json',
                                                             "2023-6-30")[0]

    ranked = sorted(corporate_risk_score.items(), key=lambda x: x[1])
    for k, v in ranked:
        print(k, v)

# Run the ranking only when this file is executed as a script.
if __name__ == '__main__':
    main()

11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

# -*- coding: utf-8 -*-

from urllib import request
import ssl

# NOTE(review): swaps the default HTTPS context factory for the unverified one,
# so HTTPS requests made through urllib skip certificate verification.
ssl._create_default_https_context = ssl._create_unverified_context
from scrapy.selector import Selector
from scrapy.http import HtmlResponse
import json
import os
import time

def main():
    """Download the balance sheets of a fixed list of stocks.

    Calls ``scrapy_finance_indices`` once per stock code with the Sina
    Finance balance-sheet URL template.  Year offsets 0..5 are requested,
    i.e. the current year and the five years before it.  Existing result
    files are kept because ``overwrite`` is False.
    """
    # A tuple instead of a set: the stocks are always fetched in this order.
    codes = (
        "000977",
        "002230",
        "603019",
        "300457",
        "002049",
    )
    syear, eyear = 0, 6
    detail_dir = './'
    overwrite = False

    # These do not depend on the stock code, so they are built once.
    detail_url = "https://money.finance.sina.com.cn/corp/go.php/vFD_BalanceSheet/stockid/%s/ctrl/%s/displaytype/4.phtml"
    tr_xpath = '//*[@id="BalanceSheetNewTable0"]/tbody/tr'
    data_sign = 'balance_sheet'

    for code in codes:
        scrapy_finance_indices(code, detail_url, syear, eyear, tr_xpath, data_sign, detail_dir, overwrite)

def scrapy_finance_indices(code, detail_url, syear, eyear, tr_xpath, data_sign, detail_dir, overwrite):
    """Scrape the yearly report tables of one stock and save them as JSON.

    The ``def`` line and the ``res_file`` computation were missing from this
    copy of the function.  They are restored here with the signature that
    ``main`` calls.

    Args:
        code: Stock code substituted into ``detail_url``.
        detail_url: URL template with two ``%s`` slots (stock code, year).
        syear: First year offset (inclusive), counted back from the current year.
        eyear: Last year offset (exclusive).
        tr_xpath: XPath selecting the rows of the report table.
        data_sign: Prefix of the output file name.
        detail_dir: Directory of the output file.
        overwrite: When False, an existing output file is kept and nothing
            is downloaded.

    Returns:
        False when scraping is skipped (the result file already exists, or
        the year range is empty); True otherwise.
    """
    res_file = "%s/%s_%s.json" % (detail_dir, data_sign, code)
    if not overwrite and os.path.isfile(res_file):
        print("skip scrapy, result exist in:" + res_file)
        return False

    if syear >= eyear:
        print("skip scrapy, no vaild year range (%d, %d)" % (syear, eyear))
        return False

    this_year = int(time.strftime('%Y', time.localtime()))
    date_dict = {}

    for offset in range(syear, eyear):
        page_url = detail_url % (code, str(this_year - offset))
        # The context manager closes the connection even if reading fails.
        with request.urlopen(page_url, timeout=30) as resp:
            print("request url:" + resp.geturl())
            rt_code = resp.getcode()
            content = resp.read() if rt_code == 200 else None

        if content is None:
            print("error return code: %s" % rt_code)
            continue

        # Pass the real page URL, not the unformatted template.
        response = HtmlResponse(url=page_url, body=content)
        rows = (
            tr_sel.xpath('td//text()').extract()
            for tr_sel in Selector(response=response).xpath(tr_xpath)
        )
        _merge_report_rows(rows, date_dict)

        # Written after every year, so the file always holds what was fetched so far.
        write_file(date_dict, res_file)

    return True


def _merge_report_rows(rows, date_dict):
    """Fold the cell texts of one report table into ``date_dict``.

    ``rows`` yields one list of cell texts per table row.  A one-cell row is
    a section title.  The first multi-cell row supplies the report dates.
    Every later multi-cell row is stored as
    ``date_dict[date][section][row title] = value``, or directly under
    ``date_dict[date]`` while no section title has been seen.
    """
    header_title = None
    report_dates = None
    for cells in rows:
        if not cells:
            continue

        title_key = cells[0]
        if len(cells) == 1:
            header_title = title_key
            continue

        values = cells[1:]
        if not report_dates:
            report_dates = values
            continue

        # zip() tolerates a row with more cells than there are report dates.
        for report_date, value in zip(report_dates, values):
            info_dict = date_dict.setdefault(report_date, {})
            target = info_dict.setdefault(header_title, {}) if header_title else info_dict
            target[title_key] = value.strip()

# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()

11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

import pandas as pd
from sklearn.linear_model import LinearRegression
import datetime

# Module-level list: get_risk_score_by_json appends one DataFrame per report
# date to it, across all calls.
clear_data = []

def get_risk_score_by_json(json_filename, next_date):
    """Predict a company's risk score for ``next_date`` from its balance sheets.

    For every report date in the JSON file, the debt-to-asset ratio, current
    ratio and quick ratio are combined into
    ``Risk_Score = 0.5 * debt ratio + 0.3 * current ratio + 0.2 * quick ratio``.
    A linear regression of the score against the report timestamp is then
    evaluated at ``next_date``.

    Side effect: every per-date DataFrame is appended to the module-level
    ``clear_data`` list.

    Args:
        json_filename: Path of a ``{date: {section: {item: value}}}`` JSON file.
        next_date: Date to predict for, in ``%Y-%m-%d`` format.

    Returns:
        The array returned by ``LinearRegression.predict`` for the single
        requested date.

    Raises:
        ValueError: If the file holds no report dates, or ``next_date`` does
            not match ``%Y-%m-%d``.
    """
    raw_data = pd.read_json(json_filename, encoding='utf-8')

    def amount(report, section, item):
        # Figures are stored as text that may contain ',' separators.
        return float(report[section][item].replace(',', ''))

    frames = []
    for date, report in raw_data.items():
        # '资产总计' and '负债合计' are looked up under the non-current sections,
        # which is where the stored JSON keeps them.
        data = pd.DataFrame({
            '日期': [date],
            '流动资产合计': [amount(report, '流动资产', '流动资产合计')],
            '非流动资产合计': [amount(report, '非流动资产', '非流动资产合计')],
            '资产总计': [amount(report, '非流动资产', '资产总计')],
            '流动负债合计': [amount(report, '流动负债', '流动负债合计')],
            '非流动负债合计': [amount(report, '非流动负债', '非流动负债合计')],
            '负债合计': [amount(report, '非流动负债', '负债合计')],
            '存货': [amount(report, '流动资产', '存货')],
        })
        clear_data.append(data)

        # Debt-to-asset ratio
        data['资产负债率'] = data['负债合计'] / data['资产总计']
        # Current ratio
        data['流动比率'] = data['流动资产合计'] / data['流动负债合计']
        # Quick ratio
        data['速动比率'] = (data['流动资产合计'] - data['存货']) / data['流动负债合计']

        data['Risk_Score'] = data['资产负债率'] * 0.5 + data['流动比率'] * 0.3 + data['速动比率'] * 0.2

        # Numeric form of the report date.
        # NOTE(review): assumes pd.read_json turned the date-like column
        # labels into Timestamps - confirm against the stored files.
        data['日期n'] = data['日期'].apply(lambda x: x.timestamp())
        frames.append(data)

    if not frames:
        raise ValueError("no balance sheet data in " + json_filename)

    # Concatenate once instead of growing a DataFrame inside the loop.
    data_df = pd.concat(frames)

    x_train = data_df[['日期n']]
    y_train = data_df['Risk_Score']

    # Convert through pd.Timestamp so the prediction date is on the same
    # clock as the training dates: naive pandas Timestamps count from UTC,
    # while naive datetime.timestamp() uses the local time zone.
    parsed_next_date = datetime.datetime.strptime(next_date, '%Y-%m-%d')
    x_test = pd.DataFrame({'日期n': [pd.Timestamp(parsed_next_date).timestamp()]})

    model = LinearRegression()
    model.fit(x_train, y_train)

    next_date_score = model.predict(x_test)
    return next_date_score

# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()

11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

# Split the flat ``clear_data`` list into one consecutive slice per company.
companies_data = {}
num_companies = 5
# NOTE(review): despite its name this is used as "entries per company minus
# one" - each company gets quarters_per_year + 1 consecutive entries. Confirm
# the count against the scraped data.
quarters_per_year = 20

for i in range(num_companies):
    company_start_index = i * (quarters_per_year + 1)
    company_end_index = company_start_index + quarters_per_year + 1
    company_data = clear_data[company_start_index:company_end_index]
    companies_data[f'Company {i+1}'] = company_data

# Print each company's data
for company, data in companies_data.items():
    print(f"{company} Data:")
    for entry in data:
        print(entry)
    print("=" * 50)

# Compute the yearly debt ratio of each company and store the results
liabilities_ratios_by_year = {}

# Walk through each company's data
for company, data_frames in companies_data.items():
    # Total liabilities and total assets accumulated per year
    yearly_liabilities = {}
    yearly_assets = {}

    # Walk through the DataFrame of each report date
    for df in data_frames:
        year = df['日期'].iloc[0].year  # assumes the date column has this name

        # Add this report to the year's running totals
        yearly_liabilities[year] = yearly_liabilities.get(year, 0) + df['负债合计'].iloc[0]
        yearly_assets[year] = yearly_assets.get(year, 0) + df['资产总计'].iloc[0]

    # Pair each year's liabilities with the SAME year's assets.  Both dicts
    # are filled together above, so they share their keys.
    liabilities_ratios_by_year[company] = {
        year: (liabilities / yearly_assets[year] if yearly_assets[year] != 0 else None)
        for year, liabilities in yearly_liabilities.items()
    }

# Print the results
for company, ratios_by_year in liabilities_ratios_by_year.items():
    print(f"{company} 的负债率如下:")
    for year, ratio in ratios_by_year.items():
        print(f"{year}: {ratio}")
22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222

import pandas as pd
import matplotlib.pyplot as plt

# Debt ratio of each company, keyed by stock code and then by year
companies_data = {
    '000977': {
        2022: 0.33782028921390805,
        2021: 0.38122553365530343,
        2020: 0.4269912156296573,
        2019: 0.6147255061188343,
        2018: 0.6601938767030497,
    },
    '002230': {
        2022: 0.1937557406842251,
        2021: 0.22159547006297672,
        2020: 0.2717665662231457,
        2019: 0.33819468982170936,
        2018: 0.42798182086290537,
    },
    '603019': {
        2022: 0.25472321965381806,
        2021: 0.30979914956635357,
        2020: 0.41427850181554887,
        2019: 0.45239789459122415,
        2018: 0.6687705744490087,
    },
    '300457': {
        2022: 0.15622576789820164,
        2021: 0.20946994930684334,
        2020: 0.319094753398101,
        2019: 0.40719460546266983,
        2018: 0.48014811975158794,
    },
    '002049': {
        2022: 0.1308696989339981,
        2021: 0.17703936060827447,
        2020: 0.2518915792080338,
        2019: 0.28782042409524033,
        2018: 0.3256419971764099,
    },
}

# Draw a line chart with one line per company
plt.figure(figsize=(10, 6))
for company, ratios_by_year in companies_data.items():
    # Plot in chronological order regardless of how the dict was written.
    years = sorted(ratios_by_year)
    ratios = [ratios_by_year[year] for year in years]
    plt.plot(years, ratios, marker='o', label=company)

plt.title('Asset Liability Ratio Trends')
plt.xlabel('Year')
plt.ylabel('Asset Liability Ratio')
plt.legend()
plt.grid(True)
plt.show()

3.333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333

import json
import os

# Map each stock code to the JSON file that holds its balance sheets
file_mapping = {
    "000977": "balance_sheet_000977.json",
    "002049": "balance_sheet_002049.json",
    "002230": "balance_sheet_002230.json",
    "300457": "balance_sheet_300457.json",
    "603019": "balance_sheet_603019.json",
}

# Balance-sheet data of each stock, keyed by stock code
stock_data = {}

# Load every file
for stock_code, file_name in file_mapping.items():
    file_path = os.path.join("./", file_name)  # replace with the directory that holds your data files
    # Read the files with an explicit encoding instead of the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
        stock_data[stock_code] = data

# Rows of [company code, report date, accounts receivable]
data_list = []

# First level: company code
for company_code, quarters in stock_data.items():
    # Second level: report date; third level: asset sections
    for quarter_date, assets in quarters.items():
        # Only reports that list accounts receivable are used
        if '应收账款' in assets['流动资产']:
            receivables_value = assets['流动资产']['应收账款']
            # Rewrite the date with '/' separators
            formatted_date = quarter_date.replace('-', '/')
            data_list.append([company_code, formatted_date, receivables_value])

from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType

# Arrange the data for the chart: one series per year, one x position per
# company and reporting period
companies = set()
years = [2018, 2019, 2020, 2021, 2022]  # the last five years
series_data = {year: {} for year in years}
x_labels = set()

for entry in data_list:
    company_code = entry[0]
    date_parts = entry[1].split('/')
    year = int(date_parts[0])
    receivables = float(entry[2].replace(',', ''))

    companies.add(company_code)
    if year in years:
        # Label = company plus the date without its year, so the same
        # reporting period of different years shares one x position.
        x_label = company_code + ' ' + '/'.join(date_parts[1:])
        x_labels.add(x_label)
        series_data[year][x_label] = receivables

x_labels = sorted(x_labels)

# Draw the grouped bar chart with pyecharts
bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
# Set the x axis once.  Calling add_xaxis per year would replace the axis on
# every call and leave the series misaligned.
bar.add_xaxis(x_labels)
for year in years:
    # None leaves a gap where a company has no figure for that period.
    bar.add_yaxis(f'{year}', [series_data[year].get(label) for label in x_labels])

bar.set_global_opts(
    title_opts=opts.TitleOpts(title="近五年每个季度的应收账款"),
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
    datazoom_opts=[opts.DataZoomOpts(range_start=0, range_end=100)],
    toolbox_opts=opts.ToolboxOpts(),
)
bar.render("bar_chart.html")  # save the chart as an HTML file

白了又了白之小白

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【无标题】

import sslimport oscodes = {“000977”,“002230”,“603019”,“300457”,“002049”,try:else:fb.close()if== ‘’:main()codes = {“000977”: [“浪潮信息”, 0],“002230”: [“科大讯飞”, 0],“603019”: [“中科曙光”, 0],
复制链接

扫一扫