目录
已有数据准备:
1.公司所对应的行业
2.各个公司每天对应的风险值
步骤一:将公司按行业进行分类
最终得出以行业为键,公司列表为值的字典
import pandas as pd
excel_path = XX
# 证券按行业分类
def classify_securities_by_industry(excel_path, industry_column='行业', abbreviation_column='证券简称', total_capital_column='总股本', min_count=5):
# 读取Excel文件
df = pd.read_excel(excel_path)
# 按行业属性分类
grouped_by_industry = df.groupby(industry_column)
# 初始化字典用于存储结果
industry_data = {}
# 遍历分组并筛选出数量大于等于min_count的行业
for industry, group in grouped_by_industry:
# 按照总股本列进行排序
sorted_group = group.sort_values(by=total_capital_column, ascending=False)
# 获取前5个证券简称
top_securities = sorted_group.head(5)[abbreviation_column].tolist()
# 如果数量大于等于min_count,存储结果
if len(top_securities) >= min_count:
industry_data[industry] = {'securities': top_securities}
return industry_data
# 示例使用
industry_column_name = '行业' # 替换为Excel中的行业列名
abbreviation_column_name = '证券简称' # 替换为Excel中的证券简称列名
total_capital_column_name = '总股本' # 替换为Excel中的总股本列名
excel_path = XX
result = classify_securities_by_industry(excel_path, industry_column_name, abbreviation_column_name, total_capital_column_name, min_count=5)
# 打印结果
for industry, data in result.items():
print(f"行业: {industry}, 前5个证券简称: {data['securities']}")
#运行结果:
#行业: A03 畜牧业, 前5个证券简称: ['温氏股份', '牧原股份', '天邦食品', '圣农发展', '立华股份']
步骤二:将所对应行业的公司的风险数据合并
import Kind
import pandas as pd
industry_column_name = '行业' # 替换为Excel中的行业列名
abbreviation_column_name = '证券简称' # 替换为Excel中的证券简称列名
total_capital_column_name = '总股本' # 替换为Excel中的总股本列名
excel_path = XX
result = Kind.classify_securities_by_industry(excel_path, industry_column_name, abbreviation_column_name, total_capital_column_name, min_count=5)
# 打印结果
for industry, data in result.items():
print(f"行业: {industry}, 前5个证券简称: {data['securities']}")
def CompanySelect(industry):
if industry in result.keys():
companies = result[industry]['securities'] # 获取特定行业的前五个公司列表
return companies[:5]
else:
return False
def DataCombine(companies):
df1 = pd.read_csv(XX)
df2 = pd.read_csv(XX)
df3 = pd.read_csv(XX)
df4 = pd.read_csv(XX)
df5 = pd.read_csv(XX)
column0 = df1.iloc[:, 0]
column1 = df1.iloc[:, 1]
column2 = df2.iloc[:, 1]
column3 = df3.iloc[:, 1]
column4 = df4.iloc[:, 1]
column5 = df5.iloc[:, 1]
df = pd.concat([column0, column1, column2, column3, column4, column5], axis=1)
df.columns = ["日期","指数1","指数2","指数3","指数4","指数5"]
return df
companies = CompanySelect('A03 畜牧业')
print(companies)
print(DataCombine(companies))
步骤三:折线图的绘制
import pandas as pd
import Kind
import DataPrepare
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']
#生成行业-公司对应字典
industry_column_name = '行业' # 替换为Excel中的行业列名
abbreviation_column_name = '证券简称' # 替换为Excel中的证券简称列名
total_capital_column_name = '总股本' # 替换为Excel中的总股本列名
excel_path = r"D:\大学学习资料\大二上学习资料\花旗杯\折线图\案例-深证\指数样本股.xlsx"
result = Kind.classify_securities_by_industry(excel_path, industry_column_name, abbreviation_column_name, total_capital_column_name, min_count=5)
# 打印结果
for industry, data in result.items():
print(f"行业: {industry}, 前5个证券简称: {data['securities']}")
#获取公司数据
companies = DataPrepare.CompanySelect('A03 畜牧业')
print(companies)
df_companies = DataPrepare.DataCombine(companies)
#画折线图
def Linechart(companies,df):
# # 将日期列转换为日期时间对象,并按照日期进行排序
# df['日期'] = pd.to_datetime(df['日期'], format='%Y%m%d')
# df.sort_values(by='日期', inplace=True)
plt.plot(df["日期"],df["指数1"],label=companies[0],linewidth=3,color='green',marker='o')
plt.plot(df["日期"],df["指数2"],label=companies[1],linewidth=3,color='deepskyblue',marker='o')
plt.plot(df["日期"], df["指数3"], label=companies[2], linewidth=3, color='mediumblue', marker='o')
plt.plot(df["日期"], df["指数4"], label=companies[3], linewidth=3, color='grey', marker='o')
plt.plot(df["日期"], df["指数5"], label=companies[4], linewidth=3, color='black', marker='o')
plt.xlabel("日期")
plt.ylabel('风险值')
plt.title("风险值折线图")
plt.legend()
plt.grid()
plt.show()
print(Linechart(companies,df_companies))