很久没更新了,我不太擅长利用琐碎的时间去做事情,不过寒假来了。
引入市值列之后就要对个股基于市值进行拆分,先搞定A50类的大盘票看看效果吧。
def _to_akshare_symbol(code):
    """Return the first six characters of *code* with the exchange prefix akshare expects."""
    digits = code[0:6]
    # Shenzhen listings typically start with 0 or 3, Shanghai listings with 6.
    if digits.startswith(('0', '3')):
        return 'sz' + digits
    if digits.startswith('6'):
        return 'sh' + digits
    # Any other prefix is passed through without an exchange prefix.
    return digits


def _recent_quarter_ends(date_obj, count=3):
    """Return the *count* latest quarter-end datetimes strictly before *date_obj*, newest first."""
    candidates = [
        datetime(year, month, day)
        for year in (date_obj.year, date_obj.year - 1)
        for month, day in ((3, 31), (6, 30), (9, 30), (12, 31))
    ]
    earlier = sorted((d for d in candidates if d < date_obj), reverse=True)
    return earlier[:count]


def add_real_market_value(code, date, threshold=0.002):
    """Estimate the float market value of a stock excluding stable top-10 holders.

    The three most recent quarter-end top-10 free-float shareholder reports
    before *date* are fetched through akshare. Shareholders that appear in all
    three reports and whose holding ratio differs by at most *threshold*
    between every pair of reports are treated as stable. Their combined ratio
    (taken from the most recent report) is subtracted from 100 percent, and the
    remaining fraction is multiplied by the float market value that akshare
    reports for the stock.

    Args:
        code: Stock code such as '601595.SH' or '601595'; the first six
            characters are used as the numeric code.
        date: Reference date as 'YYYYMMDD' or 'YYYY-MM-DD'.
        threshold: Maximum absolute difference in holding ratio (same unit as
            the ratio column, which is subtracted from 100 below) for a
            shareholder to count as stable.

    Returns:
        The float market value scaled by the non-stable share of the float.

    Raises:
        ValueError: If *date* cannot be parsed, fewer than three quarter ends
            precede it, or the float market value row is missing.
    """
    # Parse the date first so malformed input fails before any network request.
    date_fmt = '%Y%m%d' if len(date) == 8 and date.isdigit() else '%Y-%m-%d'
    try:
        date_obj = datetime.strptime(date, date_fmt)
    except ValueError as exc:
        raise ValueError(
            f"日期格式无效: {date!r}, 应为 'YYYYMMDD' 或 'YYYY-MM-DD'"
        ) from exc

    previous_dates = _recent_quarter_ends(date_obj)
    if len(previous_dates) < 3:
        raise ValueError("无法找到足够的季度末日期")
    report_dates = [d.strftime('%Y%m%d') for d in previous_dates]

    symbol = _to_akshare_symbol(code)
    name_col = '股东名称'
    ratio_col = '占总流通股本持股比例'
    ratio_labels = ('ratio_1', 'ratio_2', 'ratio_3')  # ratio_1 is the newest report

    # Keep only the shareholder name and ratio from each report so the merges
    # below need no suffix handling.
    reports = [
        ak.stock_gdfx_free_top_10_em(symbol=symbol, date=report_date)[[name_col, ratio_col]]
        .rename(columns={ratio_col: label})
        for label, report_date in zip(ratio_labels, report_dates)
    ]
    # Inner joins keep only shareholders present in all three reports.
    merged_df = reports[0].merge(reports[1], on=name_col).merge(reports[2], on=name_col)

    # A shareholder is stable when every pairwise ratio difference is within threshold.
    is_stable = (
        ((merged_df['ratio_1'] - merged_df['ratio_2']).abs() <= threshold)
        & ((merged_df['ratio_2'] - merged_df['ratio_3']).abs() <= threshold)
        & ((merged_df['ratio_1'] - merged_df['ratio_3']).abs() <= threshold)
    )
    stable_pct = merged_df.loc[is_stable, 'ratio_1'].sum()
    real_ratio = (100 - stable_pct) / 100

    # Look the float market value up by its label, not by row position, so a
    # change in the row order of the akshare result cannot silently pick a
    # different field.
    info = ak.stock_individual_info_em(symbol=code.split('.')[0])
    float_cap = info.loc[info['item'] == '流通市值', 'value']
    if float_cap.empty:
        raise ValueError(f"未找到 {code} 的流通市值")
    free_market_value = float(float_cap.iloc[0])

    return real_ratio * free_market_value
# 示例调用
# real_value=add_real_market_value('601595.SH', '20240902')
# print(f"真实流通市值: {real_value}")
```python
import os
import pandas as pd
from tqdm import tqdm
# Folder holding the per-stock CSV files; each file name starts with the stock code.
folder_path = '/Users/gusuqi/我的/人工智能/量化交易实验/第一版净额数据'
# Reference date (YYYYMMDD) passed to add_real_market_value for every file.
as_of_date = '20240902'
# Collect the .csv files, sorted so every run processes them in the same order.
csv_files = sorted(filename for filename in os.listdir(folder_path) if filename.endswith('.csv'))
# (filename, error message) for every file that could not be processed.
failed_files = []

# Wrap the file list in tqdm to show a progress bar.
with tqdm(csv_files, desc="Processing Files", unit="file", ncols=100, dynamic_ncols=True) as pbar:
    for filename in pbar:
        file_path = os.path.join(folder_path, filename)
        # The stock code is the part of the file name before the first dot,
        # e.g. "000001" from a name beginning with "000001.SZ".
        stock_code = filename.split('.')[0]
        try:
            df = pd.read_csv(file_path)
            real_market_value = add_real_market_value(stock_code, as_of_date)
            # Assigning a scalar writes the same value into every row of the new column.
            df['Real_Market_Value'] = real_market_value
            # Overwrite the original file with the extended table.
            df.to_csv(file_path, index=False)
        except Exception as e:
            # Best-effort batch: keep going, but remember the failure because the
            # progress-bar postfix is overwritten by the next file.
            failed_files.append((filename, str(e)))
            pbar.set_postfix(file=filename, error=str(e))
        else:
            pbar.set_postfix(file=filename, stock_code=stock_code)

# Report every failure once the run is finished so none is lost.
if failed_files:
    print(f"{len(failed_files)} of {len(csv_files)} files failed:")
    for failed_name, failed_error in failed_files:
        print(f"  {failed_name}: {failed_error}")