2023年至今，大市值与小市值股票的一些分析！

最新推荐文章于 2025-05-16 14:49:58 发布

V_fanglue3705

最新推荐文章于 2025-05-16 14:49:58 发布

阅读量270

点赞数 5

文章标签： python 开发语言

本文链接：https://blog.csdn.net/fanglue3705/article/details/138072243

版权

#  Micro-cap vs. large-cap composite comparison (2023 to date): CUSUM signals, t-test and regression
from jqdata import *
from jqfactor import *
import jqdata
from jqfactor import *

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split  #  分割数据集
from xgboost import XGBClassifier  #  XGBoost 分类器的实现
import matplotlib.pyplot as plt
import jqdata
from jqfactor import *
from tqdm import tqdm  
import warnings  #  创建进度条warnings.filterwarnings("ignore")用于抑制警告消息。
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from pandas.tseries.offsets import CustomBusinessDay
# from datetime import datetime
import datetime
import xgboost as xgb
import talib
from sklearn.preprocessing import StandardScaler         
from scipy import stats
import seaborn as sns
# Capture the current timestamp once and keep its "YYYY-MM-DD" form for date arguments.
current_date = datetime.datetime.now()
current_time = f"{current_date:%Y-%m-%d}"

# Keep only the stocks that were listed more than n days before beginDate.
def delect_stop(stocks,beginDate,n=365):
    """Drop securities whose listing date is too close to ``beginDate``.

    Args:
        stocks: iterable of security codes accepted by ``get_security_info``.
        beginDate: reference date as a ``"%Y-%m-%d"`` string.
        n: minimum listing age in calendar days (default 365).

    Returns:
        A list, in input order, of the codes whose ``start_date`` is strictly
        earlier than ``beginDate`` minus ``n`` days.

    Raises:
        ValueError: if ``beginDate`` does not match ``"%Y-%m-%d"``.
    """
    reference = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
    # The cutoff is a plain date because it is compared with start_date directly.
    cutoff = (reference - datetime.timedelta(days=n)).date()
    return [
        code
        for code in stocks
        if get_security_info(code).start_date < cutoff
    ]
# Build the stock universe for a named pool as of begin_date.
def get_stock(stockPool,begin_date):
    """Return the filtered constituent list of a named stock pool.

    Args:
        stockPool: one of ``'HS300'``, ``'ZZ500'``, ``'ZZ800'``, ``'CYBZ'``,
            ``'ZXBZ'``, ``'A'`` or ``'AA'``.
        begin_date: as-of date as a ``"%Y-%m-%d"`` string; it is passed to
            ``get_index_stocks``, ``get_extras`` and ``delect_stop``.

    Returns:
        List of security codes that are index constituents on ``begin_date``,
        are not flagged ``is_st`` on that date, and pass ``delect_stop``
        (listed long enough before ``begin_date``).

    Raises:
        ValueError: if ``stockPool`` is not one of the supported names.
    """
    # Pool name -> index codes whose constituents are concatenated.
    pool_indexes = {
        'HS300': ('000300.XSHG',),
        'ZZ500': ('399905.XSHE',),
        'ZZ800': ('399906.XSHE',),
        'CYBZ': ('399006.XSHE',),
        'ZXBZ': ('399005.XSHE',),
        'A': ('000002.XSHG', '399107.XSHE'),
        'AA': ('000985.XSHG',),
    }
    # Fail loudly on an unknown pool instead of hitting an unbound local later.
    if stockPool not in pool_indexes:
        raise ValueError(
            f"unknown stockPool {stockPool!r}; expected one of {sorted(pool_indexes)}"
        )

    stockList = []
    for index_code in pool_indexes[stockPool]:
        stockList += get_index_stocks(index_code, begin_date)

    if stockPool in ('A', 'AA'):
        # Exclude codes by prefix (presumably the ChiNext / STAR / Beijing
        # exchange boards -- confirm against the exchange code ranges).
        stockList = [stock for stock in stockList if not stock.startswith(('3', '68', '4', '8'))]

    # Remove ST stocks: a one-row frame of is_st flags as of begin_date.
    st_data = get_extras('is_st', stockList, count=1, end_date=begin_date)
    # .iloc[0] reads the single row by position; plain [0] on a date-indexed
    # Series depends on a deprecated positional fallback.
    stockList = [stock for stock in stockList if not st_data[stock].iloc[0]]

    # Remove stocks listed too recently (delect_stop's default listing age).
    return delect_stop(stockList, begin_date)

# Universe: the 'AA' pool as of today.
stockList = get_stock('AA', current_time)

# Rank the universe by circulating market cap, smallest first.
q = (
    query(valuation.code, valuation.circulating_market_cap, indicator.eps)
    .filter(valuation.code.in_(stockList))
    .order_by(valuation.circulating_market_cap.asc())
)
df = get_fundamentals(q)
# Keep positive-EPS names only, then the 200 smallest by circulating cap.
df = df[df['eps'] > 0].head(200)

# Sum the daily bars of every selected stock into one composite frame.
result_df = pd.DataFrame()
for code in df['code'].tolist():
    bars = get_price(
        code,
        frequency='1d',
        start_date='2023-01-01',
        end_date=current_time,
        fields=['open', 'close', 'high', 'low', 'volume', 'money'],
    )
    # fill_value=0 treats a cell missing on one side as zero in the sum.
    result_df = result_df.add(bars, fill_value=0)

# From here on `df` is the composite (summed) micro-cap frame.
df = result_df
# Two-sided CUSUM change-point detector on log deviations from a trailing mean.
def detect_via_cusum_lg(ts, istart=30, threshold_times=5):
    """Label each sample of a price series as an up, down or no change point.

    For every index ``i >= istart`` the trailing window ``ts[i-istart:i]``
    (strictly before ``i``, so no future data is read) supplies:

    * ``base``  - the window mean,
    * ``sigma`` - the population standard deviation of ``log(window / base)``.

    The log deviation ``log(ts[i] / base)`` is then fed into two running
    sums, with ``sigma`` as the slack term.  A sum that exceeds
    ``threshold_times * sigma`` in magnitude raises a signal and is reset.

    Args:
        ts: 1-D sequence of positive prices.
        istart: trailing window length; the first ``istart`` samples are
            warm-up and always labelled 0.  Must be >= 1.
        threshold_times: multiplier applied to ``sigma`` to get the alarm level.

    Returns:
        ``numpy.ndarray`` of the same length as ``ts``; element ``i`` is
        ``1`` (upward signal), ``-1`` (downward signal) or ``0`` for sample ``i``.

    Raises:
        ValueError: if ``istart`` is smaller than 1.
    """
    if istart < 1:
        raise ValueError("istart must be a positive integer")

    prices = np.asarray(ts, dtype=float)
    n_samples = len(prices)
    # Pre-allocated so signal i sits at index i and no O(n^2) appends are needed.
    signals = np.zeros(n_samples)

    s_high = 0.0
    s_low = 0.0
    for i in range(istart, n_samples):
        window = prices[i - istart:i]
        base = window.mean()
        # Standard deviation over the whole window, so `istart` really
        # controls the look-back.
        sigma = np.std(np.log(window / base))
        deviation = np.log(prices[i] / base)

        # Carry the sums forward; this is what makes the statistic cumulative.
        # max/min with the constant first keeps a NaN update from propagating.
        s_high = max(0.0, s_high + deviation - sigma)
        s_low = min(0.0, s_low + deviation + sigma)

        alarm_level = threshold_times * sigma
        if s_high > alarm_level:
            signals[i] = 1
            s_high = 0.0  # restart the upper sum after an alarm
        elif -s_low > alarm_level:
            signals[i] = -1
            s_low = 0.0  # restart the lower sum after an alarm
    return signals
# Composite price: midpoint of the summed open and summed close.
df["price"] = (df["open"] + df['close']) / 2

dt0 = np.array(df["price"])

# Run the detector, then collect the positions of up (1) and down (-1) signals.
s_list = detect_via_cusum_lg(dt0, istart=5, threshold_times=0.1)
listup = [pos for pos, flag in enumerate(s_list) if flag == 1]
listdown = [pos for pos, flag in enumerate(s_list) if flag == -1]

# Draw the series once as a line, then overlay markers only at the signal positions.
plt.figure(figsize=(10, 5))
plt.plot(dt0, color='y', lw=2.)
plt.plot(dt0, '^', markersize=5, color='r', label='买入信号 UP signal', markevery=listup)
plt.plot(dt0, 'v', markersize=5, color='g', label='卖出信号 DOWN signal', markevery=listdown)

plt.xlabel('时间序列：2023年至今', fontsize=16)
plt.ylabel('指数点位', fontsize=16)
plt.title('微市值股指数', fontsize=16)
plt.legend(loc='best')
plt.show()


# Universe: the 'AA' pool as of today.
StockList = get_stock('AA', current_time)
# Rank the universe by circulating market cap, smallest first.
q1 = query(valuation.code, valuation.circulating_market_cap, indicator.eps).filter(
    valuation.code.in_(StockList)).order_by(valuation.circulating_market_cap.asc())
DF = get_fundamentals(q1)
# DF = DF[DF['circulating_market_cap'] > 500]
# Keep positive-EPS names only, then the 200 largest by circulating cap
# (the tail of the ascending ranking).
DF = DF[DF['eps'] > 0]
DF = DF.tail(200)

# Sum the daily bars of every selected stock into one composite frame.
result_DF = pd.DataFrame()
for code in DF['code'].tolist():
    bars = get_price(code, frequency='1d', start_date='2023-01-01', end_date=current_time,
                     fields=['open', 'close', 'high', 'low', 'volume', 'money'])
    # Accumulate into the large-cap aggregate itself.  Adding to the micro-cap
    # frame `result_df` here would discard every stock but the last and mix in
    # the micro-cap composite.
    result_DF = result_DF.add(bars, fill_value=0)

# From here on `DF` is the composite (summed) large-cap frame.
DF = result_DF
# Large-cap composite: same price proxy, signal detection and chart as the micro-cap series.

# Composite price: midpoint of the summed open and summed close.
DF["price"] = (DF["open"] + DF['close']) / 2

DF0 = np.array(DF["price"])

# Run the detector, then collect the positions of up (1) and down (-1) signals.
s_list = detect_via_cusum_lg(DF0, istart=5, threshold_times=0.1)
Listup = [pos for pos, flag in enumerate(s_list) if flag == 1]
Listdown = [pos for pos, flag in enumerate(s_list) if flag == -1]

# Draw the series once as a line, then overlay markers only at the signal positions.
plt.figure(figsize=(10, 5))
plt.plot(DF0, color='y', lw=2.)
plt.plot(DF0, '^', markersize=5, color='r', label='买入信号 UP signal', markevery=Listup)
plt.plot(DF0, 'v', markersize=5, color='g', label='卖出信号 DOWN signal', markevery=Listdown)

plt.xlabel('时间序列：2023年至今', fontsize=16)
plt.ylabel('指数点位', fontsize=16)
plt.title('大市值股指数', fontsize=16)
plt.legend(loc='best')
plt.show()

# Signal counts for both composites (listup/listdown come from the micro-cap run).
print(f'2023年至今优质微盘股_推荐买入次数:  {len(listup)}     推荐卖出次数:  {len(listdown)}')
print(f'2023年至今优质大盘股_推荐买入次数:  {len(Listup)}     推荐卖出次数:  {len(Listdown)}')




# Day-over-day percentage change of the large-cap composite price.
DF0_diff = DF0[1:] - DF0[:-1]
DF0_change = (DF0_diff / DF0[:-1]) * 100

# Day-over-day percentage change of the micro-cap composite price.
dt0_diff = dt0[1:] - dt0[:-1]
dt0_change = (dt0_diff / dt0[:-1]) * 100

# plt.figure(figsize=(10, 5))
# plt.legend()
# plt.plot(dt0_change, label='微市值涨跌幅', color='blue')
# plt.plot(dt0_change-DF0_change, label='涨跌幅差异', color='black')
# plt.plot(DF0_change, label='大市值涨跌幅', color='red')
# # 显示图形
# plt.show()



# Independent two-sample t-test on the two daily percentage-change series.
t_statistic, p_value = stats.ttest_ind(dt0_change, DF0_change)

# Side-by-side box plots of the two series (micro-cap first, large-cap second).
plt.figure(figsize=(10, 5))
sns.boxplot(data=[dt0_change, DF0_change], palette='Set2')
plt.xticks([0, 1], ['微市值涨跌幅', '大市值涨跌幅'])

# Print the p-value in the middle of the axes (axes-fraction coordinates).
plt.text(0.5, 0.5, f'p_value={p_value:.4f}', color='red', ha='center', va='center', fontsize=20, transform=plt.gca().transAxes)

# No legend: none of the artists carries a label, so a legend would be empty.
plt.title('箱线图及T-TEST')
plt.show()




from sklearn.linear_model import LinearRegression

# Column vectors: large-cap changes are the regressor, micro-cap changes the target.
a = DF0_change[:, None]
b = dt0_change[:, None]

model = LinearRegression()
model.fit(a, b)

# Single coefficient of the one-feature, one-target fit.
slope = model.coef_[0, 0]
print("回归系数（对冲比例）:", slope)

# Scatter of the observations with the fitted line on top.
plt.scatter(a, b, label='实际数据')
fitted = model.predict(a)
plt.plot(a, fitted, color='red', label='回归线')
plt.xlabel('大市值股指数')
plt.ylabel('微市值股指数')
plt.title('大市值股指数和微市值股指数之间的关系')
plt.legend()
plt.show()

# The script reports a hedge relationship when the slope is within `threshold` of 1.
threshold = 0.1  # tolerance around a slope of 1; adjust as needed
verdict = "存在对冲关系" if abs(slope - 1) < threshold else "不存在对冲关系"
print(verdict)