"""Compare a micro-cap and a large-cap price index of profitable A-shares.

(Original header note: accuracy validation of 5-day-ahead predictions.)

The script, meant to run inside the JoinQuant research environment:

1. builds the stock universe and ranks profitable stocks (EPS > 0) by
   circulating market cap;
2. sums the daily bars of the 200 smallest and of the 200 largest stocks into
   two aggregate price series;
3. flags up/down change points on each series with a CUSUM detector and plots
   them;
4. compares the daily percentage changes of both series with a t-test, a box
   plot and a linear regression.
"""
# The platform star-imports stay first so that the explicit imports below
# (notably ``datetime``) win if the platform modules export the same names.
from jqdata import *
from jqfactor import *
import jqdata

import datetime
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import talib
import xgboost as xgb
from pandas.tseries.offsets import CustomBusinessDay
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split  # dataset splitting
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm  # progress bars
from xgboost import XGBClassifier  # XGBoost classifier implementation

# Suppress warning messages for the whole script.
warnings.filterwarnings("ignore")

# Today's date, used as the end date of every query below.
current_date = datetime.datetime.now()
current_time = current_date.strftime("%Y-%m-%d")

# Index codes queried for each supported pool name.
_INDEX_POOLS = {
    'HS300': ('000300.XSHG',),
    'ZZ500': ('399905.XSHE',),
    'ZZ800': ('399906.XSHE',),
    'CYBZ': ('399006.XSHE',),
    'ZXBZ': ('399005.XSHE',),
    'A': ('000002.XSHG', '399107.XSHE'),
    'AA': ('000985.XSHG',),
}
# Pools from which codes starting with the prefixes below are dropped
# (presumably ChiNext, STAR Market and Beijing-exchange listings -- confirm).
_PREFIX_FILTERED_POOLS = ('A', 'AA')
_EXCLUDED_CODE_PREFIXES = ('3', '68', '4', '8')

START_DATE = '2023-01-01'
PRICE_FIELDS = ['open', 'close', 'high', 'low', 'volume', 'money']
INDEX_SIZE = 200  # number of constituents in each aggregate series


def delect_stop(stocks, beginDate, n=365):
    """Drop securities that were listed less than ``n`` days before ``beginDate``.

    Args:
        stocks: iterable of security codes.
        beginDate: reference date as a ``"%Y-%m-%d"`` string.
        n: minimum number of calendar days between listing and ``beginDate``.

    Returns:
        List of the codes whose listing date is strictly earlier than
        ``beginDate - n days``, in their original order.
    """
    cutoff = (datetime.datetime.strptime(beginDate, "%Y-%m-%d")
              - datetime.timedelta(days=n)).date()
    return [stock for stock in stocks
            if get_security_info(stock).start_date < cutoff]


def get_stock(stockPool, begin_date):
    """Return the filtered constituents of a named stock pool.

    Args:
        stockPool: one of ``'HS300'``, ``'ZZ500'``, ``'ZZ800'``, ``'CYBZ'``,
            ``'ZXBZ'``, ``'A'`` or ``'AA'``.
        begin_date: query date as a ``"%Y-%m-%d"`` string.

    Returns:
        List of security codes with ST stocks and recently listed stocks
        (see ``delect_stop``) removed.

    Raises:
        ValueError: if ``stockPool`` is not a known pool name.
    """
    if stockPool not in _INDEX_POOLS:
        raise ValueError(f"unknown stock pool: {stockPool!r}")

    stockList = []
    for index_code in _INDEX_POOLS[stockPool]:
        stockList = stockList + get_index_stocks(index_code, begin_date)
    if stockPool in _PREFIX_FILTERED_POOLS:
        stockList = [stock for stock in stockList
                     if not stock.startswith(_EXCLUDED_CODE_PREFIXES)]

    # Remove ST stocks; ``.iloc[0]`` reads the single returned row by position.
    st_data = get_extras('is_st', stockList, count=1, end_date=begin_date)
    stockList = [stock for stock in stockList if not st_data[stock].iloc[0]]

    # Remove stocks that have not been listed long enough.
    return delect_stop(stockList, begin_date)


def detect_via_cusum_lg(ts, istart=30, threshold_times=5):
    """Flag change points in a positive series with a log-ratio CUSUM.

    For every observation ``i >= istart`` the previous ``istart`` observations
    give a mean and the standard deviation of ``log(value / mean)``. The log
    ratio of ``ts[i]`` to that mean is accumulated into an upper and a lower
    one-sided sum, using the standard deviation as slack. A sum whose
    magnitude exceeds ``threshold_times`` standard deviations raises a signal
    and is reset.

    Only past observations enter the statistics of step ``i``, and the signal
    is stored at index ``i`` itself, so the result lines up with ``ts``.

    Args:
        ts: 1-D array-like of positive values.
        istart: look-back window length; the first ``istart`` outputs are 0.
            TA-Lib is expected to reject windows shorter than 2.
        threshold_times: multiplier of the window standard deviation used as
            the alarm threshold.

    Returns:
        ``numpy`` array of ``len(ts)`` values: 1 (upward signal), -1 (downward
        signal) or 0 (no signal).
    """
    ts = np.asarray(ts, dtype=float)
    signals = np.zeros(len(ts))
    S_h = 0.0  # upper cumulative sum, carried across iterations
    S_l = 0.0  # lower cumulative sum, carried across iterations
    for i in range(istart, len(ts)):
        window = ts[i - istart:i]  # strictly before i: no look-ahead
        mean = talib.SMA(window, timeperiod=istart)[-1]
        std = talib.STDDEV(np.log(window / mean), timeperiod=istart)[-1]
        tslog = np.log(ts[i] / mean)

        # Keep the constant as first argument: when the statistic is NaN the
        # comparison fails and the sum falls back to 0 instead of staying NaN.
        S_h = max(0, S_h + tslog - std)
        S_l = min(0, S_l + tslog + std)

        limit = threshold_times * std
        if S_h > limit:
            signals[i] = 1
            S_h = 0.0  # restart accumulation after an upward alarm
        elif abs(S_l) > limit:
            signals[i] = -1
            S_l = 0.0  # restart accumulation after a downward alarm
    return signals


def _profitable_by_float_cap(stocks):
    """Return fundamentals of ``stocks`` with EPS > 0, ascending by float cap.

    No date is passed to ``get_fundamentals``, so the platform's default
    snapshot date applies.
    """
    q = query(
        valuation.code, valuation.circulating_market_cap, indicator.eps
    ).filter(
        valuation.code.in_(stocks)
    ).order_by(valuation.circulating_market_cap.asc())
    fundamentals = get_fundamentals(q)
    return fundamentals[fundamentals['eps'] > 0]


def _build_price_index(codes, start_date=START_DATE, end_date=None):
    """Sum the daily bars of ``codes`` field by field into one DataFrame."""
    end_date = current_time if end_date is None else end_date
    total = pd.DataFrame()
    for code in codes:
        bars = get_price(code, frequency='1d', start_date=start_date,
                         end_date=end_date, fields=PRICE_FIELDS)
        # fill_value=0 lets a stock with a missing bar contribute nothing
        # instead of turning the whole cell into NaN.
        total = total.add(bars, fill_value=0)
    return total


def _signal_positions(signals):
    """Return the index lists of upward (1) and downward (-1) signals."""
    signals = np.asarray(signals)
    return (np.flatnonzero(signals == 1).tolist(),
            np.flatnonzero(signals == -1).tolist())


def _plot_signals(series, ups, downs, title):
    """Plot ``series`` with '^' markers at ``ups`` and 'v' markers at ``downs``."""
    plt.figure(figsize=(10, 5))
    plt.plot(series, color='y', lw=2.)
    plt.plot(series, '^', markersize=5, color='r',
             label='买入信号 UP signal', markevery=ups)
    plt.plot(series, 'v', markersize=5, color='g',
             label='卖出信号 DOWN signal', markevery=downs)
    plt.xlabel('时间序列:2023年至今', fontsize=16)
    plt.ylabel('指数点位', fontsize=16)
    plt.title(title, fontsize=16)
    plt.legend(loc='best')
    plt.show()


def _pct_change(series):
    """Return the step-to-step percentage change of a 1-D array."""
    return (np.diff(series) / series[:-1]) * 100


# ---------------------------------------------------------------------------
# Universe: profitable stocks ranked by circulating market cap.
# ---------------------------------------------------------------------------
stockList = get_stock('AA', current_time)
StockList = stockList  # kept for notebook code that uses either spelling
ranked = _profitable_by_float_cap(stockList)

# ---------------------------------------------------------------------------
# Micro-cap series: the INDEX_SIZE smallest stocks.
# ---------------------------------------------------------------------------
df = _build_price_index(ranked.head(INDEX_SIZE)['code'].tolist())
df["price"] = (df["open"] + df['close']) / 2
dt0 = np.array(df["price"])
s_list = detect_via_cusum_lg(dt0, istart=5, threshold_times=0.1)
listup, listdown = _signal_positions(s_list)
_plot_signals(dt0, listup, listdown, f'微市值股指数')

# ---------------------------------------------------------------------------
# Large-cap series: the INDEX_SIZE largest stocks. It is accumulated into its
# own DataFrame; the earlier code added onto the micro-cap total by mistake.
# ---------------------------------------------------------------------------
DF = _build_price_index(ranked.tail(INDEX_SIZE)['code'].tolist())
DF["price"] = (DF["open"] + DF['close']) / 2
DF0 = np.array(DF["price"])
s_list = detect_via_cusum_lg(DF0, istart=5, threshold_times=0.1)
Listup, Listdown = _signal_positions(s_list)
_plot_signals(DF0, Listup, Listdown, f'大市值股指数')

print(f'2023年至今优质微盘股_推荐买入次数: {len(listup)} 推荐卖出次数: {len(listdown)}')
print(f'2023年至今优质大盘股_推荐买入次数: {len(Listup)} 推荐卖出次数: {len(Listdown)}')

# ---------------------------------------------------------------------------
# Compare the daily percentage changes of the two series.
# ---------------------------------------------------------------------------
DF0_change = _pct_change(DF0)
dt0_change = _pct_change(dt0)

# Independent two-sample t-test on the daily changes.
t_statistic, p_value = stats.ttest_ind(dt0_change, DF0_change)

# Side-by-side box plots annotated with the p-value.
plt.figure(figsize=(10, 5))
sns.boxplot(data=[dt0_change, DF0_change], palette='Set2')
plt.xticks([0, 1], ['微市值涨跌幅', '大市值涨跌幅'])
plt.text(0.5, 0.5, f'p_value={p_value:.4f}', color='red', ha='center',
         va='center', fontsize=20, transform=plt.gca().transAxes)
plt.legend()
plt.title('箱线图及T-TSET')
plt.show()

# Regress micro-cap changes on large-cap changes; the slope is read as the
# hedge ratio.
a = DF0_change.reshape(-1, 1)
b = dt0_change.reshape(-1, 1)
model = LinearRegression().fit(a, b)
slope = model.coef_[0][0]
print("回归系数(对冲比例):", slope)

plt.scatter(a, b, label='实际数据')
plt.plot(a, model.predict(a), color='red', label='回归线')
plt.xlabel('大市值股指数')
plt.ylabel('微市值股指数')
plt.title('大市值股指数和微市值股指数之间的关系')
plt.legend()
plt.show()

# A slope within ``threshold`` of 1 is reported as a hedging relationship;
# tune the threshold to the use case.
threshold = 0.1
if abs(slope - 1) < threshold:
    print("存在对冲关系")
else:
    print("不存在对冲关系")