前些天在某网站看到了一个比较有意思的股票交易策略,即在尾盘[14:30~15:00]挑选出当日换手率3%~5%,涨幅5%左右的股票买入,下一交易日伺机卖出。因为股票涨跌存在一定的延续性,个人感觉该策略似乎有一点道理,后经过连续几天的测试,认为该策略有进一步研究和验证的价值。于是打算通过python对其进行一个初步的验证,思路大概如下:
1、获取沪深A股部分股票(60XXXX和00XXXX)过去一个月(2022-06-01~2022-07-01)涨幅和换手率的历史数据
2、以连续两个交易日的数据构建数据集,其中前一日的涨幅和换手率取整后作为X,后一日的涨跌作为y
3、使用各种机器学习模型对数据进行拟合,这里主要基于sklearn并且不考虑堆叠
数据获取部分
要获取沪深股票历史数据,首先需要获取股票代码部分,然后根据股票代码再到相应接口获取历史数据,这两部分均可通过网易财经获得
import urllib.request
import re
import time
from random import random
# Paginated ranking endpoint of NetEase Finance; {exchange} is CNSESH
# (Shanghai) or CNSESZ (Shenzhen), {page} is the zero-based page index.
_RANK_URL = (
    'http://quotes.money.163.com/hs/service/diyrank.php'
    '?host=http%3A%2F%2Fquotes.money.163.com%2Fhs%2Fservice%2Fdiyrank.php'
    '&page={page}&query=STYPE%3AEQA%3BEXCHANGE%3A{exchange}'
    '&fields=NO%2CSYMBOL%2CNAME%2CPRICE%2CPERCENT%2CUPDOWN%2CFIVE_MINUTE'
    '%2COPEN%2CYESTCLOSE%2CHIGH%2CLOW%2CVOLUME%2CTURNOVER%2CHS%2CLB%2CWB%2CZF'
    '%2CPE%2CMCAP%2CTCAP%2CMFSUM%2CMFRATIO.MFRATIO2%2CMFRATIO.MFRATIO10'
    '%2CSNAME%2CCODE%2CANNOUNMT%2CUVSNEWS'
    '&sort=PERCENT&order=desc&count=24&type=query'
)
_CODE_PATTERN = re.compile('"CODE":"([0-9]*)"')


def _fetch_codes(exchange, pages, prefix, tag):
    '''
    Return the six-digit codes starting with *prefix* found on the first
    *pages* ranking pages of *exchange*. *tag* only distinguishes the
    progress/failure messages of the two exchanges.
    '''
    codes = []
    for page in range(pages):
        print('CodeGet' + tag, page)
        url = _RANK_URL.format(page=page, exchange=exchange)
        try:
            # The context manager closes the connection even if read/decode
            # fails; the timeout keeps one stalled page from hanging the run.
            with urllib.request.urlopen(url, timeout=30) as resp:
                text = resp.read().decode()
        except Exception as exc:
            # Best effort: report the page and move on. Unlike a bare
            # ``except:``, Ctrl-C still interrupts the crawl.
            print('failure' + tag, page, exc)
            continue
        for digits in _CODE_PATTERN.findall(text):
            symbol = digits[-6:]  # keep only the last six digits of CODE
            if symbol.startswith(prefix):
                codes.append(symbol)
        # Random pause so that requests are not sent too quickly.
        if random() < .7:
            time.sleep(5)
    return codes


def CodeGet(sh_pages=88, sz_pages=110):
    '''
    Collect Shanghai/Shenzhen A-share stock codes from NetEase Finance.

    The ranking page
    http://quotes.money.163.com/old/#query=EQA_EXCHANGE_CNSESH&DataType=HS_RANK&sort=PERCENT&order=desc&count=24&page=0
    loads its rows from the ``diyrank.php`` service (found by paging through
    the list with the browser developer tools, F12 -> Network). The Shanghai
    and Shenzhen requests differ only in the EXCHANGE value of the query
    (CNSESH / CNSESZ).

    sh_pages / sz_pages: number of result pages to walk for each exchange;
    the defaults (88 and 110) are the page counts observed when the script
    was written.

    Returns a list of six-digit code strings: Shanghai codes starting with
    '60' (68XXXX cannot be bought by the author anyway) followed by Shenzhen
    codes starting with '00'. Pages that fail to download are reported and
    skipped.
    '''
    code_lst = _fetch_codes('CNSESH', sh_pages, '60', '-')
    code_lst += _fetch_codes('CNSESZ', sz_pages, '00', '--')
    return code_lst
def DataGet(code: list):
    '''
    Download the daily history of every stock code in *code* into ./data.

    Uses the NetEase Finance interface
    http://quotes.money.163.com/service/chddata.html?code={}&start=20220601&end=20220702&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;TCAP;MCAP
    Only the change (PCHG) and turnover rate (TURNOVER) are needed, so only
    those two fields are requested.

    Codes starting with '0' are requested with a leading '1', codes starting
    with '6' with a leading '0'; any other code is skipped. Each response is
    saved as data/<code>.csv. A failed download is reported and the loop
    continues with the next code.
    '''
    from pathlib import Path

    url_template = ('http://quotes.money.163.com/service/chddata.html'
                    '?code={prefix}{symbol}&start=20220601&end=20220702'
                    '&fields=PCHG;TURNOVER')
    market_prefix = {'0': '1', '6': '0'}
    out_dir = Path('data')
    for i in code:
        print(i)
        prefix = market_prefix.get(i[:1])
        if prefix is None:
            continue
        try:
            # Created lazily so nothing touches the disk when there is
            # nothing to download; exist_ok makes repeated calls harmless.
            out_dir.mkdir(parents=True, exist_ok=True)
            urllib.request.urlretrieve(
                url_template.format(prefix=prefix, symbol=i),
                str(out_dir / f'{i}.csv'))
        except Exception as exc:
            # Best effort, but no longer silent: say which code failed.
            print('failure---', i, exc)
if __name__ == '__main__':
    # Step 1: collect the stock codes; step 2: download each code's history.
    stock_codes = CodeGet()
    DataGet(stock_codes)
写到这里先运行一下,发现请求频率有点过快,于是加了随机睡眠时间,再次运行共获得3015个文件
数据分析部分
首先看一下,换手率3%~5%且涨幅3%~7%的股票下一个交易日的涨跌幅情况,单个数据形式是这样的
import matplotlib.pyplot as plt
import pandas as pd
import os
# Collect the next trading day's change (%) for every stock-day whose change
# was 3%~7% and whose turnover rate was 3%~5%, then print the mean and plot
# the distribution.
# Column 3 is read as the daily change and column 4 as the turnover rate;
# row j-1 is treated as the trading day after row j (assumes the downloaded
# CSV lists the newest day first -- TODO confirm against a sample file).
lst = []
for i in os.listdir('data'):
    path = os.path.join('data', i)  # portable, unlike a hard-coded 'data\\'
    data = None
    for enc in ('utf-8', 'gbk'):
        try:
            data = pd.read_csv(path, encoding=enc).values
            break
        except UnicodeDecodeError:
            continue  # wrong encoding, try the next one
        except (OSError, ValueError):
            break  # unreadable, empty or malformed file: skip it
    if data is None or data.shape[1] < 5:
        continue
    for j in range(len(data) - 1, 0, -1):
        pchg, turnover, next_pchg = data[j, 3], data[j, 4], data[j - 1, 3]
        # Skip rows with missing or non-numeric values instead of relying on
        # a bare except; NaN would otherwise poison the mean and histogram.
        if not all(isinstance(v, (int, float)) and pd.notna(v)
                   for v in (pchg, turnover, next_pchg)):
            continue
        if 3 <= pchg <= 7 and 3 <= turnover <= 5:
            lst.append(next_pchg)
if lst:
    print(sum(lst) / len(lst))  # -0.011 in the author's run
    plt.hist(lst, bins=50)
    plt.show()
else:
    print('no matching samples found in data')
emmmmm,基本上是正态的,但可以看到涨停的数量有些异常,再看下细分的统计情况
from collections import defaultdict
import matplotlib.pyplot as plt
import pandas as pd
import os
# The daily change is split into 5 classes at the bounds
# [-10, -6, -2, 2, 6, 10] -> [-2, -1, 0, 1, 2]
# The daily turnover rate is split into 5 classes at the bounds
# [0, 1, 3, 5, 10] -> [0, 1, 2, 3, 4]
def cal(pchg, turnover):
    '''
    Take a daily change and a turnover rate and return their classes as the
    tuple (change class, turnover class).

    Each value gets the class of the first upper bound it does not exceed;
    a value above every bound (or one for which every comparison is false,
    such as NaN) gets the highest class.
    '''
    pchg_bounds = (-6, -2, 2, 6)
    turnover_bounds = (1, 3, 5, 10)
    p = next((idx - 2 for idx, bound in enumerate(pchg_bounds)
              if pchg <= bound), 2)
    t = next((idx for idx, bound in enumerate(turnover_bounds)
              if turnover <= bound), 4)
    return (p, t)
# Group the next trading day's change by the (change class, turnover class)
# of the current day, print the mean per group and draw one histogram per
# group in a 5x5 grid.
# Column 3 is read as the daily change and column 4 as the turnover rate;
# row j-1 is treated as the trading day after row j (assumes the downloaded
# CSV lists the newest day first -- TODO confirm against a sample file).
dic = defaultdict(list)
for i in os.listdir('data'):
    path = os.path.join('data', i)  # portable, unlike a hard-coded 'data\\'
    data = None
    for enc in ('utf-8', 'gbk'):
        try:
            data = pd.read_csv(path, encoding=enc).values
            break
        except UnicodeDecodeError:
            continue  # wrong encoding, try the next one
        except (OSError, ValueError):
            break  # unreadable, empty or malformed file: skip it
    if data is None or data.shape[1] < 5:
        continue
    for j in range(len(data) - 1, 0, -1):
        pchg, turnover, next_pchg = data[j, 3], data[j, 4], data[j - 1, 3]
        # Only rows where all three values are real numbers are usable:
        # cal() would put a NaN into the top class and a NaN next-day change
        # would turn the group's mean into NaN.
        if not all(isinstance(v, (int, float)) and pd.notna(v)
                   for v in (pchg, turnover, next_pchg)):
            continue
        dic[cal(pchg, turnover)].append(next_pchg)

for k, v in dic.items():
    print(f'{k}:{sum(v) / len(v):.2f}')
# Output of the author's run:
# (0, 0):0.33
# (-1, 0):0.76
# (1, 1):0.26
# (1, 0):0.27
# (-1, 1):0.79
# (0, 1):0.29
# (2, 1):1.66
# (1, 3):-0.33
# (1, 2):-0.00
# (-1, 2):0.49
# (2, 3):0.80
# (0, 2):0.24
# (-1, 3):0.31
# (0, 3):-0.08
# (2, 2):0.61
# (-2, 2):0.84
# (2, 4):0.06
# (0, 4):-0.20
# (-1, 4):-0.32
# (1, 4):-0.85
# (-2, 3):-0.35
# (-2, 1):0.58
# (-2, 4):-1.35
# (2, 0):4.25
# (-2, 0):-4.44

fig = plt.figure()
for row, i in enumerate((-2, -1, 0, 1, 2)):
    for j in range(5):
        # Subplots are numbered row by row starting at 1.
        ax = fig.add_subplot(5, 5, row * 5 + j + 1)
        # Membership test first: indexing a defaultdict would create the key.
        if (i, j) in dic:
            ax.hist(dic[(i, j)], label=f'pchg({i})-turnover({j})')
            ax.legend()
plt.show()
至此可以对策略进行些许修正:当日涨幅在6%以上,并且换手率小于3%时,下一交易日的期望涨幅较高
模型拟合部分
通过一系列机器学习模型对数据进行拟合,保留表现最好的模型,并根据其预测结果作图
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import random
from sklearn.linear_model import SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
# Build (change, turnover) -> next-day change samples from every CSV in
# ./data, fit several sklearn regressors, keep the one with the lowest test
# MSE and plot its predictions over a (change, turnover) grid.
# Column 3 is read as the daily change and column 4 as the turnover rate;
# row j-1 is treated as the trading day after row j (assumes the downloaded
# CSV lists the newest day first -- TODO confirm against a sample file).
X_train, y_train, X_test, y_test = [], [], [], []
for i in os.listdir('data'):
    path = os.path.join('data', i)  # portable, unlike a hard-coded 'data\\'
    data = None
    for enc in ('utf-8', 'gbk'):
        try:
            data = pd.read_csv(path, encoding=enc).values
            break
        except UnicodeDecodeError:
            continue  # wrong encoding, try the next one
        except (OSError, ValueError):
            break  # unreadable, empty or malformed file
    # Skip the file entirely; falling through here would silently reuse the
    # previous file's rows (or fail with NameError on the first file).
    if data is None or data.shape[1] < 5:
        continue
    for j in range(len(data) - 1, 0, -1):
        pchg_j, turnover_j, next_pchg = data[j, 3], data[j, 4], data[j - 1, 3]
        # The regressors cannot be fitted on NaN or non-numeric values.
        if not all(isinstance(v, (int, float)) and pd.notna(v)
                   for v in (pchg_j, turnover_j, next_pchg)):
            continue
        # Random split, roughly 70% train / 30% test.
        if random() < 0.7:
            X_train.append([pchg_j, turnover_j])
            y_train.append(next_pchg)
        else:
            X_test.append([pchg_j, turnover_j])
            y_test.append(next_pchg)
X = X_train + X_test
y = y_train + y_test

# Fit every candidate with its default settings and keep the best one.
model, mse = None, np.inf
for mod in (SGDRegressor, KNeighborsRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor, DecisionTreeRegressor):
    model_tmp = mod()
    model_tmp.fit(X_train, y_train)
    mse_tmp = mean_squared_error(y_test, model_tmp.predict(X_test))
    if mse_tmp < mse:
        mse = mse_tmp
        model = model_tmp

# Evaluate the chosen model once over the whole grid. After meshgrid both
# arrays have shape (len(turnover), len(pchg)), so z[j, i] is the prediction
# for change pchg[i] and turnover rate turnover[j].
pchg = np.arange(-10, 10, 0.2)
turnover = np.arange(0, 10, 0.1)
pchg, turnover = np.meshgrid(pchg, turnover)
grid = np.column_stack([pchg.ravel(), turnover.ravel()])
z = model.predict(grid).reshape(pchg.shape)

scatter_data = pd.DataFrame({'pchg': pchg.ravel(), 'turnover': turnover.ravel(), 'z': z.ravel()})
scatter_data.plot.scatter('pchg', 'turnover', c='z', colormap='jet')
plt.show()  # scatter plot

# Surface plot. fig.gca(projection=...) no longer works on current
# Matplotlib, so the 3D axes are created with add_subplot.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot_surface(pchg, turnover, z, alpha=0.3)
plt.show()
效果一般,但仍不难看出高涨幅和低换手率带来的下一交易日的期望涨幅较大
最后
本文仅为过去一月的历史数据分析,无法明确已有规律能否在未来数据上生效(机器学习难以回答的问题),因此并不构成投资建议。