长江证券分析员做过题为“交易数据反映的信息及预测性”的研究,见:http://www.docin.com/p-68475999.html。
笔者试图套用文章的模型进行高频数据的研究:期货数据形如:
....
/* Read the main-contract quote data and derive per-tick helper columns:
   a numeric time value, its hour/minute/second parts, and the
   row-to-row differences of price and amount. */
data test (drop=time);
    set quote20120731;

    /* Display formats: date as yyyy-mm-dd, time1 as hh:mm:ss (e.g. 10:00:00). */
    format date yymmdd10. time1 time8.;

    /* Convert the raw time field to a numeric SAS time value. */
    time1 = input(time, time8.);

    /* Split the time of day into its components. */
    hour   = hour(time1);
    minute = minute(time1);
    second = second(time1);

    /* Change relative to the previous row; missing on the first row.
       NOTE(review): amount is presumably a cumulative traded volume, so its
       difference is the per-tick volume - confirm against the data source. */
    dif_price  = dif(price);
    dif_amount = dif(amount);
run;
/* Remove call-auction and out-of-session records.
   Rows are kept only when 09:31:00 <= time1 < 15:16:00.
   The comparison is made on the full time value rather than on hour and
   minute separately, so records stamped before 09:00 or from 16:00 onward
   are removed as well (an hour=9 / hour=15 test alone lets them through).
   Rows whose time could not be parsed (missing time1) are left untouched. */
data test1;
    set test;
    if not missing(time1) then do;
        if time1 < '09:31:00't or time1 >= '15:16:00't then delete;
    end;
run;
/* Collapse the ticks to ONE row per second.
   For each (hour, minute, second) the output carries:
     - dif_price / dif_amount : the sum of the tick-level differences within
       that second (missing only if every tick in the second is missing);
     - price, amount, time1, futurecode, date : the values of the last tick
       of that second.
   A PROC SQL "group by hour, minute, second" that also selects non-grouped
   columns remerges the totals onto every tick, so a second with k ticks
   yields k rows that each hold the full total, and any later SUM over those
   rows counts the total k times. A BY-group DATA step avoids that.
   The stable sort (EQUALS) keeps the incoming tick order inside each second.
   NOTE(review): assumes test1 is in chronological order and holds a single
   contract and a single trading day, as the grouping keys contain neither
   futurecode nor date - confirm. */
proc sort data=test1 out=test2 equals;
    by hour minute second;
run;

data test2 (keep=futurecode date time1 price amount hour minute second
                 dif_price dif_amount);
    set test2;
    by hour minute second;

    retain _sum_dp _sum_dv;

    /* Start a new accumulator at the first tick of every second. */
    if first.second then do;
        _sum_dp = .;
        _sum_dv = .;
    end;

    /* SUM() skips missing arguments, like the SQL aggregate does. */
    _sum_dp = sum(_sum_dp, dif_price);
    _sum_dv = sum(_sum_dv, dif_amount);

    /* Emit only the last tick of the second, carrying the totals. */
    if last.second;
    dif_price  = _sum_dp;
    dif_amount = _sum_dv;
run;
/* Classify the price move of each row:
     flag =  1  upward move   (dif_price > 0)
     flag = -1  downward move (dif_price < 0)
     flag =  0  no move, or dif_price is missing
   SAS orders a missing value below every number, so a plain
   "dif_price < 0" test is true for missing values and would label them
   as downward moves; they are handled explicitly first. */
data test3;
    set test2;
    if missing(dif_price) then flag = 0;
    else flag = sign(dif_price);
run;
/* Accumulate the price change (ps) and the volume change (vol) per second
   and per direction flag.
   The select list contains columns that are not in the GROUP BY, so
   PROC SQL remerges the group totals back onto the detail rows: every input
   row is kept and carries the totals of its (hour, minute, second, flag)
   group.
   NOTE(review): if the input holds several rows for the same second that
   each already carry a per-second total, the sums here are multiplied by
   that row count. */
proc sql;
    create table test4 as
        select
            futurecode,
            date,
            time1,
            price,
            amount,
            hour,
            minute,
            second,
            flag,
            sum(dif_price)  as ps,
            sum(dif_amount) as vol
        from test3
        group by
            hour,
            minute,
            second,
            flag
    ;
quit;
/* Split the per-direction totals into separate columns for modelling:
     flag =  1 -> psp / volp  (upward price change and its volume)
     flag = -1 -> psn / voln  (downward price change and its volume)
   Rows with any other flag leave all four columns missing.
   This step only assigns values; the PROC SQL step that follows computes
   the one-minute totals. */
data test5 (drop=ps vol);
    set test4;
    select (flag);
        when (1) do;
            psp  = ps;
            volp = vol;
        end;
        when (-1) do;
            psn  = ps;
            voln = vol;
        end;
        otherwise;
    end;
run;
/* One-minute totals of the upward / downward price changes and volumes.
   Non-grouped columns are selected, so PROC SQL remerges the per-minute
   totals onto every input row; the number of rows is unchanged and each
   row carries the totals of its (hour, minute) group. A total is missing
   when the minute has no row in that direction. */
proc sql;
    create table test6 as
        select
            futurecode,
            date,
            time1,
            price,
            amount,
            hour,
            minute,
            second,
            flag,
            sum(psp)  as psp,
            sum(volp) as volp,
            sum(psn)  as psn,
            sum(voln) as voln
        from test5
        group by
            hour,
            minute
    ;
quit;
/* Keep one row per minute - the last row of each (hour, minute) group after
   ordering by time - so that its price can be used to compute one-minute
   returns. test6 is sorted in place. */
proc sort data=test6;
    by time1;
run;

data test7;
    set test6;
    by hour minute;
    /* Subsetting IF: only the final row of every minute is written out. */
    if last.minute;
run;
/* Build the regression variables from the one-minute rows.
   Parameter definitions: see http://www.docin.com/p-68475999.html
     r                  one-minute log return of price
     vol                volp + voln
     vol1-vol3          log(vol) and its first and second lags
     vol4-vol6          first differences of vol1-vol3
     VDIR, vdir1-2      (volp - voln) / vol and its lags
     vdense, vdense1-2  vol / (psp + psn) and its lags
     ratio, ratio1-2    (volp / psp) / (voln / psn) and its lags
   A minute without any upward (or downward) move arrives with missing
   psp/volp (or psn/voln) because the upstream SUM had nothing to add; those
   totals are treated as 0 so that vol, VDIR and vdense stay defined for
   one-sided minutes. Quantities whose denominator is 0 are left missing
   instead of triggering a division-by-zero or invalid-log error.
   All LAG/DIF calls are executed on every row, as their queues require. */
data test8;
    set test7;

    r = dif(log(price));

    /* Missing directional totals mean "nothing happened in that direction". */
    psp  = coalesce(psp, 0);
    psn  = abs(coalesce(psn, 0));   /* magnitude of the downward price change */
    volp = coalesce(volp, 0);
    voln = coalesce(voln, 0);

    vol = volp + voln;

    /* Log volume, its lags (lag of the log equals log of the lag) and the
       first differences of each. */
    if vol > 0 then vol1 = log(vol);
    vol2 = lag(vol1);
    vol3 = lag2(vol1);
    vol4 = dif(vol1);
    vol5 = dif(vol2);
    vol6 = dif(vol3);

    if vol > 0 then VDIR = (volp - voln) / vol;
    vdir1 = lag(VDIR);
    vdir2 = lag2(VDIR);

    if (psp + psn) ne 0 then vdense = vol / (psp + psn);
    vdense1 = lag(vdense);
    vdense2 = lag2(vdense);

    if psp ne 0 and psn ne 0 and voln ne 0 then
        ratio = (volp / psp) / (voln / psn);
    ratio1 = lag(ratio);
    ratio2 = lag2(ratio);
run;
/* Fit the regression models of the one-minute return r and write the
   results, including ODS graphics, to an RTF file. */
ods rtf file="e:\result.rtf";
ods graphics on;

proc reg data=test8;
    /* log volume and its two lags */
    model r = vol1-vol3;
    /* first differences of the log-volume terms */
    model r = vol4-vol6;
    /* VDIR and its two lags */
    model r = vdir vdir1 vdir2;
    /* vdense and its two lags */
    model r = vdense vdense1 vdense2;
    /* ratio and its two lags */
    model r = ratio ratio1 ratio2;
run;
/* PROC REG is interactive: RUN executes the models but leaves the procedure
   active. QUIT ends it so all output is complete before ODS is closed. */
quit;

ods graphics off;
ods rtf close;