四年前的代码,当时想着能不能用这个算法,在股市上大赚一笔,现在想起来真是好笑,不过,想了,也做了,虽然没结果,也许对后来者有用,就共享出来,然后一笑了之。
%SVM_SMO solving regression problem;
%almost that all the basic module could be kept;
%using the RBF training data as the input;
%copy the SVM_SMO_C as the algorithm scheme;
%--------------------------------------------------------------------------
%build the training set;
%this is a regression problem;
%think about the linear efxt non-sensitive loss;
%--------------------------------------------------------------------------
%--------------------------------------------------------------------------
%20140218 updating
%今天需要更新的地方有:
%1,对偶间隙,不要偏置的版本;试验了,作用不大;
%2,偏置如何更新;作用不大,不过已经更新过了;
%3,delta_W矩阵的运用;似乎没什么作用,不过继续使用;
%4,ratio停机准则来选择第一个点;这个好像有点作用,我称之为“啃硬骨头法”——如果
%有一个外循环,造成的ratio不降反升,那就继续这个外循环,也就是第一个点不变化,继
%续这个过程,知道下降为止,试验结果还可以;
%5,但是在内循环,需要找到能够让对偶目标增长最快的点,所以要记录以前的点,我称之为
%“好吃你就多吃一点”法则;
%6,总的来说方法如下:在大原则下,要啃硬骨头,具体实施的时候,找到让对偶目标增长
%最快的方法;
%7,此外,e的选取,以及容许误差的选择对收敛速度有影响;还有就是停机准则的判决标准;
%8,另外,还有abs(a(i))*(e-|f(xi)-yi|)+C*efxt(i);没有测试;这个也是值得测试的
%一个方案;
%--------------------------------------------------------------------------
%--------------------------------------------------------------------------
%20140305 updating
%今天实验用输入输出异质的方法来预测股指,看看能达到怎样的精度;
%首先要解决数据录入整理的问题;
%接下来就是细节的处理,比如C的值,样本数量,方差,精度,相对精度,等等。。。
%--------------------------------------------------------------------------
clear all;
close all;
fprintf('Build the training set.\n');
st = cputime;
for round=1:5
N=400;
NN=5000;
NS=N; %suppose that all data points are SVs.
NS_1=N; %suppose that half points are SVs which in bonds;
si=1; %方差;
m0=5; %输入向量的维度,这里可以看成是追溯历史的深度;
qwe=4; %1,开盘价; 4,收盘价;2,最高价;3,最低价;
del_d=0; %delay days, at this time, 20;
%nt=3000; %total points: 10,000;
%n=1e3; %training set scale: 1000;
%ep=100*25; %epoch number: 2500;
%vf=5; %frequency of verification: validation on every 5 epoch training;
%sigma_v=1;
%beta=0.5;
load sz000001.mat;
A=data;
%x=A(:,1);
[a1,a2]=size(A);
%交换第一列和第四列;
%{
swap=A(:,1);
A(:,1)=A(:,4);
A(:,4)=swap;
%}
for i=1:a2
miu_a(i)=mean(A(:,i)); %取得x1的均值;注意到这里是减去了整个数据集的均值,实际上对于训练集而言,均值不一定为0;
A(:,i)=A(:,i)-miu_a(i); %减去均值;相当于减去了直流分量;
max_a(i)=max(abs(A(:,i))); %取得绝对值的最大值;这就是归一化了;
A(:,i)=A(:,i)./max_a(i); %除以这个数;
end
for i=1:N;
for j=1:m0;
X(i,j)=A(i+a1-N-del_d-1,j);
end
end
for i=1:N
y(i)=A(i+a1-N-del_d,qwe);
end
figure;
plot(y,'r.');
fprintf('initial the SMO parameters.\n');
a=zeros(1,N);%normrnd(0,1,1,N)./10;% zeros(1,N);% %the first initial value;
b=0; %bias.
C=1; %the boundry; C是惩罚系数,又称正则化系数?预示风险大小?这个数字大会怎样呢?
FX=zeros(1,N); %f(x(i)) i=1toN, since {a(i)}=0, so f(x)=0; f(x)=w'*x-b; or f(x)=SUM ai*K(xi,xj)-b=0;
E=FX-y; %y: the expected response; normrnd(0,1,1,N)./1;%
%??E should be followed by training set;
efxt=zeros(1,N); %efxt is the gap distance related parameters; 这个是所有管道外面的点,对于管壁的偏离程度;也是我们想尽可能消除的参数;(减少绝对值=C的算子);
e=(1e-3)*16; %相当于是管子,超过这个界限就跑到管子外面,小于这个界限还在管子内部;小于
e_f=0; %因为e是动态变化的,所以用e_f标识,看看是如何变化的;
eps=e/2; %threshold for stopping judgement;容许范围,一般要比e更小才行的;
thre=0.001; %停机准则的阈值;
times=0; %at the start, times=0;the number of
%external loops;
presv=0; %at the beginning, suppose there is no
%support vector found;
Gram=eye(N,N); %build the gram matrix, for recording the calculation results;
delta_W=zeros(N,N);%abs(normrnd(0,1,N,N));
ot=0; %stop the operation according the run times;
totaltimes=0; %how many times of calculation operated?
in=1; %set the initial value of in;in start from 1;
ic=1;
i0=1;
%ratio_old=0;
ratio=1;
while(1) %the first loop, when ratio<eps, loop stop;
%select the first point;
%--------------------------------------------------------------------------
if times==0 %(mod(times,10)==0) %first selection or calculation;
i1=1; %select the point in random, so chose 1;
else %if this is not the first selection:
%----------------------------------------------------------------------
%when other selection:
%1, choose the (0,C) alpha which break the KKT conditions;
%2, then alpha=0 and alpha=C points which break the KKT conditions;
%3, if alpha choosen is in the last of the squene, re-start from 1;
%----------------------------------------------------------------------
%{
if ratio>=ratio_old
times=0;
else
ratio_old=ratio;
end
%}
%if ratio>ratio_old %这里的用意是:如果ratio没有取得进展,那么重复上一轮循环,直到取得进展为止;
%ratio_old=ratio;
while (in<=N) %search in all (0,C)alpha; 遍历界内违反KKT条件的所有点;
%could I simplize the alpha parameters as below?
if (abs(a(in))<C) && (a(in)~=0)
ain=abs(y(in)-FX(in))-e;
if abs(ain)>eps
i1=in;
presv=1;
end
end
in=in+1; %if we don't found the break KKT condition SV, continue...
if (presv) %we've found SV and not exceed the NS times;
break;
end
end
if presv==0 %if we don't found SV which break KKT condition, or, the times_i1 out of NS times;
in=1;
while (ic<=N) %遍历所有管道外的点;
%if the alpha on the boundry,i.e. a=0 or C;
if abs(a(ic))==C %寻找那些不满足KKT条件的点;实际上这些点都能符合这些条件;因此这个循环似乎作用不大;只能这么说了,每次迭代之后,这些值都会变化,所以还是有帮助的;
aic=abs(y(ic)-FX(ic))-e;%-efxt(ic);%-efxt(ic);%
if aic<-eps %(aic>eps)||(aic<-eps)
i1=ic; %if break the KKT rule, i1=i;
presv=1; %we found a SV already;
end
end
ic=ic+1;
if (presv)
break;
end
end
end
if presv==0
while (i0<=N)
%if the alpha on the boundry,i.e. a=0 or C;
if a(i0)==0
ai0=abs(y(i0)-FX(i0))-e;
if ai0>eps
i1=i0; %if break the KKT rule, i1=i;
presv=1; %we found a SV already;
end
end
i0=i0+1;
if (presv)
break;
end
end
end
if (presv==0) %if we didn't found a SV which break KKT;
ic=1;
i0=1;
i1=floor(rand*N)+1;
end
presv=0; %back to the initial value;i.e.no sv found;
%end
end
%--------------------------------------------------------------------------
%{
--------------------------------------------------------
now I have the first point, and search the second point.
--------------------------------------------------------
%}
times_2=0; %how many times of internal loop?
i2old=i1; %the initial value of i2;
while(1) %the important question is how to
%stop the loop and this is internal
%loop, too.
if times==0 %(mod(times,10)==0) %at the first time, most points have;every 5 loops, we calculate all data points.
%a(i)=0;
if i2old<N %i2old from i1:N; i.e. 1:N.
i2=i2old+1; %i2 from 2:N+1;
i2old=i2; %remember the choice;
end
else %at the other loops,there should be
%some points a(i)>0;
%--------------------------------------------------------------------------
%第二个点的寻找才是费脑筋的活,这里需要寻找界内的点,然后,尽可能寻找对对偶目标
%贡献大的点;
%可以通过计算每次对delta_W的贡献,并且记录下来完成这个活动;
%这种方法被称为“好吃你就多吃一点法”;
%也可以通过计算对偶间隙的大小来选择合适的点;
%虽然运算一般来说都有简洁的形式,但在一开始,必须穷尽所有可能性;
%对偶间隙的大小,这种方法效果不大好,取消了;
%--------------------------------------------------------------------------
max_delta=0;
min_E=E(i1);
max_E=E(i1);
%found_f=0;
if times_2<=NS_1
if E(i1)>=0
for i=1:N
if delta_W(i1,i)<=0 && a(i)~=0 && abs(a(i))<C %根据这个方法,所有界内的点基本上都遍历了一遍;给那些遍历过后,对偶函数反而下降的点一次机会;
if (E(i)<min_E) %&&(i~=i2old_x)
min_E=E(i);
i2=i;
%found_f=1;
end
end
end
else
for i=1:N
if delta_W(i1,i)<=0 && a(i)~=0 && abs(a(i))<C %&& 不管大小,正负;
if (E(i)>max_E) %&&(i~=i2old_x)
max_E=E(i);
i2=i;
%found_f=1;
end
end
end
end
else
for i=1:N
if a(i)~=0 && abs(a(i))<C %然后再界内选择最优点,继续,但是这里就有问题了,
%需不需要选择管道外的点??如果不需要,那么内循环
%应该是2×NS_1次就可以了,多运算无益;刚才试过了,
%没有效果,内循环必须有足够的次数;
if delta_W(i1,i)>max_delta && i~=i1;
max_delta=delta_W(i1,i);
i2=i;
%found_f=1;
%times_2=0;
end
end
end
end
end
times_2=times_2+1; %totally N for times=0;
if times_2>NS %+1 %finish one complete loop;
break;
end
totaltimes=totaltimes+1; %total times of calculation;开始运算了,计数加一;
%---------------------------------------------------------------
%now I have two points, start the calculation;
%---------------------------------------------------------------
%{
if i2>=max_i %the i2 should larger than max_i;
max_i=i2; %update the value of max_i;
else %if the i2 less than max_i;
max_i=1; %max_i go back to 1;
times=1; %times increased since the first loop
%is complete;
break; %jump out the loop;
end
%}
%now we get the i2, so we could start the optimization;
%select the second point;
%SMO optimization algorithm;
%{
possible parameters;
SV(i); collect all support vectors into one group and calculate the E1&E2
using these elements;
x1,x2;
x_sv(i) and y_sv(i) compare to the SV(i);
---------------------------------------------
maybe we don't need to collect the SV group.
we just choose the {a(i)>0}is okay,
when the algorithm stops, we could update the
SVs and give the final decision hyper plane.
---------------------------------------------
a1,a2;
a2new,a2new-unc,a2old;a1new,a1old;
y1,y2;
E1,E2;
L,H;
K11,K22,K12;
%}
%and we need the kernel function;
%{
K(x,z)=exp(-1/(2*sigma^2)*norm(x-z)^2);
%}
%--------------------------------------------------------------------------
%
%RUN the SMO algorithm
%
%--------------------------------------------------------------------------
x1=X(i1,:);
x2=X(i2,:);
y1=y(i1);
y2=y(i2);
a1old=a(i1);
a2old=a(i2);
E1=E(i1); %this value got from memory;
E2=E(i2); %ditto;
star=a1old+a2old;
%FX1=FX(i1);
FX1old=FX(i1);
%FX2=FX(i2);
FX2old=FX(i2);
bold=b;
K11=1;%K(x1,x1); for Guass kernel function;
K22=1;%K(x2,x2); ditto;
if Gram(i1,i2)~=0
K12=Gram(i1,i2);
else
K12=K(x1,x2,si);
Gram(i1,i2)=K12;
Gram(i2,i1)=K12;
end
k=K11+K22-2*K12; %parameter couldn't be 0 when it worked as divider.
if k==0 %the possibility is rare.
k=0.01;
end
delta=2*e/k;
a2new=a2old+(E1-E2)/k;
a1new=star-a2new;
if (a1new*a2new<0)
if (abs(a2new)>=delta)||(abs(a1new)>=delta)
a2new=a2new-sign(a2new)*delta;
else
judcon=abs(a2new)-abs(a1new);
a2new=(sign(judcon)+1)/2*star;
end
end
L=max(star-C,-C);
H=min(C,star+C);
a2new=min(max(a2new,L),H);
a1new=star-a2new;
if times~=0 && L>=H %first time, we should let L=H=0;
break;
end
if a1new>C
a1new=C;
end
if a1new<-C
a1new=-C;
end
%a1new=max(0,a1new);
%{
if abs(a2new-a2old)<(eps*(a2new+a2old-eps)) %if the difference is little, jump out of the loop;
break;
end
%}
a(i1)=a1new; %now we could update the{a(i)}i=1toN
a(i2)=a2new; %update the {a(i)};
%now we starts update the bias;
%--------------------------------------------------------------------------
%bold=b;
%{
a1e=a1old-a1new;%-a1old;
%a1e_2=a1e*K12;
a2e=a2old-a2new;%-a2old;
%a2e_2=a2e*K12;
b1new=-E1+a1e+a2e*K12+b;
b2new=-E2+a1e*K12+a2e+b;
%bnew=(b1new+b2new)/2;
if abs(a1new)<C && a1new~=0 %if a1new is in the bounds;
b=b1new;
else
if abs(a2new)<C && a2new~=0
b=b2new;
else
%if (a1new==0||abs(a1new)==C)&&(a2new==0||abs(a2new)==C)&&(L~=H)
b=(b1new+b2new)/2;
%end
end
end
%}
%--------------------------------------------------------------------------
%try to update the FX1 and FX2 by faster method;
%--------------------------------------------------------------------------
%{
if abs(a1new)<C && a1new~=0
FX1=FX1+(a2new-a2old)*K12+a1new-a1old;
else
FX1=0;
for i=1:N
if a(i)~=0
if Gram(i,i1)~=0;
Ki1=Gram(i,i1);
else
Ki1=K(X(i,:),x1,si);
Gram(i,i1)=Ki1;
Gram(i1,i)=Ki1;
end
FX1=FX1+a(i)*Ki1;
end
end
end
if abs(a2new)<C && a2new~=0
FX2=FX2+(a1new-a1old)*K12+a2new-a2old;
else
FX2=0;
for i=1:N
if a(i)~=0
if Gram(i,i2)~=0;
Ki2=Gram(i,i2);
else
Ki2=K(X(i,:),x2,si);
Gram(i,i2)=Ki2;
Gram(i2,i)=Ki2;
end
FX2=FX2+a(i)*Ki2;
end
end
end
%}
FX1=0;
FX2=0;
for i=1:N
if a(i)~=0
if Gram(i,i1)~=0;
Ki1=Gram(i,i1);
else
Ki1=K(X(i,:),x1,si);
Gram(i,i1)=Ki1;
Gram(i1,i)=Ki1;
end
FX1=FX1+a(i)*Ki1;
if Gram(i,i2)~=0;
Ki2=Gram(i,i2);
else
Ki2=K(X(i,:),x2,si);
Gram(i,i2)=Ki2;
Gram(i2,i)=Ki2;
end
FX2=FX2+a(i)*Ki2;
end
end
%ve=a*Gram(:,i1);
%ve2=a*Gram(:,i2);
%}
if (a2new>0) && (a2new<C)%abs(a2new)<C && a2new~=0
b=y2-FX2-e;
else
if (a2new<0) && (a2new>-C)
b=y2-FX2+e;
else
if (a1new>0) && (a1new<C)%abs(a1new)<C && a1new~=0 %if a1new is in the bounds;
b=y1-FX1-e;
else
if (a1new<0) && (a1new>-C)
b=y1-FX1+e;
else
%if (a1new==0||abs(a1new)==C)&&(a2new==0||abs(a2new)==C)&&(L~=H)
b=(y1-FX1+y2-FX2)/2;
%end
end
end
end
end
FX(i1)=FX1+b; %FX=SUM ai*yi*K(xi,xj)-b;
FX(i2)=FX2+b;
E(i1)=FX(i1)-y1; %store the E(i) into the E matrix;
E(i2)=FX(i2)-y2;
%这里是对偶目标引起的变化,还是有点作用的;
delta_W(i1,i2)=a1new*(FX1-y1-0.5*a1new)-a1old*(FX1old-y1-0.5*a1old-bold)+a2new*(FX2-y2-0.5*a2new)-a2old*(FX2old-y2-0.5*a2old-bold)+K12*(a1old*a2old-a1new*a2new)+e*(abs(a1new)-abs(a1old)+abs(a2new)-abs(a2old));
%delta_W(i1,i2)=abs(delta_W(i1,i2));
%delta_W(i2,i1)=delta_W(i1,i2);%??????
%%觉得i1和i2,如果两个点相同,但是次序不一样,可能引起的对偶函数变化也不尽相同,所以这里要拿出来;不能混为一谈;效果一般,可以接受;
%{
E1=E(i1);
E2=E(i2);
b1new=E1+a1e+a2e*K12+bold;
b2new=E2+a1e*K12+a2e+bold;
b=(b1new+b2new)/2;
%}
%N, the number of training set;
%now we calculate the stop formula and judge whether the algorithm should
%stop.
end
%---------------------------------------------------------------
%one internal loops complete, times++ for external loop;
%---------------------------------------------------------------
%C=max(a)+1;
%--------------------------------------------------------------------------
% I hope that calculation could be complete less than (n-1)*(n-2)/2 times;
%--------------------------------------------------------------------------
if totaltimes>2500*N
fprintf( 'how many knives?\n');
fprintf( '-----------------------------------------\n' );
break;
end
times=times+1; %times++, i.e. outer loop ++; 内循环结束后,times+1,
%{
if times>=N %i.e. we don't want C(N-1,2) times calculation;
times=1;
ot=ot+1;
end
if ot>2 %how many times of C(N-1,2) operated;
fprintf('out of times ...\n');
fprintf('-----------------------------------------\n');
break;
end
%}
%do we need update the E(i) and FX(i)? I think so. FX(i) is neccesary, E(i)
%don't need.
%now I hesitate.
i=1;
l=0;
j=0;
x_sv=zeros(NS,m0);
%统计所有的支持向量;
for i=1:N
if a(i)~=0
l=l+1;
SV(l)=a(i);
y_sv(l)=y(i);
x_sv(l,:)=X(i,:);
ptr(l)=i; %remember the pointer;
if abs(a(i))~=C; %how many SVs which in the bonds.(0,C)
j=j+1;
end
end
end
NS=l; %the number of support vectors.
NS_1=j; %所有管道壁上的支持向量;
%{
if NS<=(N/10)
p=0;
for j=1:NS;
for i=1:NS;
d_max=norm(x_sv(:,i)-x_sv(:,j));
if d_max>p;
p=d_max;
end
end
end
d_max=p;
si=d_max^2/(2*NS);
end
%}
%{
lold=1;
while (a(lold)==C)
lold=lold+1;
end
FV=0;
for l=1:NS
FV=FV+SV(l)*y_sv(l)*K(x_sv(:,l),x_sv(:,lold));
end
b=FV-y_sv(lold);
lold=lold+1;
if lold>NS
lold=1;
end
%}
%计算所有的SVM输出;
FX=zeros(1,N);
for i=1:N;
%if a(i)~=0;
for l=1:NS
if Gram(ptr(l),i)~=0;
Kli=Gram(ptr(l),i);
else
Kli=K(x_sv(l,:),X(i,:),si);
Gram(ptr(l),i)=Kli;
Gram(i,ptr(l))=Kli;
end
FX(i)=FX(i)+SV(l)*Kli;
end
%end
end
sv_x=x_sv; %this vector for demostation.
x_sv=zeros(NS,m0); %clear past data for preparation.
FX_old=FX;
%--------------------------------------------------------------------------
%update the bias; average all |a|<C and a~=0;
%--------------------------------------------------------------------------
i=0;
bias=0;
for l=1:NS;
if SV(l)>0 && SV(l)<C
i=i+1;
bias=bias+y_sv(l)-FX(ptr(l))-e;
end
if SV(l)<0 && SV(l)>-C %这里居然出现了一个大bug,实在是太不仔细了!
i=i+1;
bias=bias+y_sv(l)-FX(ptr(l))+e;
end
end
if i~=0;
b=bias/i;
end
FX=FX+b*ones(1,N);
E=FX-y;
efxt=max(0,abs(E)-e);
%{
figure;
plot(FX);
hold on;
plot(y,'r.');
%}
%{
for i=1:N
efxt(i)=max(0,abs(E(i))-e);
end
%}
%{
if a(i)>=0
efxt(i)=efxt(i);
else
efxt(i)=-efxt(i);
end
%}
%-------------------------------------
%the new criteria: meet the KKT or not
%20140204 the effect is not good.
%think about the C-SVM method.
%-------------------------------------
%{
meetKKT=0;
i=1;
while i<=N
jg=y(i)*FX(i);
if a(i)>0
if jg~=1;
meetKKT=1;
end
else
if a(i)==0
if jg<1
meetKKT=1;
end
end
end
if (meetKKT) %if break the KKT rule, quit the loop;
%i.e. need more loops.
break;
end
i=i+1;
end
fprintf('Now we can see whether we could stop calculation or not...\n');
if (meetKKT)
fprintf('NOT now\n');
else
fprintf('Complete.\n');
break;
end
%}
%--------------------------------------------------------------
%Now we revise the stop criteria.
%对于回归问题,停机条件肯定要改的;原目标和对偶目标都已经不一致了;
%--------------------------------------------------------------
%{
%停机条件1;
AAK_1=0; %a(i)>0 or a(i)=-C;
AAK_2=0; %-C<a(i)<0;
%AAK_3=0; %a(i)=-C;
AAK=0;
alpha_e=0; %alpha(i)*e;
alpha_b_y=0; %alpha(i)*(b-y(i));
for i=1:N
if a(i)~=0
for j=1:N
if (a(j)~=0)
if Gram(i,j)~=0
Kij=Gram(i,j);
else
Kij=K(X(i,:),X(j,:),si);
Gram(i,j)=Kij;
Gram(j,i)=Kij;
end
if (a(i)>0)||(a(i)==-C)
AAK_1=AAK_1+a(i)*a(j)*Kij;
else
if (a(i)<0) && (a(i)>-C)
AAK_1=AAK_1-a(i)*a(j)*Kij;
end
end
end
end
end
end
%{
for i=1:N
if (a(i)<0) && (a(i)>-C)
for j=1:N
if (a(j)~=0)
if Gram(i,j)~=0
Kij=Gram(i,j);
else
Kij=K(X(i,:),X(j,:),si);
Gram(i,j)=Kij;
Gram(j,i)=Kij;
end
AAK_2=AAK_2+a(i)*a(j)*Kij;
end
end
end
end
AAK_2=-AAK_2; %符号问题这里先解决;到最后全部加总就可以了;
%}
%{
for i=1:N
if a(i)==-C
for j=1:N
if (a(j)~=0)
if Gram(i,j)~=0
Kij=Gram(i,j);
else
Kij=K(X(i,:),X(j,:),si);
Gram(i,j)=Kij;
Gram(j,i)=Kij;
end
AAK_3=AAK_3+a(i)*a(j)*Kij;
end
end
end
end
%}
for i=1:N
if a(i)==-C
alpha_e=alpha_e+C*e;
else
alpha_e=alpha_e+a(i)*e;
end
end
for i=1:N
if (a(i)>0) || (a(i)==-C)
alpha_b_y=alpha_b_y+a(i)*(b-y(i));
else
if (a(i)<0) && (a(i)>-C)
alpha_b_y=alpha_b_y+a(i)*(y(i)-b);
end
end
end
for i=1:N
if (a(i)~=0)
for j=1:N
if (a(j)~=0)
if Gram(i,j)~=0
Kij=Gram(i,j);
else
Kij=K(X(i,:),X(j,:),si);
Gram(i,j)=Kij;
Gram(j,i)=Kij;
end
AAK=AAK+a(i)*a(j)*Kij;
end
end
end
end
sumay=a*y';
sumabsa=sum(abs(a));
sume=C*sum(efxt);
Gap=AAK_1+alpha_e+alpha_b_y+sume;%+AAK_2
%}
%停机条件2;
%Gap=0;
%alpha_e=0;
Gap=abs(a)*(e-abs(FX-y))';%_old-y))';%??这里是有疑问的,究竟是采用FX还是FX_old?要重新查找一下对偶间隙的问题;
%if we use the FX_old, the result is not good, though the runtime is small;
%so, we must use the FX; i.e. there is
%no bias in the formula;
%{
for i=1:N
Gap=Gap+abs(a(i))*(e-abs(FX(i)-y(i)));
end
%}
%{
for i=1:N
if a(i)>0 || a(i)==-C
Gap=Gap+a(i)*(FX(i)-y(i));
else
if a(i)<0 && a(i)>-C
Gap=Gap+a(i)*(y(i)-FX(i));
end
end
end
for i=1:N
if a(i)==-C
alpha_e=alpha_e+C*e;
else
alpha_e=alpha_e+a(i)*e;
end
end
%}
%AAK=0;
%以下是代码优化;
AAK_1=0;
for i=1:NS
for j=1:NS
if Gram(ptr(i),ptr(j))~=0
Kij=Gram(ptr(i),ptr(j));
else
Kij=K(sv_x(i,:),sv_x(j,:),si);
Gram(ptr(i),ptr(j))=Kij;
Gram(ptr(j),ptr(i))=Kij;
end
AAK_1=AAK_1+SV(i)*SV(j)*Kij;
end
end
%{
for i=1:N
if (a(i)~=0)
for j=1:N
if (a(j)~=0)
if Gram(i,j)~=0
Kij=Gram(i,j);
else
Kij=K(X(i,:),X(j,:),si);
Gram(i,j)=Kij;
Gram(j,i)=Kij;
end
AAK=AAK+a(i)*a(j)*Kij;
end
end
end
end
%}
sumay=a*y';
sumabsa=sum(abs(a));
sume=C*sum(efxt);
%Gap=Gap+alpha_e;
Gap_raw=Gap+sume;
Gap=abs(Gap_raw);
ratio=Gap/abs(Gap_raw+sumay-e*sumabsa-0.5*AAK_1+1);%(sume+0.5*AAK+1);%(Gap+sumay-e*sumabsa-0.5*AAK+1);%(sumay-e*sumabsa-0.5*AAK+1);
%ratio=(sume+sumay-e*sumabsa)/(sume+0.5*AAK+1);
%dynamically tune the threshold;相当于是慢慢的收拢管道;
if ratio<0.08 && e_f==0
e=(1e-3)*8;
e_f=0.5;
eps=e/2;
else
if ratio<0.04 && e_f==0.5
e=(1e-3)*5;
e_f=1;
eps=e/2;
else
if ratio<0.01 && e_f==1
e=(1e-3)*2;
e_f=2;
eps=e/2;
else
if ratio<0.005 && e_f==2
e=(1e-3);
e_f=3;
eps=e/2;
%{
else
if ratio<0.005 && e_f==3
e=(1e-3);
e_f=4;
eps=e/2;
end
%}
end
end
end
end
if ratio<0.001 && e_f==3
break;
end
%}
%{
%停机条件3;
W=0;
AAK=0;
for i=1:N
if (a(i)~=0)
for j=1:N
if (a(j)~=0)
if Gram(i,j)~=0
Kij=Gram(i,j);
else
Kij=K(X(i,:),X(j,:),si);
Gram(i,j)=Kij;
Gram(j,i)=Kij;
end
AAK=AAK+a(i)*a(j)*Kij;
end
end
end
end
sumay=a*y';
sumabsa=sum(abs(a));
sume=C*sum(efxt);
%W=suma-0.5*AAYYK-e*sumabsa;
ratio=(AAK+e*sumabsa-sumay+sume)/(0.5*AAK+sume+1);
%}
%ratio=(0.5*AAK+1/N*sume/C+sumay-e*sumabsa)/(AAK+1/N*sume/C+1);
%((suma-2*W+sume)/(suma-W+sume+1));
%fprintf('Now we can see the RATIO...\n');
fprintf('Ratio = %f\n',ratio);
if (ratio<thre)
break; %if ratio meet the stop threshold, loop
%stops.
end
%{
if (ratio<ratio_old)
sv_f=sv_x;
NS_f=NS;
SV_f=SV;
ratio_old=ratio;
end
%}
%{
if times>NS_1 %if no progress, let working set as all points;
times=0;
end
%}
end
%{
for i=1:N;
%if a(i)~=0;
for l=1:NS
if Gram(ptr(l),i)~=0;
Kli=Gram(ptr(l),i);
else
Kli=K(sv_x(l,:),X(i,:),si);
Gram(ptr(l),i)=Kli;
Gram(i,ptr(l))=Kli;
end
FX(i)=FX(i)+SV(l)*Kli;
end
%end
end
%sv_x=x_sv; %this vector for demostation.
%x_sv=zeros(NS,20); %clear past data for preparation.
FX=FX+b*ones(1,N);
%}
hold on;
plot(FX,'b.');
%E_y=E./(10+y)*100;
%{
p=0;
for i=1:NS;
for j=1:NS;
di(j)=norm(sv_x(i,:)-sv_x(j,:));
if di(j)>p;
p=di(j);
end
end
end
si=p^2/(2*NS);
%}
for i=1:N;
for j=1:m0;
X(i,j)=A(i+a1-N-del_d,j);
%X(i,j)=v(i+j-1+N);
end
end
for i=1:N;
if i<N
y(i)=A(i+a1-N-del_d+1,qwe);
else
y(i)=0;
end
end
figure;
plot(y,'r.');
hold on;
%{
sv_x=sv_f;
NS=NS_f;
SV=SV_f;
%}
FX=zeros(1,N);
for i=1:N;
%if a(i)~=0;
for l=1:NS
%{
if Gram(ptr(l),i)~=0;
Kli=Gram(ptr(l),i);
else
Kli=K(sv_x(l,:),X(i,:),si);
Gram(ptr(l),i)=Kli;
Gram(i,ptr(l))=Kli;
end
%}
Kli=K(sv_x(l,:),X(i,:),si);
FX(i)=FX(i)+SV(l)*Kli;
end
%end
end
%sv_x=x_sv; %this vector for demostation.
%x_sv=zeros(NS,20); %clear past data for preparation.
FX=FX+b*ones(1,N);
plot(FX,'b.');
%E_1=FX-y;
%E_y_1=E_1./(10+y)*100;
%figure;
%plot(E_y);
%figure;
%plot(E_y_1,'r');
fprintf('run time = %4.2f seconds\n',cputime-st);
FX_pre=miu_a(1)+max_a(1)*FX(N);
%{
y_pre=miu_a(1)+max_a(1)*y(N)
err=FX_pre-y_pre
ratio_e=err/y_pre
%}
HS_300(round)=FX_pre;
end
stock_index=median(HS_300)
%--------------------------------------------------------------------------
%2014/03/06
%calculating the error between the prediction and real value;
%
%
%--------------------------------------------------------------------------
数据是从某炒股软件中下载的,存为.mat文件,以下是链接:
链接:https://pan.baidu.com/s/1Ux3IF1CTrHeLdQt2Ivm_MA 密码:fzvn
发财的时候,记得回来踩踩!