独立成分分析ICA

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include<time.h>
#include<io.h>
#include<direct.h>//目录控制头文件
#include<iostream>
using namespace std;

#define WIN 20     //窗口大小
#define ASSCODE 5 //编码长度

const int FOLD= 5;  //交叉验证迭代次数
const int M_num=50;//主成分个数
double W_AUC;//最大AUC
double AUC;//临时AUC

double BestThreshold;
double sen,spe,ppv,npv,acc,cc,sen1,spe1,ppv1,npv1,acc1,cc1;

void write_m(char* filename, double* a, int r, int c)
{
	FILE *fp;
	fp=fopen(filename,"w");
	for(int i=0;i<r;i++)
	{
		for(int j=0;j<c;j++)
			fprintf(fp,"%lf ",a[i*c+j]);
		fprintf(fp,"\n");
	}
	fclose(fp);
}
void write_mm(char* filename, double** a, int r, int c)
{
	FILE *fp;
	fp=fopen(filename,"w");
	for(int i=0;i<r;i++)
	{
		for(int j=0;j<c;j++)
			fprintf(fp,"%lf ",a[i][j]);
		fprintf(fp,"\n");
	}
	fclose(fp);
}
void file_pre_train(int block)//功能:将裂解样本和非裂解样本,分为9:1份,即校正集和预测集
{
	char a[32];
	FILE *cleafp,*noncleafp;
	if((cleafp=fopen("indata\\cleavage.txt","r"))==NULL )
	{
		printf("cannot open cleavage.txt\n");
		getchar();		return;
	}
	if((noncleafp=fopen("indata\\noncleavage.txt","r"))==NULL )
	{
		printf("cannot open noncleavage\n");
		getchar();		return;
	}
	FILE *trainfp,*prefp;
	if((trainfp=fopen("out\\train.txt","w"))==NULL )//训练
	{
		printf("cannot write_m train\n");
		getchar();		return;
	}
	if((prefp=fopen("out\\predict.txt","w"))==NULL )//预测
	{
		printf("cannot open infile\n");
		getchar();		return;
	}
	for(int i=0;i<1160;i++)
	{
	  fread(a,31,1,cleafp);//裂解样本
	  if((i/116)==block)
		fwrite(a,31,1,prefp);  //预测样本		
	  else
		fwrite(a,31,1,trainfp);  //训练样本
	}
	for(int i=0;i<1160;i++)
	{
	  fread(a,31,1,noncleafp); //非裂解样本
	  if((i/116)==block)
		 fwrite(a,31,1,prefp);
	  else
		 fwrite(a,31,1,trainfp);
	}

	fclose(cleafp);
	fclose(noncleafp);
	fclose(trainfp);
	fclose(prefp);
}
void file_pre()//功能:将裂解样本和非裂解样本合并为预测数据out\\PredictData.txt
{
	char a[32];
	int i,j;
	int c=31;
	int r=2320;
	FILE *pre_fp;
	FILE *cleafp,*noncleafp;
	if((cleafp=fopen("indata\\cleavage.txt","r"))==NULL )
	{
		printf("file_pre() cannot open cleavage.txt\n");
		getchar();		return;
	}
	if((noncleafp=fopen("indata\\noncleavage.txt","r"))==NULL )
	{
		printf("file_pre cannot open noncleavage\n");
		getchar();		return;
	}
	if((pre_fp=fopen("out\\PredictData.txt","w"))==NULL )//预测数据
	{
		printf("cannot open infile\n");
		getchar();		return;
	}
	/*size_t fread( void *buffer,size_t size,size_t count,FILE *stream );*/
	for(i=0;i<1160;i++)
	{
	   fread(a,sizeof( char ),31,cleafp);
	   fwrite(a,sizeof( char ),31,pre_fp);
	}

	for(i=0;i<1160;i++)
	{
       fread(a,sizeof( char ),31,noncleafp);
	   fwrite(a,sizeof( char ),31,pre_fp);
	}
	fclose(cleafp);
	fclose(noncleafp);
	fclose(pre_fp);
}

//编码变换
void code(FILE *pre_fp,double *x,double *y,int r,int c)  //pre_fp——train。txt
{
	int i,j,k;

	char *pat,*temp;

	pat=(char *) calloc(r*WIN,sizeof(char));

	//将对应窗口大小的数据x存储进数组中,以及裂解值y
	for(i=0;i<r;i++)
	{
		for(k=0;k<(28-WIN)/2;k++)
			fscanf(pre_fp,"%c",&temp);

		for(j=0;j<WIN;j++)
			fscanf(pre_fp,"%c",&pat[i*WIN+j]);//按窗口读取

		for(k=0;k<(28-WIN)/2;k++)
			fscanf(pre_fp,"%c",&temp);

		for(j=0;j<1;j++)
		    fscanf(pre_fp,"%lf",&y[i*1+j]);  //读入label
		fgetc(pre_fp);

	 }
	 fclose(pre_fp);

	//编码变换二进制
	for(i=0;i<r;i++)
	{
	     for(k=0;k<c;k++)
		     x[i*c+k]=0.0;

	     for(j=0;j<WIN;j++)
		 {
		      if ( ASSCODE==3)
		{
			switch(pat[i*WIN+j])
			{
				case'A':x[i*c+j*ASSCODE +0]= 0.07;x[i*c+j*ASSCODE +1]=-1.73;x[i*c+j*ASSCODE +2]= 0.09;break;
				case'C':x[i*c+j*ASSCODE +0]= 0.71;x[i*c+j*ASSCODE +1]=-0.97;x[i*c+j*ASSCODE +2]= 4.13;break;
				case'D':x[i*c+j*ASSCODE +0]= 3.64;x[i*c+j*ASSCODE +1]= 1.13;x[i*c+j*ASSCODE +2]= 2.36;break;
				case'E':x[i*c+j*ASSCODE +0]= 3.08;x[i*c+j*ASSCODE +1]= 0.39;x[i*c+j*ASSCODE +2]=-0.07;break;
				case'F':x[i*c+j*ASSCODE +0]=-4.92;x[i*c+j*ASSCODE +1]= 1.30;x[i*c+j*ASSCODE +2]= 0.45;break;
				case'G':x[i*c+j*ASSCODE  +0]= 2.23;x[i*c+j*ASSCODE  +1]=-5.36;x[i*c+j*ASSCODE  +2]= 0.30;break;
				case'H':x[i*c+j*ASSCODE  +0]= 2.41;x[i*c+j*ASSCODE  +1]= 1.74;x[i*c+j*ASSCODE  +2]= 1.11;break;
				case'I':x[i*c+j*ASSCODE  +0]=-4.44;x[i*c+j*ASSCODE  +1]=-1.68;x[i*c+j*ASSCODE  +2]=-1.03;break;
				case'K':x[i*c+j*ASSCODE  +0]= 2.84;x[i*c+j*ASSCODE  +1]= 1.41;x[i*c+j*ASSCODE  +2]=-3.14;break;
				case'L':x[i*c+j*ASSCODE  +0]=-4.19;x[i*c+j*ASSCODE  +1]=-1.03;x[i*c+j*ASSCODE  +2]=-0.98;break;
				case'M':x[i*c+j*ASSCODE  +0]=-2.49;x[i*c+j*ASSCODE  +1]=-0.27;x[i*c+j*ASSCODE  +2]=-0.41;break;
				case'N':x[i*c+j*ASSCODE  +0]= 3.22;x[i*c+j*ASSCODE  +1]= 1.45;x[i*c+j*ASSCODE  +2]= 0.84;break;
				case'P':x[i*c+j*ASSCODE  +0]=-1.22;x[i*c+j*ASSCODE  +1]= 0.88;x[i*c+j*ASSCODE  +2]= 2.23;break;
				case'Q':x[i*c+j*ASSCODE  +0]= 2.18;x[i*c+j*ASSCODE  +1]= 0.53;x[i*c+j*ASSCODE  +2]=-1.14;break;
				case'R':x[i*c+j*ASSCODE  +0]= 2.88;x[i*c+j*ASSCODE  +1]= 2.52;x[i*c+j*ASSCODE  +2]=-3.44;break;
				case'S':x[i*c+j*ASSCODE  +0]= 1.96;x[i*c+j*ASSCODE  +1]=-1.63;x[i*c+j*ASSCODE  +2]= 0.57;break;
				case'T':x[i*c+j*ASSCODE  +0]= 0.92;x[i*c+j*ASSCODE  +1]=-2.09;x[i*c+j*ASSCODE  +2]=-1.40;break;
				case'V':x[i*c+j*ASSCODE  +0]=-2.69;x[i*c+j*ASSCODE  +1]=-2.53;x[i*c+j*ASSCODE  +2]=-1.29;break;
				case'W':x[i*c+j*ASSCODE  +0]=-4.75;x[i*c+j*ASSCODE  +1]= 3.65;x[i*c+j*ASSCODE  +2]= 0.85;break;
				case'Y':x[i*c+j*ASSCODE  +0]=-1.39;x[i*c+j*ASSCODE  +1]= 2.32;x[i*c+j*ASSCODE +2]= 0.01;break;
				default:break;
			}

		}
		if(ASSCODE == 4)
		{
			switch(pat[i*WIN+j])
			{
			case'A':x[i*c+j*ASSCODE  + 0] = -0.840;x[i*c+j*ASSCODE  + 1] = -0.524;x[i*c+j*ASSCODE  + 2] = -0.348;x[i*c+j*ASSCODE  + 3] = -0.911;break;
			case'C':x[i*c+j*ASSCODE  + 0] = 0.579;x[i*c+j*ASSCODE  + 1] = -1.674;x[i*c+j*ASSCODE  + 2] = 0.840;x[i*c+j*ASSCODE  + 3] = -1.056;break;
			case'D':x[i*c+j*ASSCODE  + 0] = -0.091;x[i*c+j*ASSCODE  + 1] = 2.048;x[i*c+j*ASSCODE  + 2] = 1.589;x[i*c+j*ASSCODE  + 3] = -0.185;break;
			case'E':x[i*c+j*ASSCODE  + 0] = 0.495;x[i*c+j*ASSCODE  + 1] = 2.083;x[i*c+j*ASSCODE  + 2] = 1.354;x[i*c+j*ASSCODE  + 3] = 0.120;break;
			case'F':x[i*c+j*ASSCODE  + 0] = 1.796;x[i*c+j*ASSCODE  + 1] = -1.911;x[i*c+j*ASSCODE  + 2] = -0.156;x[i*c+j*ASSCODE  + 3] = 0.789;break;
			case'G':x[i*c+j*ASSCODE  + 0] = -1.883;x[i*c+j*ASSCODE  + 1] = -0.440;x[i*c+j*ASSCODE  + 2] = -0.373;x[i*c+j*ASSCODE  + 3] = -0.907;break;
			case'H':x[i*c+j*ASSCODE  + 0] = 0.857;x[i*c+j*ASSCODE  + 1] = 1.302;x[i*c+j*ASSCODE  + 2] = -0.212;x[i*c+j*ASSCODE  + 3] = -0.354;break;
			case'I':x[i*c+j*ASSCODE  + 0] = 1.536;x[i*c+j*ASSCODE  + 1] = -2.023;x[i*c+j*ASSCODE  + 2] = -0.360;x[i*c+j*ASSCODE  + 3] = -0.453;break;
			case'K':x[i*c+j*ASSCODE  + 0] = 0.974;x[i*c+j*ASSCODE  + 1] = 2.882;x[i*c+j*ASSCODE  + 2] = -0.888;x[i*c+j*ASSCODE  + 3] = 0.413;break;
			case'L':x[i*c+j*ASSCODE  + 0] = 1.401;x[i*c+j*ASSCODE  + 1] = -1.638;x[i*c+j*ASSCODE  + 2] = -0.232;x[i*c+j*ASSCODE  + 3] = -0.263;break;
			case'M':x[i*c+j*ASSCODE  + 0] = 1.127;x[i*c+j*ASSCODE  + 1] = -1.158;x[i*c+j*ASSCODE  + 2] = -0.192;x[i*c+j*ASSCODE  + 3] = 0.071;break;
			case'N':x[i*c+j*ASSCODE  + 0] = -0.211;x[i*c+j*ASSCODE  + 1] = 0.963;x[i*c+j*ASSCODE  + 2] = 0.039;x[i*c+j*ASSCODE  + 3] = -0.019;break;
			case'P':x[i*c+j*ASSCODE  + 0] = -0.495;x[i*c+j*ASSCODE  + 1] = -0.057;x[i*c+j*ASSCODE  + 2] = -0.937;x[i*c+j*ASSCODE  + 3] = 0.714;break;
			case'Q':x[i*c+j*ASSCODE  + 0] = 0.267;x[i*c+j*ASSCODE  + 1] = 1.027;x[i*c+j*ASSCODE  + 2] = 0.044;x[i*c+j*ASSCODE  + 3] = 0.360;break;
			case'R':x[i*c+j*ASSCODE  + 0] = 1.744;x[i*c+j*ASSCODE  + 1] = 2.899;x[i*c+j*ASSCODE  + 2] = -1.134;x[i*c+j*ASSCODE  + 3] = -0.110;break;
			case'S':x[i*c+j*ASSCODE  + 0] = -0.954;x[i*c+j*ASSCODE  + 1] = 0.332;x[i*c+j*ASSCODE  + 2] = -0.164;x[i*c+j*ASSCODE  + 3] = -0.584;break;
			case'T':x[i*c+j*ASSCODE  + 0] = -0.399;x[i*c+j*ASSCODE  + 1] = 0.234;x[i*c+j*ASSCODE  + 2] = -0.093;x[i*c+j*ASSCODE  + 3] = -0.272;break;
			case'V':x[i*c+j*ASSCODE  + 0] = 0.603;x[i*c+j*ASSCODE  + 1] = -1.534;x[i*c+j*ASSCODE  + 2] = -0.386;x[i*c+j*ASSCODE  + 3] = -0.440;break;
			case'W':x[i*c+j*ASSCODE  + 0] = 2.707;x[i*c+j*ASSCODE  + 1] = -1.413;x[i*c+j*ASSCODE  + 2] = -0.112;x[i*c+j*ASSCODE  + 3] = 1.781;break;
			case'Y':x[i*c+j*ASSCODE  + 0] = 2.655;x[i*c+j*ASSCODE  + 1] = -0.792;x[i*c+j*ASSCODE  + 2] = 1.562;x[i*c+j*ASSCODE  + 3] = 0.569;break;
			case'*':x[i*c+j*ASSCODE  + 0] = -11.869;x[i*c+j*ASSCODE  + 1] = -0.605;x[i*c+j*ASSCODE  + 2] = 0.158;x[i*c+j*ASSCODE  + 3] = 0.738;break;
			default:break;
			}
		}
		if(ASSCODE == 5)
		{
			switch(pat[i*WIN+j])
			{
			case'A':x[i*c+j*ASSCODE  + 0] = -1.359;x[i*c+j*ASSCODE  + 1] = -1.680;x[i*c+j*ASSCODE  + 2] = -0.531;x[i*c+j*ASSCODE  + 3] = -1.186;x[i*c+j*ASSCODE  + 4] = 0.045;break;
			case'C':x[i*c+j*ASSCODE  + 0] = 1.340;x[i*c+j*ASSCODE  + 1] = -2.070;x[i*c+j*ASSCODE  + 2] = 1.226;x[i*c+j*ASSCODE  + 3] = -1.221;x[i*c+j*ASSCODE  + 4] = -0.844;break;
			case'D':x[i*c+j*ASSCODE  + 0] = -1.820;x[i*c+j*ASSCODE  + 1] = 1.850;x[i*c+j*ASSCODE  + 2] = 2.418;x[i*c+j*ASSCODE  + 3] = -0.561;x[i*c+j*ASSCODE  + 4] = -0.116;break;
			case'E':x[i*c+j*ASSCODE  + 0] = -1.104;x[i*c+j*ASSCODE  + 1] = 2.371;x[i*c+j*ASSCODE  + 2] = 2.047;x[i*c+j*ASSCODE  + 3] = -0.140;x[i*c+j*ASSCODE  + 4] = -0.020;break;
			case'F':x[i*c+j*ASSCODE  + 0] = 3.283;x[i*c+j*ASSCODE  + 1] = -1.073;x[i*c+j*ASSCODE  + 2] = -0.349;x[i*c+j*ASSCODE  + 3] = 0.970;x[i*c+j*ASSCODE  + 4] = 0.221;break;
			case'G':x[i*c+j*ASSCODE  + 0] = -2.885;x[i*c+j*ASSCODE  + 1] = -2.463;x[i*c+j*ASSCODE  + 2] = -0.555;x[i*c+j*ASSCODE  + 3] = -0.882;x[i*c+j*ASSCODE  + 4] = 0.565;break;
			case'H':x[i*c+j*ASSCODE  + 0] = 0.259;x[i*c+j*ASSCODE  + 1] = 1.737;x[i*c+j*ASSCODE  + 2] = -0.484;x[i*c+j*ASSCODE  + 3] = -0.287;x[i*c+j*ASSCODE  + 4] = -1.297;break;
			case'I':x[i*c+j*ASSCODE  + 0] = 2.893;x[i*c+j*ASSCODE  + 1] = -1.543;x[i*c+j*ASSCODE  + 2] = -0.675;x[i*c+j*ASSCODE  + 3] = -0.636;x[i*c+j*ASSCODE  + 4] = -0.904;break;
			case'K':x[i*c+j*ASSCODE  + 0] = -1.199;x[i*c+j*ASSCODE  + 1] = 3.887;x[i*c+j*ASSCODE  + 2] = -1.287;x[i*c+j*ASSCODE  + 3] = 0.444;x[i*c+j*ASSCODE  + 4] = -0.381;break;
			case'L':x[i*c+j*ASSCODE  + 0] = 2.491;x[i*c+j*ASSCODE  + 1] = -1.154;x[i*c+j*ASSCODE  + 2] = -0.449;x[i*c+j*ASSCODE  + 3] = -0.495;x[i*c+j*ASSCODE  + 4] = -0.531;break;
			case'M':x[i*c+j*ASSCODE  + 0] = 1.839;x[i*c+j*ASSCODE  + 1] = -0.732;x[i*c+j*ASSCODE  + 2] = -0.355;x[i*c+j*ASSCODE  + 3] = -0.042;x[i*c+j*ASSCODE  + 4] = -0.123;break;
			case'N':x[i*c+j*ASSCODE  + 0] = -1.370;x[i*c+j*ASSCODE  + 1] = 0.799;x[i*c+j*ASSCODE  + 2] = 0.203;x[i*c+j*ASSCODE  + 3] = -0.480;x[i*c+j*ASSCODE  + 4] = 0.875;break;
			case'P':x[i*c+j*ASSCODE  + 0] = -1.059;x[i*c+j*ASSCODE  + 1] = -0.600;x[i*c+j*ASSCODE  + 2] = -1.386;x[i*c+j*ASSCODE  + 3] = 0.592;x[i*c+j*ASSCODE  + 4] = 1.612;break;
			case'Q':x[i*c+j*ASSCODE  + 0] = -0.734;x[i*c+j*ASSCODE  + 1] = 1.301;x[i*c+j*ASSCODE  + 2] = 0.229;x[i*c+j*ASSCODE  + 3] = -0.038;x[i*c+j*ASSCODE  + 4] = 0.967;break;
			case'R':x[i*c+j*ASSCODE  + 0] = -0.505;x[i*c+j*ASSCODE  + 1] = 4.325;x[i*c+j*ASSCODE  + 2] = -1.750;x[i*c+j*ASSCODE  + 3] = 0.138;x[i*c+j*ASSCODE  + 4] = -0.850;break;
			case'S':x[i*c+j*ASSCODE  + 0] = -2.027;x[i*c+j*ASSCODE  + 1] = -0.656;x[i*c+j*ASSCODE  + 2] = -0.187;x[i*c+j*ASSCODE  + 3] = -0.936;x[i*c+j*ASSCODE  + 4] = 0.555;break;
			case'T':x[i*c+j*ASSCODE  + 0] = -1.249;x[i*c+j*ASSCODE  + 1] = -0.338;x[i*c+j*ASSCODE  + 2] = -0.073;x[i*c+j*ASSCODE  + 3] = -0.498;x[i*c+j*ASSCODE  + 4] = 0.735;break;
			case'V':x[i*c+j*ASSCODE  + 0] = 1.336;x[i*c+j*ASSCODE  + 1] = -1.684;x[i*c+j*ASSCODE  + 2] = -0.663;x[i*c+j*ASSCODE  + 3] = -0.587;x[i*c+j*ASSCODE  + 4] = -0.499;break;
			case'W':x[i*c+j*ASSCODE  + 0] = 4.237;x[i*c+j*ASSCODE  + 1] = 0.352;x[i*c+j*ASSCODE  + 2] = -0.225;x[i*c+j*ASSCODE  + 3] = 2.115;x[i*c+j*ASSCODE  + 4] = 0.857;break;
			case'Y':x[i*c+j*ASSCODE  + 0] = 3.507;x[i*c+j*ASSCODE  + 1] = 0.588;x[i*c+j*ASSCODE  + 2] = 2.324;x[i*c+j*ASSCODE  + 3] = 0.681;x[i*c+j*ASSCODE  + 4] = 0.322;break;
			case'*':x[i*c+j*ASSCODE  + 0] = -5.358;x[i*c+j*ASSCODE  + 1] = -3.217;x[i*c+j*ASSCODE  + 2] = 0.521;x[i*c+j*ASSCODE  + 3] = 3.050;x[i*c+j*ASSCODE  + 4] = -1.189;break;
			default:break;
			}
		}
            if (ASSCODE==20){
		     switch(pat[i*WIN+j])
			 {
              case'A':x[i*c+j*ASSCODE+0]=1;break;
              case'C':x[i*c+j*ASSCODE+1]=1;break;
		      case'D':x[i*c+j*ASSCODE+2]=1;break;
		      case'E':x[i*c+j*ASSCODE+3]=1;break;
		      case'F':x[i*c+j*ASSCODE+4]=1;break;
		      case'G':x[i*c+j*ASSCODE+5]=1;break;
		      case'H':x[i*c+j*ASSCODE+6]=1;break;
		      case'I':x[i*c+j*ASSCODE+7]=1;break;
		      case'K':x[i*c+j*ASSCODE+8]=1;break;
		      case'L':x[i*c+j*ASSCODE+9]=1;break;
		      case'M':x[i*c+j*ASSCODE+10]=1;break;
		      case'N':x[i*c+j*ASSCODE+11]=1;break;
		      case'P':x[i*c+j*ASSCODE+12]=1;break;
		      case'Q':x[i*c+j*ASSCODE+13]=1;break;
		      case'R':x[i*c+j*ASSCODE+14]=1;break;
		      case'S':x[i*c+j*ASSCODE+15]=1;break;
		      case'T':x[i*c+j*ASSCODE+16]=1;break;
		      case'V':x[i*c+j*ASSCODE+17]=1;break;
		      case'W':x[i*c+j*ASSCODE+18]=1;break;
		      case'Y':x[i*c+j*ASSCODE+19]=1;break;
			  default:break;
			 }
            }

		 }

    }

}
void normalize(double *x,int r,int c)//数据标准化
{
	int i,j;
	double m=0,v=0;

	double *aver;//均值
	if ((aver=(double *) calloc(c*1, sizeof(double))) == NULL) printf("aver is wrong");
	double *covar;//方差
	if ((covar=(double *) calloc(c*1, sizeof(double))) == NULL) printf("covar is wrong");
    for(j=0;j<c;j++)
	   {
	      aver[j]=0.0;
		  for(i=0;i<r;i++)
			  aver[j]+=x[i*c+j];
		  aver[j]/=r;
	   }

	for(j=0;j<c;j++)
	  {
		  covar[j]=0.0;
		  for(i=0;i<r;i++)
	         {
			    x[i*c+j]=x[i*c+j]-aver[j];
	            covar[j]+=pow(x[i*c+j],2);
	          }
          covar[j]=sqrt(covar[j]/(r-1));
	  }

	for(j=0;j<c;j++)
	{
		for(i=0;i<r;i++)
			x[i*c+j]=(x[i*c+j]-aver[j])/covar[j];
	}
	return;
}

void normalize2(double **x,int r,int c,double *aver,double *covar)//数据标准化
{
	int i,j;
	double m=0,v=0;

    for(j=0;j<c;j++)
	   {
	      aver[j]=0.0;
		  for(i=0;i<r;i++)
			  aver[j]+=x[i][j];
		  aver[j]/=r;
	   }

	for(j=0;j<c;j++)
	  {
		  covar[j]=0.0;
		  for(i=0;i<r;i++)
	         {
			    x[i][j]=x[i][j]-aver[j];
	            covar[j]+=pow(x[i][j],2);
	          }
          covar[j]=sqrt(covar[j]/(r-1));
	  }

	for(j=0;j<c;j++)
	{
		for(i=0;i<r;i++)
			x[i][j]=(x[i][j]-aver[j])/covar[j];
	}
	return;
}
double* cov(double *x,int r,int c,double *cov_x)///??
{
	int i,k;
	for(int j=0;j<c;j++)
	   {
		  for(k=0;k<c;k++)
		     {
			    cov_x[j*c+k]=0;
			    for(i=0;i<r;i++)
			       cov_x[j*c+k]+=x[i*c+j]*x[i*c+k];
			    cov_x[j*c+k]/=(r-1);
		     }
	   }
	return 0;
}

void householder(double** m, int n, double *d, double *e)
{
	int l,k,j,i;
	double scale, hh, h, g, f;

	for(i=n-1;i>0;i--)
	{
		l=i-1;
		h=scale=0;
		if(l>0)
		{
			for(k=0;k<l+1;k++)
			{
				scale+=fabs(m[i][k]);
			}
			if(scale==0.0)
			{
				e[i]=m[i][l];
			}
			else
			{
				for(k=0;k<l+1;k++)
				{
					m[i][k]/=scale;			//	m[i][k]=m[i][k]/scale;
					h+=m[i][k]*m[i][k];
				}
				f=m[i][l];
				g=(f>=0.0?-sqrt(h):sqrt(h));  //alpha
				e[i]=scale*g;
				h-=f*g;
				m[i][l]=f-g;
				f=0.0;

				for(j=0;j<l+1;j++)
				{
					m[j][i]=m[i][j]/h;
					g=0.0;
					for(k=0;k<j+1;k++)
					{
						g+=m[j][k]*m[i][k];
					}
					for(k=j+1;k<l+1;k++)
					{
						g+=m[k][j]*m[i][k];
					}
					e[j]=g/h;
					f+=e[j]*m[i][j];
				}
				hh=f/(h+h);
				for(j=0;j<l+1;j++)
				{
					f=m[i][j];
					e[j]=g=e[j]-hh*f;
					for(k=0;k<j+1;k++)
					{
						m[j][k]-=( f*e[k]+g*m[i][k] );
					}
				}
			}
		}
		else
		{
			e[i]=m[i][l];
		}
		d[i]=h;
	}

	d[0]=0.0;
	e[0]=0.0;

	for(i=0;i<n;i++)
	{
		l=i;
		if(d[i]!=0.0)
		{
			for(j=0;j<l;j++)
			{
				g=0.0;
				for(k=0;k<l;k++)
				{
					g+=m[i][k]*m[k][j];
				}
				for(k=0;k<l;k++)
				{
					m[k][j]-=g*m[k][i];
				}
			}
		}
		d[i]=m[i][i];
		m[i][i]=1.0;
		for(j=0;j<l;j++)
		{
			m[j][i]=m[i][j]=0.0;
		}
	}
}
void ql(double** z, int n,  double* d, double* e)
{
	int m,l,iter,i,k;
	double s,r,p,g,f,dd,c,b;
	for(i=1;i<n;i++)
		e[i-1]=e[i];
	e[n-1]=0.0;
	for(l=0;l<n;l++)
	{
		iter=0;
		do
		{
			for(m=l;m<n-1;m++)
			{
				dd=fabs(d[m])+fabs(d[m+1]);
				if(fabs(e[m])+dd==dd)  break;
			}
			if(m!=l)
			{
				if(iter++==30) 	return;
				g=(d[l+1]-d[l])/(e[l]+e[l]);
				r=sqrt(g*g+1.0);
				g=d[m]-d[l]+e[l]/( g + g>0?(r>0?r:-r):(r>0?-r:r) );
				s=c=1.0;
				p=0.0;
				for(i=m-1;i>=l;i--)
				{
					f=s*e[i];
					b=c*e[i];
					e[i+1]=(r=sqrt(f*f+g*g));
					if(r==0.0)
					{
						d[i+1]-=p;
						e[m]=0.0;
						break;
					}
					s=f/r;  c=g/r;  g=d[i+1]-p;	r=(d[i]-g)*s+2.0*c*b;  d[i+1]=g+(p=s*r);  g=c*r-b;
					for(k=0;k<n;k++)
					{
						f=z[k][i+1];
						z[k][i+1]=s*z[k][i]+c*f;
						z[k][i]=c*z[k][i]-s*f;
					}
				}
				if(r==0.0 && i>=l)  continue;
				d[l]-=p;  e[l]=g;  e[m]=0.0;
			}
		}while(m!=l);
	}

}
对特征值特征向量排序
void order(double** q, double* val, int c) 
{
	for(int n=0;n<c-1;n++)
	{
		for(int m=n+1;m<c;m++)
		{
			if(val[n]< val[m] )
			{
				swap(val[n], val[m]);           //特征值特征向量排序
				for(int k=0;k<c;k++)
					swap(q[k][n],q[k][m]);
			}
		}
	}
	double sum=0.0;
	double sum1=0.0;
	for(int i=0;i<c;i++)
	{
	  sum+=val[i];
	}

	for(int p=0;p<c;p++)
	{
		sum1+=val[p];
		double t=sum1/sum;
		if(t>=0.80&&t<=0.85)
		printf("主成分占得比例:%.3f  取主成分个数:%d\n",t,p);
	}

}

/
void eigen(double *covar,int c,double *val)
{
	int i,j;

	double** Rx=new double*[c]; //Rx<-->covar  二维与一维的转换
	for(i=0;i<c;i++) Rx[i]=new double[c];

	for(i=0;i<c;i++)
		for(j=0;j<c ;j++)
		  Rx[i][j]=covar[i*c+j];

	double* e=new double[c]; //临时向量
	householder(Rx,c,val,e);
	ql(Rx,c,val,e);
	delete []e;
	order(Rx,val,c);
 	for(i=0;i<c;i++)
		for(j=0;j<c ;j++)
		  covar[i*c+j]=Rx[i][j];
}
void whiten(double *x,double *d,int row,int col,double *L)
{
   int i,j;

   double *L1;//白化阵
   if ((L1=(double *) calloc(row*col, sizeof(double))) == NULL) printf("L1 is wrong");

   for(i=0;i<col;i++)
	  for(j=0;j<row;j++)
		 L1[i*row+j]=x[j*col+i]/sqrt(d[i]);

   for(int i=0;i<row;i++)//转置
	   for(int j=0;j<col;j++)
		   L[i*col+j]=L1[j*row+i];
}
void mult(double *a,double *b,int r1,int c1,int c2,double **x)
{
	int i,j,k;
	for(i=0;i<r1;i++)
	{
		for(j=0;j<c2;j++)
		{
			x[i][j]=0;
			for(k=0;k<c1;k++)
				x[i][j]+=a[i*c1+k]*b[k*c2+j];
		}
	}
}
void mult1(double *a,double **b,int r1,int c1,int c2,double **x)
{
	int i,j,k;
	for(i=0;i<r1;i++)
	{
		for(j=0;j<c2;j++)
		{
			x[i][j]=0;
			for(k=0;k<c1;k++)
				x[i][j]+=a[i*c1+k]*b[k][j];
		}
	}
}
void mult2(double **a,double **b,int r1,int c1,int c2,double **x)
{
	int i,j,k;
	for(i=0;i<r1;i++)
	{
		for(j=0;j<c2;j++)
		{
			x[i][j]=0;
			for(k=0;k<c1;k++)
				x[i][j]+=a[i][k]*b[k][j];
		}
	}

}

void permute1(double** q, double* d, int c)
{
	double sum=0.0;
	double sum1=0.0;
	for(int n=0;n<c-1;n++)
	{
		for(int m=n+1;m<c;m++)
		{
			if(d[n]<d[m] )
			{
				swap(d[n],d[m]);
				for(int k=0;k<c;k++) swap(q[k][n],q[k][m]);
			}
		}
	}
}
double** orthnormalize(double** w1,int q)
{
	double** w=new double*[q];//以下为对W1的正交标准化,求得W2
	for(int i=0;i<q;i++) w[i]=new double[q];

	double** w1p=new double*[q];//以下为对W1的正交标准化,求得W2
	for(int i=0;i<q;i++) w1p[i]=new double[q];

	for(int i=0;i<q;i++)
		for(int j=0;j<q;j++)
			w1p[i][j]=w1[j][i];

	mult2(w1,w1p,q,q,q,w);

	double* d=new double[q];
	double* e=new double[q];

	householder(w,q,d,e);
	ql(w,q,d,e);
	permute1(w,d,q);

	double** B=new double*[q];
	for(int i=0;i<q;i++) B[i]=new double[q];

	for(int i=0;i<q;i++)
	{
		for(int j=0;j<q;j++)
		{
			B[i][j]=0;
			for(int k=0;k<q;k++)
			{
				B[i][j]+=w[i][k]*w[j][k]/sqrt(d[k]);
			}
		}
	}

    double** w2=new double*[q];//W2
	for(int i=0;i<q;i++) w2[i]=new double[q];

	mult2(B,w1,q,q,q,w2);

	for(int i=0;i<q;i++) delete[]w[i];
	    delete[] w;
	    w=NULL;
	for(int i=0;i<q;i++) delete[]w1p[i];
	    delete[] w1p;
	    w1p=NULL;
	for(int i=0;i<q;i++) delete[]B[i];
	    delete[] B;
	    B=NULL;

	delete[]d;
	delete[]e;

	return w2;
}
double** G(double** a, int r, int c)
{
	double** ga=new double*[r];
	for(int i=0;i<r;i++) ga[i]=new double[c];

	for(int i=0;i<r;i++)
	{
		for(int j=0;j<c;j++)
		{
			ga[i][j]=tanh(a[i][j]);
		}
	}
	return ga;
}

double** GP(double** a, int r, int c)
{
	double** gga=new double*[r];
	for(int i=0;i<r;i++) gga[i]=new double[c];

	for(int i=0;i<r;i++)
	{
		for(int j=0;j<c;j++)
		{
			gga[i][j]=1-tanh(a[i][j])*tanh(a[i][j]);
		}
	}
	return gga;
}
void setvalue(double** from,double** to, int c)
{
	for(int i=0;i<c;i++)
	{
		for(int j=0;j<c;j++)
		{
			to[i][j]=from[i][j];
		}
	}
}

double** w_function(double **Z,int r,int q)
{
	int i,j,k;
	double** W1=new double*[q];//随机阵
	for(i=0;i<q;i++) W1[i]=new double[q];

	double** W2=new double*[q];//随机阵
	for(i=0;i<q;i++) W2[i]=new double[q];

	for(i=0;i<q;i++)
		for(j=0;j<q;j++)
			W1[i][j]=(rand()%1000000)/1000000.0;
	//write_mm("W1.txt",W1,q,q);

	double** X=new double*[r];
	for(i=0;i<r;i++) X[i]=new double[q];

	W2=orthnormalize(W1,q);
	//write_mm("W2.txt",W2,q,q);

	setvalue(W2,W1,q);

	//以下循环求得W3
	int iter=0;
	double zelta=0;
	do
	{
		iter++;

		setvalue(W2,W1,q);//旧的等于新值,迭代
		mult2(Z,W1,r,q,q,X);
		//write_mm("X.txt",X,r,q);
		double** g=G(X,r,q);
		double** gp=GP(X,r,q);

		for(j=0;j<q;j++)
		{
			for(k=0;k<q;k++)
			{
				W2[j][k]=0;
				for(i=0;i<r;i++)//均值
					W2[j][k]+=(Z[i][j]*g[i][k]-W1[j][k]*gp[i][k]);
				W2[j][k]/=r;
			}
		}

		for(i=0;i<r;i++) delete[]X[i];
	    delete[] X;
	    X=NULL;
		for(i=0;i<r;i++) delete[]g[i];
	    delete[] g;
	    g=NULL;
		for(i=0;i<r;i++) delete[]gp[i];
	    delete[] gp;
	    gp=NULL;

	    orthnormalize(W2,q);
		setvalue(W2,W1,q);//交换值


	    double sum1=0;//判断zelta大小
	    double sum2=0;

	    for(i=0;i<q;i++)
	       {
		    for(j=0;j<q;j++)
		       {
			      sum1+=(W2[i][j]-W1[i][j])*(W2[i][j]-W1[i][j]);
			      sum2+=W1[i][j]*W1[i][j];
		       }
	       }
	    zelta=sqrt(sum1/sum2/q/q);
	}while( zelta>1e-7 && iter<300 );
	return W2;
}
int test(double *coef1,double *coef,int q)
{
	int i,count=0;
	double temp,temp1=0.0;
	for(i=0;i<q;i++)
	{
		temp=coef1[i]-coef[i];
		coef[i]=coef1[i];
		temp1+=temp*temp;
	}
	temp=sqrt(temp1);
	if(temp>1e-10)
		return 1;
	else
	    return 0;
}
double coefficient(double **x,double *y,double *aver_x,double *covar_x,double aver_y,double covar_y,double *coef,double b0,int r,int q)
{
	int i,j,k;

	int error;

	double *train_x1;
	double *train_y1;

	if((train_x1=(double *) calloc(r*q,sizeof(double)))==NULL) printf("the train_x1 is wrong");
	if((train_y1=(double *) calloc(r*1,sizeof(double)))==NULL) printf("the train_y1 is wrong");

	for(i=0;i<q;i++)
		for(j=0;j<q;j++)
		    {
			  train_x1[i+j*q]=0.0;
			  for(k=0;k<r;k++)
                 train_x1[i*q+j]+=x[k][i]*x[k][j];
		     }

    for(i=0;i<q;i++)
	{
		for(k=0;k<r;k++)
            train_y1[i]+=x[k][i]*y[k];
	 }

	double *coef1;

	coef1=(double *) calloc(q, sizeof(double));

	 for(i=0;i<q;i++)
	 {
		 coef1[i]=0;
	     coef[i]=0;
	 }

	 do
	 {
	    for(i=0;i<q;i++)
		{
	       double temp=0.0;
		   for(j=0;j<q;j++)
		   if(i!=j)
			  temp+=train_x1[i*q+j]*coef1[j];
		   temp=train_y1[i]-temp;
		   coef1[i]=temp/train_x1[i+i*q];
		}
		error=test(coef1,coef,q);
	 }while(error);


	 double temp=0.0;

	 for(j=0;j<q;j++)
	    {
			coef[j]*=covar_y/covar_x[j];
		    temp+=coef[j]*aver_x[j];
	        b0=aver_y-temp;
        }
	return b0;
}


int result(FILE *outfp,double *test_y,double *predictvalue,int r,double th)//评价参数
{
	int i,j;
    double  a,b,c,d,e;


	double tn=0;
	double tp=0;
	double fp=0;
	double fn=0;

	for(i=0;i<r;i++)
		{

			if(test_y[i]==1.0)
			{
				if(predictvalue[i]<th)
					fn+=1;
				else
					tp+=1;
			}
			if(test_y[i]==0.0)
			{
				if(predictvalue[i]<th)
					tn+=1;
				else
					fp+=1;
			}
		}

	a=tp+fn;
	b=tn+fp;
	c=tp+fp;
	d=tn+fn;
	e=tp+fp+tn+fn;

	sen=tp/a;
	spe=tn/b;
	ppv=tp/c;
	npv=tn/d;
	acc=(tp+tn)/e;
	cc=(tp*tn-fn*fp)/sqrt((tn+fn)*(tn+fp)*(tp+fn)*(tp+fp));


	if(fabs(sen-spe)<0.1)
	{
	   fprintf(outfp,"\nth       tn      fp      tp      fn          sen         spe          ppv        npv       acc        cc     \n");
	   fprintf(outfp,"%-4.3lf    %-4.0lf    %-4.0lf    %-4.0lf    %-4.0lf    %lf    %lf    %lf    %lf   %lf    %lf    \n",th,tn,fp,tp,fn,sen,spe,ppv,npv,acc,cc );
	}
	else
		return 0;

   return 0;
}

int AUC_curve(double *test_y,double *predictvalue,int r)
{
	int i,j,k=0;
    double  a,b,c,d,e;
	AUC=0;
	double *Sen,*Spe;
	Sen=(double *)calloc(r,sizeof(double));//创建数组
	Spe=(double *)calloc(r,sizeof(double));
	double *threshold=new double[r];
	double *se_add_sp=new double[r];
	double *accuracy=new double[r];//正确率
	double *cc=new double[r];//相关系数
	double tempPrey=0;
	double tempy=0;

	for (i=0;i<r-1;i++)
	{
		for (j=i+1;j<r;j++)
		{
			if (predictvalue[i]<predictvalue[j])
			{
				tempPrey=predictvalue[i];
				predictvalue[i]=predictvalue[j];
				predictvalue[j]=tempPrey;

				tempy=test_y[i];
				test_y[i]=test_y[j];
				test_y[j]=tempy;
			}
		}
	}

	double temptheshold=0;
	double sen1=0,spe1=0;
	double sen2=0,spe2=0;

	for(i=0;i<r;i++) //计算sp  se
	{
		if(i!=(r-1))
			
		{
			temptheshold=(predictvalue[i]+predictvalue[i+1])/2.0;
			threshold[i]=temptheshold;
		}
		else
		{
			temptheshold=predictvalue[i]-0.5;
			threshold[i]=temptheshold;
		}
		double tn=0;
		double tp=0;
		double fp=0;
		double fn=0;

		for(j=0;j<r;j++)//计数循环
		{
			if(test_y[j]==1.0)
			{
				if(predictvalue[j]<temptheshold)
					fn+=1;
				else
					tp+=1;
			}

			if(test_y[j]==0.0)
			{
				if(predictvalue[j]<temptheshold)
					tn+=1;
				else
					fp+=1;
			}
		}
		a=tp+fn;  //真正 总数
		b=tn+fp;  //负类 的总个数
		c=tp+fp;
		d=tn+fn;
		e=tp+fp+tn+fn;

		sen1=tp/a;
		spe1=tn/b;   //
		se_add_sp[i]=sen1+spe1;

		spe1=1-spe1; //最后sp等于1-sp

		sen1=sen1;
		AUC += (spe1-spe2)*(sen1+sen2)/2.0;  //计算AUC
		sen2=sen1;Sen[i]=sen1;
		spe2=spe1;Spe[i]=spe1;
		accuracy[i]=(tp+tn)/e;//正确率
		cc[i]=(tp*tn-fn*fp)/sqrt(a*b*c*d);//相关系数
	}

	FILE *auc=fopen("out\\all_AUC.txt","a");
	fprintf(auc,"se\t1-sp\n");

	for(i=0;i<r;i++)
		fprintf(auc,"%lf %lf\n",Sen[i],Spe[i]); //打印  sp,se

	fprintf(auc,"AUC=%lf\n",AUC);
	fclose(auc);
	if(W_AUC<AUC)
	{
		W_AUC=AUC;
		FILE *auc_fp=fopen("out\\best_AUC.txt","w");
	 
		fprintf(auc_fp,"se \t\t1-sp\t\tthreshold\tse_add_sp\taccuracy\tcc\n");
		double tempac=0.0;
		for(i=0;i<r;i++)
		{
			fprintf(auc_fp,"%lf \t%lf \t%lf \t%lf \t%lf \t%lf\n",Sen[i],Spe[i],threshold[i],se_add_sp[i],accuracy[i],cc[i]); //打印sp,se
			if(accuracy[i]>tempac)
			{
				tempac=accuracy[i];
				BestThreshold=threshold[i];
				
			}
		}
		printf("BestAUC=%lf, BestAC=%lf, BestThreshold= %lf\n",W_AUC,tempac,BestThreshold);
		fprintf(auc_fp,"AUC=%lf\n",AUC);
		fclose(auc_fp);
	}
	return 0;
}

int accessfile()
{
	/*检查文件夹是否存在,不存在则创建*/
	if( (_access( "indata", 0 )) != -1 )  
	{  
		printf( "读取数据文件夹 indata 存在\n" );  
	}else{
		printf( "数据文件目录 indata 不存在,先创建目录放入数据后在运行\n" ); 
		return -1;
	}

	if( (_access( "out", 0 )) != -1 )  
	{  
		printf( "输出文件夹 out 存在\n" );  
	}else
	{
		if( _mkdir( "out" ) == 0 )
		{
			printf( "Directory 'out' was successfully created\n" );
		}
		else
			printf( "Problem creating directory 'out'\n" );
	}
}
int main()
{
	if(accessfile()==-1)return -1;
	/*记录开始时间*/
	struct  tm *newtime;
	time_t  long_time,timebegin,timeend;
	timebegin=time(NULL);
	time(&long_time);// 等价于 long_time=time(NULL);
	newtime=localtime( &long_time );
	printf( "%s", asctime( newtime ));
	W_AUC=0.0;
	
	//对输入文件指针赋值
	remove("out\\all_AUC.txt");

	int row_train=2088;//校正集行数
    int row_all=2320;//留一交叉验证,校正部分行数

	int rcount=ASSCODE*WIN; //也是100
	int column=rcount;
  
 

	double b0=0;//常数项
	double C=0;//主链贡献率

	int i,j,k;

	double *train_x;  //训练集 样本
	double *train_y;

	if((train_x=(double *) calloc(row_train*column,sizeof(double)))==NULL)
		printf("the train_x is wrong");
	if((train_y=(double *) calloc(row_train,sizeof(double)))==NULL) 
		printf("the train_y is wrong");

	double *aver_xx;//独立成分的均值
	if ((aver_xx=(double *) calloc(column, sizeof(double))) == NULL)
		printf("aver_xx is wrong");
	double *covar_xx;//训练样本每一列的方差
	if ((covar_xx=(double *) calloc(column, sizeof(double))) == NULL) 
		printf("covar_xx is wrong");

	double *covar;//方差阵
	if ((covar=(double *) calloc(column*column, sizeof(double))) == NULL) printf("covar is wrong");

	double *eigval=new double[column];//正交阵的特征值
 
	double *L;//白化阵转置后得到
	if((L=(double*) calloc(column*M_num,sizeof(double)))==NULL) printf("L is wrong");

	double *test_x;   //测试集 预测集
	double *test_y;

	if((test_x=(double *) calloc(row_all*column,sizeof(double)))==NULL) printf("the test_x is wrong");
	if((test_y=(double *) calloc(row_all,sizeof(double)))==NULL) printf("the test_y is wrong");

	//double** Tmain=new double*[row_train];//主成份
	//for(i=0;i<row_train;i++) Tmain[i]=new double[column];

	double** Z=new double*[row_train];//白化数据
	for(i=0;i<row_train;i++) Z[i]=new double[M_num];

	double** W2=new double*[M_num];//w
	for(i=0;i<M_num;i++) W2[i]=new double[M_num];

	double** Y=new double*[row_train];//独立成分
	for(i=0;i<row_train;i++) Y[i]=new double[M_num];


	double *aver_x;//独立成分的均值
	if ((aver_x=(double *) calloc(M_num*1, sizeof(double))) == NULL) printf("aver_x is wrong");
	double *covar_x;//独立成分的方差
	if ((covar_x=(double *) calloc(M_num*1, sizeof(double))) == NULL) printf("covar_x is wrong");

	double aver_y=0,covar_y=0;

	double** B=new double*[column];//解混矩阵
	for(i=0;i<column;i++) 
		B[i]=new double[M_num];

	double *coef;//系数
	coef=(double *) calloc(M_num, sizeof(double));

	double *Coef;//权重系数
	Coef= (double *) calloc(column, sizeof(double));

	double** TEST=new double*[row_all];//解混矩阵处理后的测试集
	for(i=0;i<row_all;i++)		TEST[i]=new double[M_num]; 


	double *predict_value;
	predict_value=(double *) calloc(row_all, sizeof(double));


	 FILE *b_fp;
	 b_fp=fopen("out\\b0.txt","w");
	 fprintf(b_fp,"窗口大小为%d\n",WIN);

	 FILE *outfp;
	 outfp=fopen("out\\parameter.txt","w");
	 fprintf(outfp,"窗口大小为%d\n",WIN);

	for(int it=0;it<FOLD;it++)/交叉验证  FOLD 
	{
        printf("第%d个样本\n",it+1);

		file_pre_train(it );//分类训练集和测试集

		FILE *train_fp=fopen("out\\train.txt","r");
		if(train_fp==NULL)
			printf("fp=null\n" );

		code(train_fp,train_x,train_y,row_train,column);//编码变换

		normalize(train_x,row_train,column);//原始数据标准化:(x-均值)/标准差
		//write_m("train_x.txt",train_x,row_train,c);
		for(i=0;i<column;i++)  //求每一列的标准差
		{
			for(j=0;j<row_train;j++)
				aver_xx[i]+=train_x[j*column+i];
			aver_xx[i]/=row_train;
			for(j=0;j<row_train;j++)
				covar_xx[i]+=pow((train_x[j*column+i]-aver_xx[i]),2);
			covar_xx[i]=sqrt(covar_xx[i]/(row_train-1));
		}
		for(i=0;i<row_train;i++)//IC50值标准化
			aver_y+=train_y[i];
		aver_y/=row_train;
		for(i=0;i<row_train;i++)
			covar_y+=pow((train_y[i]-aver_y),2);
		covar_y=sqrt(covar_y/(row_train-1));
		for(i=0;i<row_train;i++)
			train_y[i]=(train_y[i]-aver_y)/covar_y;
		//printf("%lf%lf\n",aver_y,covar_y);
		//write_m("train_y.txt",train_y,row_train,1);

	
		cov(train_x,row_train,column,covar);//返回协方差阵covar
		//write_m("cov.txt",covar,column,column);

		eigen(covar,column,eigval);//covar:特征向量column:列数,eigval特征值
		//write_m("eigen.txt",covar,column,column);

		//mult(train_x,covar,row_train,column,M_num,Tmain);//主成份T
		//write_mm("zhuchenfen.txt",T,row_train,c);

		whiten(covar,eigval,column,M_num,L);//矩阵L[column,M_num]作为返回值

		//write_m("L3.txt",L,column,M_num);

		mult(train_x,L,row_train,column,M_num,Z);//白化数据Z[row_train,M_num]
		write_mm("白化数据Z.txt",Z,row_train,M_num);

		W2=w_function(Z,row_train,M_num);//单位正交阵W
		//write_mm("W2.txt",W2,q,q);

		mult2(Z,W2,row_train,M_num,M_num,Y);//计算独立成分矩阵
		//write_mm("独立成分Y.txt",Y,row_train,c);

		mult1(L,W2,column,M_num,M_num,B);//计算解混矩阵
		//write_mm("解混矩阵B.txt",B,c,q);

		normalize2(Y,row_train,M_num ,aver_x,covar_x);//独立成分矩阵做标准化
		/*write_mm("Y.txt",Y,row_train,c);
		write_m("aver_x.txt",aver_x,q,1);
		write_m("covar_x.txt",covar_x,q,1);*/

		b0=coefficient(Y,train_y,aver_x,covar_x,aver_y,covar_y,coef,b0,row_train,M_num);
		printf("b0: %lf\n",b0);
		//write_m("系数coef值.txt",coef,1,q);

		fprintf(b_fp,"%lf\n",b0);//记录 b0

		//---预测---//
		file_pre(); //将裂解和非裂解各去1160,合并为一个预测文件,前面的预测是交叉验证预测

		FILE *test_fp=fopen("out\\PredictData.txt","r");      //全部序列 1/0
		int row=0;
		char temparray[35];
		while(!feof(test_fp))
		{
			row++;
			fgets(temparray,35,test_fp);
		}
		printf("predict row=%d\n",row);
		rewind(test_fp);
		code(test_fp,test_x,test_y,row_all,column);

		normalize(test_x,row_all,column);
	

		/*mult1(test_x,B,row_all,c,q,TEST);
		write_mm("TEST_x.txt",TEST,row_all,q);*/
		for(i=0;i<row_all;i++)
	    {
		   for(j=0;j<M_num;j++)
		   {
			  TEST[i][j]=0;
			  for(k=0;k<column;k++)
				  TEST[i][j]+=test_x[i*column+k]*B[k][j];//解混矩阵B
		   }
	    }
		//write_mm("TEST_x.txt",TEST,row_all,q);

	   for(i=0;i<row_all;i++)
		   predict_value[i]=0.0;

	   for(i=0;i<row_all;i++)//预测y=a*x+b 
	   {
			for(j=0;j<M_num;j++)
				predict_value[i]+=TEST[i][j]*coef[j];
			predict_value[i]+=b0;  
	   }

	   write_m("out\\predict_value.txt",predict_value,row_all,1);//记录预测值

       double sum;
	   for(i=0;i<column; i++)
       {
	        sum=0.0 ;
	        for(j=0;j<M_num;j++ )
	        sum+=B[i][j]*coef[j];
	        Coef[i]=sum;
       }
	   for(i=0;i<column;i++)
		   Coef[i]/=covar_xx[i];
	   write_m("out\\权重系数Coef.txt",Coef,column,1);

	   for(i=0;i<column;i++)
	       C-=Coef[i]*aver_xx[i];
	   //printf("主链贡献值const=%lf\n",C);

	   fprintf(outfp,"\n第%d个样本\n",it+1);
	   fprintf(outfp,"主成分个数为%d\n",M_num);

	   double th;
	   for(th=0.4;th<0.7;th+=0.005)
		   result(outfp,test_y,predict_value,row_all,th);       //outfp parameter.txt 文件
                                                                //test——y 末尾0/1串
                                                                //predict_value TEST(row_all*row_all)全部测试集*coef+b0
                                                                //row_all总数2320
                                                                //th 0.4:0.005:0.7	
	   AUC_curve(test_y,predict_value,row_all);    //打印AUC
	   
	}

	/*记录结束时间,并打印到屏幕*/
	timeend=time(NULL);
	/*显示当前时间字符串*/
	time( &long_time );
	newtime = localtime( &long_time );
	printf("\n%s",asctime( newtime ) );
	fprintf(outfp,"%s\n",asctime( newtime ));
	int mm=(int)difftime(timeend,timebegin);
	printf(" The total time you take is: %d:%d:%d\n",mm/3600,mm%3600/60,mm%3600%60);
	fprintf(outfp," The total time you take is: %d:%d:%d\n",mm/3600,mm%3600/60,mm%3600%60);

	return 0;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值