DPM learn.cc编译

每次做完都不记录,好久不用,回头再弄一次,错误又走一遍。

learn.cc编译生成learn.cc

1. 首先将learn.cc改名为learn.cpp,在VS2010中新建一个空的控制台工程,添加原文件learn.cpp

2. 修改内容

尝试编译,错误提示为:Cannot open include file: 'sys/time.h': No such file or directoryc:\test\dpm_learn\dpm_learn\learn.cpp5

这是linux中的目录格式,直接将第5行的#include <sys/time.h> 改为 #include <time.h>,再次编译,然后就出现了一大堆错误,有以下几点:

(1) windows中的time.h文件中没有结构体timeval的定义,也没有gettimeofday函数,从网上找了一段代码添加到learn.cpp中。

(2) 上一步加入的代码需要添加头文件windows.h,而包含进这个头文件后,就包含了系统中定义的min和max函数,所以需要注释掉learn.cpp中定义的min和max函数,否则出错。

(3) windows中没有drand48和srand48的定义,把网友博客中自己写的这两个函数添加进去。

(4) windows中没有INFINITY的定义,自己定义一个。

(5) main函数中的int buf[labelsize+2];出错,原因是VS中的c++编译器不允许用变量指定数组长度,改为使用new动态分配:

int *buf = new int[labelsize+2]; ,同时,在同一作用域最后delete [] buf;

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <errno.h>
#include <fstream>
#include <iostream>
//#include "stdafx.h"
#include <Windows.h>
//#include <iosfwd>


using namespace std;

/*
 * Optimize LSVM objective function via gradient descent.
 *
 * We use an adaptive cache mechanism.  After a negative example
 * scores beyond the margin multiple times it is removed from the
 * training set for a fixed number of iterations.
 */

// Data File Format
// EXAMPLE*
// 
// EXAMPLE:
//  long label          ints
//  blocks              int
//  dim                 int
//  DATA{blocks}
//
// DATA:
//  block label         float
//  block data          floats
//
// Internal Binary Format
//  len           int (byte length of EXAMPLE)
//  EXAMPLE       <see above>
//  unique flag   byte

// number of iterations
#define ITER 10e6

// minimum # of iterations before termination
#define MIN_ITER 5e6

// convergence threshold
#define DELTA_STOP 0.9995

// number of times in a row the convergence threshold
// must be reached before stopping
#define STOP_COUNT 5

// small cache parameters
#define INCACHE 25
#define MINWAIT (INCACHE+25)
#define REGFREQ 20

// error checking
#define check(e) \
(e ? (void)0 : (printf("%s:%u error: %s\n%s\n", __FILE__, __LINE__, #e, strerror(errno)), exit(1)))

// number of non-zero blocks in example ex
#define NUM_NONZERO(ex) (((int *)ex)[labelsize+1])

// float pointer to data segment of example ex
#define EX_DATA(ex) ((float *)(ex + sizeof(int)*(labelsize+3)))

// class label (+1 or -1) for the example
#define LABEL(ex) (((int *)ex)[1])

// block label (converted to 0-based index)
#define BLOCK_IDX(data) (((int)data[0])-1)

// set to 0 to use max-component L2 regularization
// set to 1 to use full model L2 regularization
#define FULL_L2 0

// windows have no INFINITY define
#define INFINITY 0x0FFFFFFF

int labelsize;
int dim;

//参照网上自己写的drand48和srand48函数
#define MNWZ 0x100000000   
#define ANWZ 0x5DEECE66D   
#define CNWZ 0xB16  
static unsigned long long seed = 1;

// a collapsed example is a sequence of examples
struct collapsed {
  char **seq;
  int num;
};

// the two node types in an AND/OR tree
enum node_type { OR, AND };

// set of collapsed examples
struct dataArray {
  collapsed *x;
  int num;
  int numblocks;
  int numcomponents;
  int *blocksizes;
  int *componentsizes;
  int **componentblocks;
  float *regmult;
  float *learnmult;
};

int gettimeofday(struct timeval *tp, void *tzp)
{
    time_t clock;
    struct tm tm;
    SYSTEMTIME wtm;
    GetLocalTime(&wtm);
    tm.tm_year = wtm.wYear - 1900;
    tm.tm_mon = wtm.wMonth - 1;
    tm.tm_mday = wtm.wDay;
    tm.tm_hour = wtm.wHour;
    tm.tm_min = wtm.wMinute;
    tm.tm_sec = wtm.wSecond;
    tm.tm_isdst = -1;

    clock = mktime(&tm);
    tp->tv_sec = clock;
    tp->tv_sec = wtm.wMilliseconds * 1000;
    return(0);
}
double drand48(void)
{
    seed = (ANWZ * seed + CNWZ) & 0xFFFFFFFFFFFFLL;
    unsigned int x = seed >> 16;
    return  ((double)x / (double)MNWZ);
}
//static unsigned long long seed = 1; 
void srand48(unsigned int i)
{
    seed = (((long long int)i) << 16) | rand();
}

// comparison function for sorting examples 
int comp(const void *a, const void *b) {
    // sort by extended label first, and whole example second...
    int c = memcmp(*((char **)a) + sizeof(int),
        *((char **)b) + sizeof(int),
        labelsize * sizeof(int));
    if (c)
        return c;

    // labels are the same  
    int alen = **((int **)a);
    int blen = **((int **)b);
    if (alen == blen)
        return memcmp(*((char **)a) + sizeof(int),
            *((char **)b) + sizeof(int),
            alen);
    return ((alen < blen) ? -1 : 1);
}

// seed the random number generator with an arbitrary (fixed) value
void seed_rand() {
 srand48(3);
}

//static inline double min(double x, double y) { return (x <= y ? x : y); }
//static inline double max(double x, double y) { return (x <= y ? y : x); }

// compute the score of an example
static inline double ex_score(const char *ex, dataArray X, double **w) {
  double val = 0.0;
  float *data = EX_DATA(ex);
  int blocks = NUM_NONZERO(ex);
  for (int j = 0; j < blocks; j++) {
    int b = BLOCK_IDX(data);
    data++;
    double blockval = 0;
    for (int k = 0; k < X.blocksizes[b]; k++)
      blockval += w[b][k] * data[k];
    data += X.blocksizes[b];
    val += blockval;
  }
  return val;
}

// return the value of the object function.
// out[0] : loss on negative examples
// out[1] : loss on positive examples
// out[2] : regularization term's value
double compute_loss(double out[3], double C, double J, dataArray X, double **w) {
  double loss = 0.0;
#if FULL_L2
  // compute ||w||^2
  for (int j = 0; j < X.numblocks; j++) {
    for (int k = 0; k < X.blocksizes[j]; k++) {
      loss += w[j][k] * w[j][k] * X.regmult[j];
    }
  }
#else
  // compute max norm^2 component
  for (int c = 0; c < X.numcomponents; c++) {
    double val = 0;
    for (int i = 0; i < X.componentsizes[c]; i++) {
      int b = X.componentblocks[c][i];
      double blockval = 0;
      for (int k = 0; k < X.blocksizes[b]; k++)
        blockval += w[b][k] * w[b][k] * X.regmult[b];
      val += blockval;
    }
    if (val > loss)
      loss = val;
  }
#endif
  loss *= 0.5;

  // record the regularization term
  out[2] = loss;

  // compute loss from the training data
  for (int l = 0; l <= 1; l++) {
    // which label subset to look at: -1 or 1
    int subset = (l*2)-1;
    double subsetloss = 0.0;
    for (int i = 0; i < X.num; i++) {
      collapsed x = X.x[i];

      // only consider examples in the target subset
      char *ptr = x.seq[0];
      if (LABEL(ptr) != subset)
        continue;

      // compute max over latent placements
      int M = -1;
      double V = -INFINITY;
      for (int m = 0; m < x.num; m++) {
        double val = ex_score(x.seq[m], X, w);
        if (val > V) {
          M = m;
          V = val;
        }
      }

      // compute loss on max
      ptr = x.seq[M];
      int label = LABEL(ptr);
      double mult = C * (label == 1 ? J : 1);
      subsetloss += mult * max(0.0, 1.0-label*V);
    }
    loss += subsetloss;
    out[l] = subsetloss;
  }

  return loss;
}

// gradient descent
void gd(double C, double J, dataArray X, double **w, double **lb, char *logdir, char *logtag) {
  ofstream logfile;
  string filepath = string(logdir) + "/learnlog/" + string(logtag) + ".log";
  logfile.open(filepath.c_str());
  logfile.precision(14);
  logfile.setf(ios::fixed, ios::floatfield);

  int num = X.num;
  
  // state for random permutations
  int *perm = (int *)malloc(sizeof(int)*X.num);
  check(perm != NULL);

  // state for small cache
  int *W = (int *)malloc(sizeof(int)*num);
  check(W != NULL);
  for (int j = 0; j < num; j++)
    W[j] = INCACHE;

  double prev_loss = 1E9;

  bool converged = false;
  int stop_count = 0;
  int t = 0;
  while (t < ITER && !converged) {
    // pick random permutation
    for (int i = 0; i < num; i++)
      perm[i] = i;
    for (int swapi = 0; swapi < num; swapi++) {
      int swapj = (int)(drand48()*(num-swapi)) + swapi;
      int tmp = perm[swapi];
      perm[swapi] = perm[swapj];
      perm[swapj] = tmp;
    }

    // count number of examples in the small cache
    int cnum = 0;
    for (int i = 0; i < num; i++)
      if (W[i] <= INCACHE)
    cnum++;

    int numupdated = 0;
    for (int swapi = 0; swapi < num; swapi++) {
      // select example
      int i = perm[swapi];

      // skip if example is not in small cache
      if (W[i] > INCACHE) {
    W[i]--;
    continue;
      }

      collapsed x = X.x[i];

      // learning rate
      double T = min(ITER/2.0, t + 10000.0);
      double rateX = cnum * C / T;

      t++;
      if (t % 100000 == 0) {
        double info[3];
        double loss = compute_loss(info, C, J, X, w);
        double delta = 1.0 - (fabs(prev_loss - loss) / loss);
        logfile << t << "\t" << loss << "\t" << delta << endl;
        if (delta >= DELTA_STOP && t >= MIN_ITER) {
          stop_count++;
          if (stop_count > STOP_COUNT)
            converged = true;
        } else if (stop_count > 0) {
          stop_count = 0;
        }
        prev_loss = loss;
        printf("\r%7.2f%% of max # iterations "
               "(delta = %.5f; stop count = %d)", 
               100*double(t)/double(ITER), max(delta, 0.0), 
               STOP_COUNT - stop_count + 1);
    fflush(stdout);
        if (converged)
          break;
      }
      
      // compute max over latent placements
      int M = -1;
      double V = -INFINITY;
      for (int m = 0; m < x.num; m++) {
    double val = ex_score(x.seq[m], X, w);
    if (val > V) {
      M = m;
      V = val;
    }
      }
      
      char *ptr = x.seq[M];
      int label = LABEL(ptr);
      if (label * V < 1.0) {
        numupdated++;
    W[i] = 0;
    float *data = EX_DATA(ptr);
    int blocks = NUM_NONZERO(ptr);
    for (int j = 0; j < blocks; j++) {
      int b = BLOCK_IDX(data);
      double mult = (label > 0 ? J : -1) * rateX * X.learnmult[b];      
      data++;
      for (int k = 0; k < X.blocksizes[b]; k++)
        w[b][k] += mult * data[k];
      data += X.blocksizes[b];
    }
      } else {
    if (W[i] == INCACHE)
          W[i] = MINWAIT + (int)(drand48()*50);
    else
      W[i]++;
      }

      // periodically regularize the model
      if (t % REGFREQ == 0) {
        // apply lowerbounds
        for (int j = 0; j < X.numblocks; j++)
          for (int k = 0; k < X.blocksizes[j]; k++)
            w[j][k] = max(w[j][k], lb[j][k]);

        double rateR = 1.0 / T;

#if FULL_L2 
        // update model
        for (int j = 0; j < X.numblocks; j++) {
          double mult = rateR * X.regmult[j] * X.learnmult[j];
          mult = pow((1-mult), REGFREQ);
          for (int k = 0; k < X.blocksizes[j]; k++) {
            w[j][k] = mult * w[j][k];
          }
        }
#else
        // assume simple mixture model
        int maxc = 0;
        double bestval = 0;
        for (int c = 0; c < X.numcomponents; c++) {
          double val = 0;
          for (int i = 0; i < X.componentsizes[c]; i++) {
            int b = X.componentblocks[c][i];
            double blockval = 0;
            for (int k = 0; k < X.blocksizes[b]; k++)
              blockval += w[b][k] * w[b][k] * X.regmult[b];
            val += blockval;
          }
          if (val > bestval) {
            maxc = c;
            bestval = val;
          }
        }
        for (int i = 0; i < X.componentsizes[maxc]; i++) {
          int b = X.componentblocks[maxc][i];
          double mult = rateR * X.regmult[b] * X.learnmult[b];        
          mult = pow((1-mult), REGFREQ);
          for (int k = 0; k < X.blocksizes[b]; k++)
            w[b][k] = mult * w[b][k];
        }
#endif
      }
    }
  }

  if (converged)
    printf("\nTermination criteria reached after %d iterations.\n", t);
  else
    printf("\nMax iteration count reached.\n", t);

  free(perm);
  free(W);
  logfile.close();
}

// score examples
double *score(dataArray X, char **examples, int num, double **w) {
  double *s = (double *)malloc(sizeof(double)*num);
  check(s != NULL);
  for (int i = 0; i < num; i++)
    s[i] = ex_score(examples[i], X, w);
  return s;  
}

// merge examples with identical labels
void collapse(dataArray *X, char **examples, int num) {
  collapsed *x = (collapsed *)malloc(sizeof(collapsed)*num);
  check(x != NULL);
  int i = 0;
  x[0].seq = examples;
  x[0].num = 1;
  for (int j = 1; j < num; j++) {
    if (!memcmp(x[i].seq[0]+sizeof(int), examples[j]+sizeof(int), 
        labelsize*sizeof(int))) {
      x[i].num++;
    } else {
      i++;
      x[i].seq = &(examples[j]);
      x[i].num = 1;
    }
  }
  X->x = x;
  X->num = i+1;  
}

int main(int argc, char **argv) {  
  seed_rand();
  int count;
  dataArray X;

  // command line arguments
  check(argc == 12);
  double C = atof(argv[1]);
  double J = atof(argv[2]);
  char *hdrfile = argv[3];
  char *datfile = argv[4];
  char *modfile = argv[5];
  char *inffile = argv[6];
  char *lobfile = argv[7];
  char *cmpfile = argv[8];
  char *objfile = argv[9];
  char *logdir  = argv[10];
  char *logtag  = argv[11];

  // read header file
  FILE *f = fopen(hdrfile, "rb");
  check(f != NULL);
  int header[3];
  count = fread(header, sizeof(int), 3, f);
  check(count == 3);
  int num = header[0];
  labelsize = header[1];
  X.numblocks = header[2];
  X.blocksizes = (int *)malloc(X.numblocks*sizeof(int));
  count = fread(X.blocksizes, sizeof(int), X.numblocks, f);
  check(count == X.numblocks);
  X.regmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.regmult != NULL);
  count = fread(X.regmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  X.learnmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.learnmult != NULL);
  count = fread(X.learnmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  check(num != 0);
  fclose(f);
  printf("%d examples with label size %d and %d blocks\n",
     num, labelsize, X.numblocks);
  printf("block size, regularization multiplier, learning rate multiplier\n");
  dim = 0;
  for (int i = 0; i < X.numblocks; i++) {
    dim += X.blocksizes[i];
    printf("%d, %.2f, %.2f\n", X.blocksizes[i], X.regmult[i], X.learnmult[i]);
  }

  // read component info file
  // format: #components {#blocks blk1 ... blk#blocks}^#components
  f = fopen(cmpfile, "rb");
  count = fread(&X.numcomponents, sizeof(int), 1, f);
  check(count == 1);
  printf("the model has %d components\n", X.numcomponents);
  X.componentblocks = (int **)malloc(X.numcomponents*sizeof(int *));
  X.componentsizes = (int *)malloc(X.numcomponents*sizeof(int));
  for (int i = 0; i < X.numcomponents; i++) {
    count = fread(&X.componentsizes[i], sizeof(int), 1, f);
    check(count == 1);
    printf("component %d has %d blocks:", i, X.componentsizes[i]);
    X.componentblocks[i] = (int *)malloc(X.componentsizes[i]*sizeof(int));
    count = fread(X.componentblocks[i], sizeof(int), X.componentsizes[i], f);
    check(count == X.componentsizes[i]);
    for (int j = 0; j < X.componentsizes[i]; j++)
      printf(" %d", X.componentblocks[i][j]);
    printf("\n");
  }
  fclose(f);

  // read examples
  f = fopen(datfile, "rb");
  check(f != NULL);
  printf("Reading examples\n");
  char **examples = (char **)malloc(num*sizeof(char *));
  check(examples != NULL);
  int *buf = new int[labelsize + 2];
  for (int i = 0; i < num; i++) {
    // we use an extra byte in the end of each example to mark unique
    // we use an extra int at the start of each example to store the 
    // example's byte length (excluding unique flag and this int)
    //int buf[labelsize+2];

      memset(buf, 0, sizeof(int) *(labelsize + 2));
      
    count = fread(buf, sizeof(int), labelsize+2, f);
    check(count == labelsize+2);
    // byte length of an example's data segment
    int len = sizeof(int)*(labelsize+2) + sizeof(float)*buf[labelsize+1];
    // memory for data, an initial integer, and a final byte
    examples[i] = (char *)malloc(sizeof(int)+len+1);
    check(examples[i] != NULL);
    // set data segment's byte length
    ((int *)examples[i])[0] = len;
    // set the unique flag to zero
    examples[i][sizeof(int)+len] = 0;
    // copy label data into example
    for (int j = 0; j < labelsize+2; j++)
      ((int *)examples[i])[j+1] = buf[j];
    // read the rest of the data segment into the example
    count = fread(examples[i]+sizeof(int)*(labelsize+3), 1, 
          len-sizeof(int)*(labelsize+2), f);
    check(count == len-sizeof(int)*(labelsize+2));
    
  }
  delete[]buf;
  fclose(f);
  printf("done\n");

  // sort
  printf("Sorting examples\n");
  char **sorted = (char **)malloc(num*sizeof(char *));
  check(sorted != NULL);
  memcpy(sorted, examples, num*sizeof(char *));
  qsort(sorted, num, sizeof(char *), comp);
  printf("done\n");

  // find unique examples
  int i = 0;
  int len = *((int *)sorted[0]);
  sorted[0][sizeof(int)+len] = 1;
  for (int j = 1; j < num; j++) {
    int alen = *((int *)sorted[i]);
    int blen = *((int *)sorted[j]);
    if (alen != blen || 
    memcmp(sorted[i] + sizeof(int), sorted[j] + sizeof(int), alen)) {
      i++;
      sorted[i] = sorted[j];
      sorted[i][sizeof(int)+blen] = 1;
    }
  }
  int num_unique = i+1;
  printf("%d unique examples\n", num_unique);

  // collapse examples
  collapse(&X, sorted, num_unique);
  printf("%d collapsed examples\n", X.num);

  // initial model
  double **w = (double **)malloc(sizeof(double *)*X.numblocks);
  check(w != NULL);
  f = fopen(modfile, "rb");
  for (int i = 0; i < X.numblocks; i++) {
    w[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(w[i] != NULL);
    count = fread(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // lower bounds
  double **lb = (double **)malloc(sizeof(double *)*X.numblocks);
  check(lb != NULL);
  f = fopen(lobfile, "rb");
  for (int i = 0; i < X.numblocks; i++) {
    lb[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(lb[i] != NULL);
    count = fread(lb[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);
  
  // train
  printf("Training\n");
  gd(C, J, X, w, lb, logdir, logtag);
  printf("done\n");

  // save model
  printf("Saving model\n");
  f = fopen(modfile, "wb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    count = fwrite(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // score examples
  printf("Scoring\n");
  double *s = score(X, examples, num, w);

  // Write info file
  printf("Writing info file\n");
  f = fopen(inffile, "w");
  check(f != NULL);
  for (int i = 0; i < num; i++) {
    int len = ((int *)examples[i])[0];
    // label, score, unique flag
    count = fprintf(f, "%d\t%f\t%d\n", ((int *)examples[i])[1], s[i], 
                    (int)examples[i][sizeof(int)+len]);
    check(count > 0);
  }
  fclose(f);

  // compute loss and write it to a file
  double lossinfo[3];
  compute_loss(lossinfo, C, J, X, w);
  printf("Writing objective function info file\n");
  f = fopen(objfile, "w");
  count = fprintf(f, "%f\t%f\t%f", lossinfo[0], lossinfo[1], lossinfo[2]);
  check(count > 0);
  fclose(f);
  
  printf("Freeing memory\n");
  for (int i = 0; i < X.numblocks; i++) {
    free(w[i]);
    free(lb[i]);
  }
  free(w);
  free(lb);
  free(s);
  for (int i = 0; i < num; i++)
    free(examples[i]);
  free(examples);
  free(sorted);
  free(X.x);
  free(X.blocksizes);
  free(X.regmult);
  free(X.learnmult);
  for (int i = 0; i < X.numcomponents; i++)
    free(X.componentblocks[i]);
  free(X.componentblocks);
  free(X.componentsizes);

  return 0;
}
 

3. VS2017工程设置

在预编译里增加:_CRT_SECURE_NO_WARNINGS,否则会报错

4.运行错误

“Win32Project3.exe”(Win32):  已加载“D:\software\VS2013\VS2013 文档\Win32Project3\Debug\Win32Project3.exe”。已加载符号。
“Win32Project3.exe”(Win32):  已加载“C:\Windows\SysWOW64\ntdll.dll”。无法查找或打开 PDB 文件。
“Win32Project3.exe”(Win32):  已加载“C:\Windows\SysWOW64\kernel32.dll”。已加载符号。
“Win32Project3.exe”(Win32):  已加载“C:\Windows\SysWOW64\KernelBase.dll”。无法查找或打开 PDB 文件。
“Win32Project3.exe”(Win32):  已加载“C:\Windows\SysWOW64\msvcr120d.dll”。已加载符号。
程序“[4308] Win32Project3.exe”已退出,返回值为 0 (0x0)。

解决方式
1、点 调试,然后 选项和设置 
 
2、右边勾上 启用源服务器支持 
 
3、左边点 符号,把 微软符号服务器 勾选上 
 
4、运行的时候等一下,加载完成后就好了。 
5、只是第一次加载,不用担心。或者,你也可以等加载完了之后,再把之前勾选的取消掉,也没有问题。 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值