这次我们分析 SVM.h 和 UseSVM.h。
在开头我想放一篇参考文献(原文的超链接已失效,此处略去)。
#ifndef _LIBSVM_H //条件编译指令(头文件保护)
#define _LIBSVM_H
#define LIBSVM_VERSION 289 //定义libsvm版本2.89
#ifdef __cplusplus
extern "C" {
#endif
extern int libsvm_version;
// One element of a sparse feature vector: an (index, value) pair.
// A full vector is an array of svm_node terminated by an entry whose
// index is -1 (see UseSVM_BuildProblem below).
struct svm_node
{
int index; // 1-based feature index; -1 marks the end of the vector
double value; // feature value at this index
};
// Describes a training set.
// 'l' is the number of training instances; 'y' is an array of their target
// values (integer class labels for classification, real values for
// regression); 'x' is an array of l pointers, each pointing to one sparse
// training vector (an svm_node array terminated by index == -1).
struct svm_problem
{
int l;
double *y;
struct svm_node **x;
};
// Added by Jianxin Wu for AUC; reference: http://www.cnblogs.com/lixiaolun/p/4053499.html
// One (ground-truth label, predicted score) pair used by ComputeAUC.
struct RECORD
{
int y; // ground-truth label
double v; // predicted decision value / score
};
// qsort-style comparison function (presumably orders RECORDs by score -- see svm.cpp).
int compare(const void* a,const void* b);
// Compute the Area Under the ROC Curve from 'tcount' RECORD entries.
double ComputeAUC(struct RECORD* record,const int tcount);
// End of AUC
// Supported SVM formulations and kernel types.
enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
/* The enum value "HIK" is added by Jianxin Wu, for histogram intersection kernel */
enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED, HIK }; /* kernel_type */
// Training and kernel parameters for one SVM run.
struct svm_parameter
{
int svm_type;
int kernel_type;
int degree; /* for poly */
double gamma; /* for poly/rbf/sigmoid */
double coef0; /* for poly/sigmoid */
/* these are for training only */
double cache_size; /* in MB */
double eps; /* stopping criteria */
double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
int nr_weight; /* for C_SVC */
int *weight_label; /* for C_SVC */
double* weight; /* for C_SVC */
double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
double p; /* for EPSILON_SVR */
int shrinking; /* use the shrinking heuristics */
int probability; /* do probability estimates */
};
// This is moved from svm.cpp by Jianxin Wu
//
// svm_model
//
// A trained model: the parameters it was trained with, the support
// vectors, and the decision-function coefficients.  In the comments
// below, k denotes nr_class.
struct svm_model
{
svm_parameter param; // parameter
int nr_class; // number of classes, = 2 in regression/one class svm
int l; // total #SV
svm_node **SV; // SVs (SV[l])
double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
double *rho; // constants in decision functions (rho[k*(k-1)/2])
double *probA; // pairwise probability information
double *probB;
// for classification only
int *label; // label of each class (label[k])
int *nSV; // number of SVs for each class (nSV[k])
// nSV[0] + nSV[1] + ... + nSV[k-1] = l
// XXX
int free_sv; // 1 if svm_model is created by svm_load_model
// 0 if svm_model is created by svm_train
};
// ---- Standard LIBSVM interface ----
// Train a model on 'prob' with parameters 'param'; caller releases it with svm_destroy_model.
struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
// nr_fold-fold cross validation; per-instance predictions are written to 'target'.
void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
int svm_save_model(const char *model_file_name, const struct svm_model *model);
struct svm_model *svm_load_model(const char *model_file_name);
// This line added by Jianxin Wu for loading BSVM model
struct svm_model *bsvm_load_model(const char *model_file_name);
// Added by Jianxin for computing the positive/negative response separately
double kernel_eval_func(const svm_node *x,const svm_node *y,const svm_parameter& param);
// Accessors for a trained model.
int svm_get_svm_type(const struct svm_model *model);
int svm_get_nr_class(const struct svm_model *model);
void svm_get_labels(const struct svm_model *model, int *label);
double svm_get_svr_probability(const struct svm_model *model);
// Prediction: raw decision values, predicted label, and probability estimates.
void svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
double svm_predict(const struct svm_model *model, const struct svm_node *x);
double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
void svm_destroy_model(struct svm_model *model);
void svm_destroy_param(struct svm_parameter *param);
// Returns NULL if the parameters are feasible, otherwise an error message.
const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
int svm_check_probability_model(const struct svm_model *model);
// Hook for redirecting libsvm's console output.
extern void (*svm_print_string) (const char *);
#ifdef __cplusplus
}
#endif
#endif /* _LIBSVM_H */
上面是 svm.h;开头引用的那篇参考文章已经把它讲得很清楚了。
#ifndef __USE_SVM__
#define __USE_SVM__
#include <set>
#include <string>
#include "svm.h"
// Convenience wrappers around the LIBSVM interface.
// Fill 'param' with default parameter values and reset 'prob' / 'x_space'.
// After calling this function you will usually want to override some of the defaults.
void UseSVM_Init(svm_parameter& param,svm_problem& prob,svm_node* &x_space);
// Release all SVM-related data structures so they can be reused for the next run.
void UseSVM_CleanUp(svm_model* &model,svm_parameter& param,svm_problem& prob,svm_node* &x_space);
// Build an svm_problem from a 2-D data matrix.
// The (template) definition is at the end of this header.
template<class T>
int UseSVM_BuildProblem(
Array2d<T> data, // data matrix, one row per data point
const int* labels, // label of each data point
std::set<int>& choose, // only rows whose label is in this set are used
svm_problem& prob, // output problem; must be empty before the call
svm_node* &x_space, // must be NULL before the call
const bool oneclass, // train one-class? (all chosen points get target value 1)
const int maxElement = -1 // constrain SVM problem size to 'maxElement' if it is >0
);
// Data structure for fast computing of several histogram kernel discrimination functions (\phi(x) \dotproduct w) - \rho
// return value is -1.0*\rho
// for use in visual codebook only
double UseSVM_Histogram_FastEvaluationStructure(
const svm_model& model, // one SVM trained model
const int m, // length of feature vector
const int upper_bound, // feature values can be at most upper_bound - 1
Array2d<double>& eval, // fast data structure
const int index // which discrimination function is this? (or, which visual code word?)
);
// Convert a HIK libsvm model into 'eval' so that testing is fast.
double UseSVM_Histogram_FastEvaluationStructure(const svm_model& model,const int m,const int upper_bound,Array2dC<double>& eval,const bool normalize);
// Load libsvm model from file and convert to 'eval' (an Array2dC matrix).
// If pmodel==NULL, then the loaded libsvm model is destroyed immediately, otherwise it is returned by pmodel -- you need to destroy it
double UseSVM_Histogram_FastEvaluationStructure(const char* modelfile,const int m,const int upper_bound,Array2dC<double>& eval,svm_model** pmodel=NULL,const bool normalize=true);
// Convert a HIK bsvm model into 'eval' so that testing is fast (same idea as above).
double UseSVM_Histogram_FastEvaluationStructure(const char* modelfile,const int m,const int upper_bound,Array2dC<double>& eval,svm_model** pmodel,const bool normalize);
// Load bsvm model from file and convert to 'eval'
// If pmodel==NULL, then the loaded libsvm model is destroyed immediately, otherwise it is returned by pmodel -- you need to destroy it
// For the Crammer-Singer (-s 2) and HIK (-t 5) only
double UseSVM_CS_Histogram_FastEvaluationStructure(const char* modelfile,const int m,const int upper_bound,Array2dC<double>& eval,svm_model** pmodel);
// Data structure for fast computing of several linear kernel discrimination functions (\phi(x) \dotproduct w) - \rho
// return value is -1.0*\rho
double UseSVM_Linear_FastEvaluationStructure(
const svm_model& model, // one SVM trained model
const int m, // length of feature vector
Array2d<double>& result, // fast data structure
const int index // which discrimination function is this?
);
// Same as above, but fills an Array2dC structure from an in-memory model.
double UseSVM_Linear_FastEvaluationStructure(const svm_model& model,const int m,Array2dC<double>& eval);
// Load a libsvm model from 'modelfile' and convert it into 'result'.
double UseSVM_Linear_FastEvaluationStructure(const char* modelfile,const int m,Array2dC<double>& result);
// save a data set in the sparse SVM format to 'filename' with class labels in 'labels' and data in 'features' (1 row <==> a data point)
// NOTE that feature index will start from 1, not 0
void UseSVM_SaveSparse(const std::string& filename,const int* labels,Array2d<double>& features);
// Overload that saves only a subset of the points:
// 'split' should have same length as labels, and a point i is saved only if split[i]==value
void UseSVM_SaveSparse(const std::string& filename,const int* labels,Array2d<double>& features,const int* split,const int value);
// (目前还是看不懂,先放着赶进度,后面再来看)
// Build 'prob' (and the backing node array 'x_space') from the rows of
// 'data' whose label appears in 'choose'.  Returns the number of data
// points placed into the problem (0 if nothing matched).
// Works in three passes: (1) count the chosen rows, (2) count their
// non-zero entries, (3) fill the sparse representation.  Feature indexes
// are 1-based and every vector ends with a node whose index is -1.
template<class T>
int UseSVM_BuildProblem(Array2d<T> data,const int* labels,std::set<int>& choose,svm_problem& prob,svm_node* &x_space,const bool oneclass,const int maxElement)
{
    // The caller must hand us empty structures (see UseSVM_Init).
    assert(prob.l==0 && prob.y==NULL && prob.x==NULL && x_space==NULL);

    // Pass 1: how many rows carry a chosen label?
    int numChosen = 0;
    for(int row=0; row<data.nrow; row++)
    {
        if(choose.find(labels[row])!=choose.end()) numChosen++;
    }
    if(numChosen==0) return 0;
    // Cap the problem size if the caller asked for it.
    if(maxElement>0 && numChosen>maxElement) numChosen = maxElement;

    prob.l = numChosen;
    prob.y = new double[prob.l]; assert(prob.y!=NULL);
    prob.x = new svm_node*[prob.l]; assert(prob.x!=NULL);

    // Pass 2: count the non-zero entries of the rows we will keep.
    int nnz = 0;
    int kept = 0;
    for(int row=0; row<data.nrow; row++)
    {
        if(choose.find(labels[row])==choose.end()) continue;
        for(int col=0; col<data.ncol; col++)
        {
            if(data.p[row][col]!=0) nnz++;
        }
        kept++;
        if(maxElement>0 && kept>=maxElement) break;
    }

    // One extra node per point holds the index==-1 terminator.
    x_space = new svm_node[nnz+prob.l]; assert(x_space!=NULL);

    // Pass 3: fill target values, row pointers, and the sparse nodes.
    kept = 0;
    int pos = 0;
    for(int row=0; row<data.nrow; row++)
    {
        if(choose.find(labels[row])==choose.end()) continue;
        prob.y[kept] = oneclass ? 1 : labels[row]; // one-class training collapses all labels to +1
        prob.x[kept] = &x_space[pos];
        for(int col=0; col<data.ncol; col++)
        {
            if(data.p[row][col]==0) continue;
            x_space[pos].index = col + 1; // libsvm feature indexes start at 1
            x_space[pos].value = data.p[row][col];
            pos++;
        }
        x_space[pos].index = -1; // end-of-vector marker
        pos++;
        kept++;
        if(maxElement>0 && kept>=maxElement) break;
    }
    assert(kept==numChosen);
    assert(pos==nnz+prob.l);
    return numChosen;
}
#endif // __USE_SVM__