ml.hpp: OpenCV's built-in ML module header source file

/*M///
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_ML_HPP
#define OPENCV_ML_HPP

#ifdef __cplusplus
#  include "opencv2/core.hpp"
#endif

#ifdef __cplusplus

#include <float.h>
#include <map>
#include <iostream>

/**
  @defgroup ml Machine Learning

  The Machine Learning Library (MLL) is a set of classes and functions for statistical
  classification, regression, and clustering of data.

  Most of the classification and regression algorithms are implemented as C++ classes. As the
  algorithms have different sets of features (like an ability to handle missing measurements or
  categorical input variables), there is little common ground between the classes. This common
  ground is defined by the class cv::ml::StatModel that all the other ML classes are derived from.

  See detailed overview here: @ref ml_intro.
 */

namespace cv
{
   

namespace ml
{
   

//! @addtogroup ml
//! @{
   

/** @brief Variable types */
enum VariableTypes
{
    VAR_NUMERICAL    =0, //!< same as VAR_ORDERED
    VAR_ORDERED      =0, //!< ordered variables
    VAR_CATEGORICAL  =1  //!< categorical variables
};
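
/* A small illustrative sketch: the varType argument of TrainData::create, declared further
   below, is a CV_8U vector with one entry per input variable plus one per response, each set
   to VAR_ORDERED or VAR_CATEGORICAL. The sizes here are arbitrary.

       // 4 ordered (numerical) input variables followed by 1 categorical response:
       Mat varType = (Mat_<uchar>(1, 5) << VAR_ORDERED, VAR_ORDERED, VAR_ORDERED,
                                           VAR_ORDERED, VAR_CATEGORICAL);
*/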

/** @brief %Error types */
enum ErrorTypes
{
    TEST_ERROR = 0,
    TRAIN_ERROR = 1
};

/** @brief Sample types */
enum SampleTypes
{
    ROW_SAMPLE = 0, //!< each training sample is a row of samples
    COL_SAMPLE = 1  //!< each training sample occupies a column of samples
};
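
/* Illustrative sketch of the two layouts (the dimensions are arbitrary): for 100 samples with
   4 features each, the samples matrix passed to TrainData::create or StatModel::train would be

       Mat byRows(100, 4, CV_32F);   // layout = ROW_SAMPLE: each row is one sample
       Mat byCols(4, 100, CV_32F);   // layout = COL_SAMPLE: each column is one sample
*/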

/** @brief The structure represents the logarithmic grid range of statmodel parameters.

It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
being computed by cross-validation.
 */
class CV_EXPORTS_W ParamGrid
{
public:
    /** @brief Default constructor */
    ParamGrid();
    /** @brief Constructor with parameters */
    ParamGrid(double _minVal, double _maxVal, double _logStep);

    CV_PROP_RW double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
    CV_PROP_RW double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
    /** @brief Logarithmic step for iterating the statmodel parameter.

    The grid determines the following iteration sequence of the statmodel parameter values:
    \f[(minVal, minVal*logStep, minVal*{logStep}^2, \dots,  minVal*{logStep}^n),\f]
    where \f$n\f$ is the maximal index satisfying
    \f[\texttt{minVal} * \texttt{logStep} ^n <  \texttt{maxVal}\f]
    The grid is logarithmic, so logStep must always be greater than 1. Default value is 1.
    */
    CV_PROP_RW double logStep;

    /** @brief Creates a ParamGrid Ptr that can be given to the %SVM::trainAuto method

    @param minVal minimum value of the parameter grid
    @param maxVal maximum value of the parameter grid
    @param logstep Logarithmic step for iterating the statmodel parameter
    */
    CV_WRAP static Ptr<ParamGrid> create(double minVal=0., double maxVal=0., double logstep=1.);
};
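
/* A minimal usage sketch (the numeric values are arbitrary): such a grid is typically passed to
   SVM::trainAuto to search over one SVM parameter, e.g. C.

       Ptr<ParamGrid> Cgrid = ParamGrid::create(0.1, 100.0, 10.0);
       // the grid iterates 0.1, 1, 10 (values 0.1 * 10^n as long as they stay below maxVal = 100)
*/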

/** @brief Class encapsulating training data.

Please note that the class only specifies the interface of training data, but not the implementation.
All the statistical model classes in the _ml_ module accept Ptr\<TrainData\> as a parameter. In other
words, you can create your own class derived from TrainData and pass a smart pointer to an instance
of this class into StatModel::train.

@sa @ref ml_intro_data
 */
class CV_EXPORTS_W TrainData
{
public:
    static inline float missingValue() { return FLT_MAX; }
    virtual ~TrainData();

    CV_WRAP virtual int getLayout() const = 0;
    CV_WRAP virtual int getNTrainSamples() const = 0;
    CV_WRAP virtual int getNTestSamples() const = 0;
    CV_WRAP virtual int getNSamples() const = 0;
    CV_WRAP virtual int getNVars() const = 0;
    CV_WRAP virtual int getNAllVars() const = 0;

    CV_WRAP virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
    CV_WRAP virtual Mat getSamples() const = 0;
    CV_WRAP virtual Mat getMissing() const = 0;

    /** @brief Returns matrix of train samples

    @param layout The requested layout. If it's different from the initial one, the matrix is
        transposed. See ml::SampleTypes.
    @param compressSamples if true, the function returns only the training samples (specified by
        sampleIdx)
    @param compressVars if true, the function returns the shorter training samples, containing only
        the active variables.

    In current implementation the function tries to avoid physical data copying and returns the
    matrix stored inside TrainData (unless the transposition or compression is needed).
     */
    CV_WRAP virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
                                bool compressSamples=true,
                                bool compressVars=true) const = 0;

    /** @brief Returns the vector of responses

    The function returns ordered or the original categorical responses. Usually it's used in
    regression algorithms.
     */
    CV_WRAP virtual Mat getTrainResponses() const = 0;

    /** @brief Returns the vector of normalized categorical responses

    The function returns the vector of responses. Each response is an integer from `0` to `<number of
    classes>-1`. The actual label value can then be retrieved from the class label vector, see
    TrainData::getClassLabels.
     */
    CV_WRAP virtual Mat getTrainNormCatResponses() const = 0;
    CV_WRAP virtual Mat getTestResponses() const = 0;
    CV_WRAP virtual Mat getTestNormCatResponses() const = 0;
    CV_WRAP virtual Mat getResponses() const = 0;
    CV_WRAP virtual Mat getNormCatResponses() const = 0;
    CV_WRAP virtual Mat getSampleWeights() const = 0;
    CV_WRAP virtual Mat getTrainSampleWeights() const = 0;
    CV_WRAP virtual Mat getTestSampleWeights() const = 0;
    CV_WRAP virtual Mat getVarIdx() const = 0;
    CV_WRAP virtual Mat getVarType() const = 0;
    CV_WRAP virtual Mat getVarSymbolFlags() const = 0;
    CV_WRAP virtual int getResponseType() const = 0;
    CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
    CV_WRAP virtual Mat getTestSampleIdx() const = 0;
    CV_WRAP virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
    virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
    CV_WRAP virtual Mat getDefaultSubstValues() const = 0;

    CV_WRAP virtual int getCatCount(int vi) const = 0;

    /** @brief Returns the vector of class labels

    The function returns the vector of unique labels that occur in the responses.
     */
    CV_WRAP virtual Mat getClassLabels() const = 0;

    CV_WRAP virtual Mat getCatOfs() const = 0;
    CV_WRAP virtual Mat getCatMap() const = 0;

    /** @brief Splits the training data into the training and test parts
    @sa TrainData::setTrainTestSplitRatio
     */
    CV_WRAP virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;

    /** @brief Splits the training data into the training and test parts

    The function selects a subset of specified relative size and then returns it as the training
    set. If the function is not called, all the data is used for training. Please note that for
    each TrainData::getTrain\* method there is a corresponding TrainData::getTest\*, so that the
    test subset can be retrieved and processed as well.
    @sa TrainData::setTrainTestSplit
     */
    CV_WRAP virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
    CV_WRAP virtual void shuffleTrainTest() = 0;

    /** @brief Returns matrix of test samples */
    CV_WRAP virtual Mat getTestSamples() const = 0;

    /** @brief Returns vector of symbolic names captured in loadFromCSV() */
    CV_WRAP virtual void getNames(std::vector<String>& names) const = 0;

    /** @brief Extracts the elements specified by the passed indexes from a 1D vector.
    @param vec input vector (supported types: CV_32S, CV_32F, CV_64F)
    @param idx 1D index vector
     */
    static CV_WRAP Mat getSubVector(const Mat& vec, const Mat& idx);

    /** @brief Extracts the rows/columns specified by the passed indexes from a matrix.
    @param matrix input matrix (supported types: CV_32S, CV_32F, CV_64F)
    @param idx 1D index vector
    @param layout specifies whether to extract rows (cv::ml::ROW_SAMPLE) or columns (cv::ml::COL_SAMPLE)
     */
    static CV_WRAP Mat getSubMatrix(const Mat& matrix, const Mat& idx, int layout);

    /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.

    @param filename The input file name
    @param headerLineCount The number of lines in the beginning to skip; besides the header, the
        function also skips empty lines and lines starting with `#`
    @param responseStartIdx Index of the first output variable. If -1, the function considers the
        last variable as the response
    @param responseEndIdx Index of the last output variable + 1. If -1, then there is single
        response variable at responseStartIdx.
    @param varTypeSpec The optional text string that specifies the variables' types. It has the
        format `ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]`. That is, variables from `n1 to n2`
        (inclusive range), `n3`, `n4 to n5` ... are considered ordered and `n6`, `n7 to n8` ... are
        considered as categorical. The range `[n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8]`
        should cover all the variables. If varTypeSpec is not specified, then the algorithm uses the
        following rules:
        - all input variables are considered ordered by default. If some column contains
          non-numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding
          variable is considered categorical.
        - if there are several output variables, they are all considered as ordered. Error is
          reported when non-numerical values are used.
        - if there is a single output variable, then if its values are non-numerical or are all
          integers, then it's considered categorical. Otherwise, it's considered ordered.
    @param delimiter The character used to separate values in each line.
    @param missch The character used to specify missing measurements. It should not be a digit.
        Although it's a non-numerical value, it surely does not affect the decision of whether the
        variable is ordered or categorical.
    @note If the dataset only contains input variables and no responses, use responseStartIdx = -2
        and responseEndIdx = 0. The output variables vector will just contain zeros.
     */
    static Ptr<TrainData> loadFromCSV(const String& filename,
                                      int headerLineCount,
                                      int responseStartIdx=-1,
                                      int responseEndIdx=-1,
                                      const String& varTypeSpec=String(),
                                      char delimiter=',',
                                      char missch='?');

    /** @brief Creates training data from in-memory arrays.

    @param samples matrix of samples. It should have CV_32F type.
    @param layout see ml::SampleTypes.
    @param responses matrix of responses. If the responses are scalar, they should be stored as a
        single row or as a single column. The matrix should have type CV_32F or CV_32S (in the
        former case the responses are considered as ordered by default; in the latter case - as
        categorical)
    @param varIdx vector specifying which variables to use for training. It can be an integer vector
        (CV_32S) containing 0-based variable indices or byte vector (CV_8U) containing a mask of
        active variables.
    @param sampleIdx vector specifying which samples to use for training. It can be an integer
        vector (CV_32S) containing 0-based sample indices or byte vector (CV_8U) containing a mask
        of training samples.
    @param sampleWeights optional vector with weights for each sample. It should have CV_32F type.
    @param varType optional vector of type CV_8U and size `<number_of_variables_in_samples> +
        <number_of_variables_in_responses>`, containing types of each input and output variable. See
        ml::VariableTypes.
     */
    CV_WRAP static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
                                 InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
                                 InputArray sampleWeights=noArray(), InputArray varType=noArray());
};
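
/* A minimal usage sketch, assuming the data is already in memory; the dimensions and the file
   name "data.csv" are only illustrative.

       Mat samples(100, 4, CV_32F);   // one sample per row (ROW_SAMPLE)
       Mat labels(100, 1, CV_32S);    // CV_32S responses are treated as categorical
       // ... fill samples and labels ...
       Ptr<TrainData> td = TrainData::create(samples, ROW_SAMPLE, labels);
       td->setTrainTestSplitRatio(0.8, true);   // 80% train / 20% test, shuffled
       Mat trainX = td->getTrainSamples();      // training subset only
       Mat trainY = td->getTrainResponses();

       // or read a CSV file with one header line, using the last column as the response:
       Ptr<TrainData> tdCsv = TrainData::loadFromCSV("data.csv", 1);
*/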

/** @brief Base class for statistical models in OpenCV ML.
 */
class CV_EXPORTS_W StatModel : public Algorithm
{
public:
    /** Predict options */
    enum Flags {
        UPDATE_MODEL = 1,
        RAW_OUTPUT=1, //!< makes the method return the raw results (the sum), not the class label
        COMPRESSED_INPUT=2,
        PREPROCESSED_INPUT=4
    };

    /** @brief Returns the number of variables in training samples */
    CV_WRAP virtual int getVarCount() const = 0;

    CV_WRAP virtual bool empty() const CV_OVERRIDE;

    /** @brief Returns true if the model is trained */
    CV_WRAP virtual bool isTrained() const = 0;
    /** @brief Returns true if the model is classifier */
    CV_WRAP virtual bool isClassifier() const = 0;

    /** @brief Trains the statistical model

    @param trainData training data that can be loaded from file using TrainData::loadFromCSV or
        created with TrainData::create.
    @param flags optional flags, depending on the model. Some of the models can be updated with the
        new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP).
     */
    CV_WRAP virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );

    /** @brief Trains the statistical model

    @param samples training samples
    @param layout See ml::SampleTypes.
    @param responses vector of responses associated with the training samples.
    */
    CV_WRAP virtual bool train( InputArray samples, int layout, InputArray responses );

    /** @brief Computes error on the training or test dataset

    @param data the training data
    @param test if true, the error is computed over the test subset of the data, otherwise it's
        computed over the training subset of the data. Please note that if you loaded a completely
        different dataset to evaluate an already trained classifier, you will probably want to skip
        calling TrainData::setTrainTestSplitRatio and specify test=false, so that the error is
        computed over the whole new set.
    @param resp the optional output responses.

    The method uses StatModel::predict to compute the error. For regression models the error is
    computed as RMS, for classifiers - as a percent of misclassified samples (0%-100%).
     */
    CV_WRAP virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;

    /** @brief Predicts response(s) for the provided sample(s)

    @param samples The input samples, floating-point matrix
    @param results The optional output matrix of results.
    @param flags The optional flags, model-dependent. See cv::ml::StatModel::Flags.
     */
    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;

    /** @brief Create and train model with default parameters

    The class must implement a static `create()` method with no parameters or with all default parameter values
    */
    template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, int flags=0)
    {
        Ptr<_Tp> model = _Tp::create();
        return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
    }
};
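
/* A minimal train/evaluate/predict sketch, assuming a Ptr<TrainData> td (e.g. from the TrainData
   sketch above) and one of the concrete models declared later in this header, here cv::ml::RTrees.

       Ptr<RTrees> model = StatModel::train<RTrees>(td);          // create() + train() with defaults
       Mat testResp;
       float err = model->calcError(td, true, testResp);          // percent misclassified for classifiers
       float y   = model->predict(td->getTestSamples().row(0));   // response for a single sample
*/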

/****************************************************************************************\
*                                 Normal Bayes Classifier                                *
\****************************************************************************************/

/** @brief Bayes classifier for normally distributed data.

@sa @ref ml_intro_bayes
 */
class CV_EXPORTS_W NormalBayesClassifier : public StatModel
{
public:
    /** @brief Predicts the response for sample(s).

    The method estimates the most probable classes for the input vectors. Input vectors (one or more)
    are stored as rows of the matrix inputs. In the case of multiple input vectors, the vector outputs
    contains one predicted class per input vector. The predicted class for a single input vector is
    returned by the method. The vector outputProbs contains the output probabilities corresponding to
    each element of result.
     */
    CV_WRAP virtual float predictProb( InputArray inputs, OutputArray outputs,
                               OutputArray outputProbs, int flags=0 ) const = 0;

    /** Creates empty model
    Use StatModel::train to train the model after creation. */
    CV_WRAP static Ptr<NormalBayesClassifier> create();
};
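
/* A minimal sketch, assuming a Ptr<TrainData> td with categorical responses and a CV_32F matrix
   querySamples holding one query vector per row (both names are only illustrative).

       Ptr<NormalBayesClassifier> nb = NormalBayesClassifier::create();
       nb->train(td);
       Mat predictedClasses, probs;
       nb->predictProb(querySamples, predictedClasses, probs);
       // predictedClasses: one label per query row; probs: per-class output values per row
*/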
   