HTK解码过程

hjx5200

于 2021-06-01 07:20:05 发布

阅读量175

点赞数

分类专栏：语音识别　文章标签：HTK 语音识别解码

本文链接：https://blog.csdn.net/hjx5200/article/details/117433891

版权

语音识别专栏收录该内容

45 篇文章 4 订阅

订阅专栏

首先，调用StartRecognition函数。该函数的主要作用是设置识别的初始状态，构建一个network和一个全局的PRecInfo对象。PRecInfo对象保存识别过程中的一系列中间结果，比如经过的路径、最优的token、剪枝参数等等。

/* Each HMMSet that is used for recognition needs to be     */
/* initialised prior to use.  The initialisation routine    */
/* adds the necessary structures to the models and returns  */
/* the PSetInfo used by the Viterbi recogniser to access    */
/* the model set.                                           */
/* Opaque here: only the struct tag is declared.            */
typedef struct psetinfo PSetInfo; /* Private HMMSet information (HRec.c) */


/* Each recognition requires a PRecInfo to maintain status   */
/* information throughout the utterance.  The majority of    */
/* the structure is private although some status information */
/* and all pruning parameters are available between frames.  */
typedef struct vrecinfo VRecInfo; /* Visible recognition information */

typedef struct precinfo PRecInfo; /* Private recognition information (HRec.c) */

这里介绍了三个结构，是识别过程必不可少的。

识别过程需要HMM模型集合的信息，初始化函数返回PSetInfo对象，Viterbi识别器可以通过它访问模型。

每个识别过程都需要一个PRecInfo对象来维护整句话（utterance）识别期间的状态信息。其中大部分信息是私有的，只有部分状态信息和全部剪枝参数可以在帧与帧之间被外部访问。下面看一下PRecInfo包含哪些信息。

/* Private recognition information PRecInfo. (Not visible outside HRec) */
/* Contains all status/network/allocation/pruning information for a     */
/* single network.  Fields are grouped below as: input parameters,      */
/* per-frame global state, pruning thresholds, memory heaps, path and   */
/* align record bookkeeping, and the list of active instances.          */
struct precinfo {
   /* Input parameters - Set once and unseen */

   Observation *obs;         /* Current observation */

   PSetInfo *psi;           /* HMMSet information */
   Network *net;            /* Recognition network */
   int nToks;               /* Maximum tokens to propagate (0==1) */
   Boolean models;          /* Keep track of model history */
   Boolean states;          /* Keep track of state history */

   float scale;             /* LM (Net probs) scale factor */
   LogFloat wordpen;        /* Word insertion penalty */
   float pscale;            /* Pronunciation probs scale factor */
   /* Private global info */


   int frame;               /* Current frame number */
   int id;                  /* Unique observation identifier */
   int prid;                /* Unique pri identifier (NOTE(review): "pri" presumably means PRecInfo - confirm) */

   NetNode *genMaxNode;     /* Most likely node in network */
   NetNode *wordMaxNode;    /* Most likely word end node in network */

   Token genMaxTok;         /* Most likely token */
   Token wordMaxTok;        /* Most likely word end token */

   /* Pruning thresholds */
   LogFloat genThresh;      /* Cutoff from global beam */
   LogFloat wordThresh;     /* Cutoff for word end propagation */
   LogFloat nThresh;        /* Cutoff for non-best tokens */

   LogFloat *qsa;           /* Array for performing qsort */
   int qsn;                 /* Size of qsa */

   /* Memory heaps, one per kind of record */
   MemHeap instHeap;        /* Inst heap */
   MemHeap *stHeap;         /* Array[0..stHeapNum-1] of heaps for states */
   MemHeap rTokHeap;        /* RelToken heap */
   MemHeap pathHeap;        /* Path heap */
   MemHeap rPthHeap;        /* NxtPath heap */
   MemHeap alignHeap;       /* Align heap */

   /* Path record bookkeeping */
   int npth;                /* Current number of path records */
   int cpth;                /* Number of path records after last collection */
   Path pYesRef;            /* Head of PathYesRef linked list */
   Path pNoRef;             /* Head of PathNoRef linked list */
   Path pYesTail;           /* Tail of PathYesRef linked list */
   Path pNoTail;            /* Tail of PathNoRef linked list */

   /* Align record bookkeeping (mirrors the path record fields) */
   int nalign;              /* Current number of align records */
   int calign;              /* Number of align records after last collection */
   Align aYesRef;           /* Head of AlignYesRef linked list */
   Align aNoRef;            /* Head of AlignNoRef linked list */
   Align aYesTail;          /* Tail of AlignYesRef linked list */
   Align aNoTail;           /* Tail of AlignNoRef linked list */

   /* Active network instances */
   int nact;                /* Number of active instances */
   int tact;                /* Cumulative number of active instances */
   NetInst head;            /* Head (oldest) of Inst linked list */
   NetInst tail;            /* Tail (newest) of Inst linked list */
   NetInst *nxtInst;        /* Inst used to select next in step sequence */
#ifdef SANITY
   /* Extra consistency-check fields, present only when SANITY is defined */
   NetInst *start_inst;     /* Inst that started a move */
   int ipos;                /* Current inst position */

   int pnlen;               /* Length of PathNoRef list */
   int pylen;               /* Length of PathYesRef list */

   int anlen;               /* Length of AlignNoRef list */
   int aylen;               /* Length of AlignYesRef list */
#endif

};