首先,调用StartRecognition函数。它的主要作用是设置识别的初始值,构建一个network和一个全局的PRecInfo对象。其中PRecInfo对象用来保存识别过程中的一系列中间结果,比如经过的路径、最优的token、剪枝参数等等。
/* Each HMMSet that is used for recognition needs to be */
/* initialised prior to use. The initialisation routine adds */
/* the necessary structures to the models and returns the */
/* PSetInfo used by the Viterbi recogniser to access the model set. */
/* PSetInfo is declared here as an opaque type only; the layout of */
/* struct psetinfo is not part of this declaration. */
typedef struct psetinfo PSetInfo; /* Private HMMSet information (HRec.c) */
/* Each recognition requires a PRecInfo to maintain status */
/* information throughout the utterance. The majority of */
/* the structure is private, although some status information */
/* and all pruning parameters are available between frames. */
/* Both names below are forward (opaque) declarations of struct types. */
typedef struct vrecinfo VRecInfo; /* Visible recognition information */
typedef struct precinfo PRecInfo; /* Private recognition information (HRec.c) */
这里介绍了三个结构,是识别过程必不可少的。
每个用于识别的HMM模型集合在使用前都必须先初始化:初始化函数会给模型添加必要的结构,并返回一个PSetInfo对象,Viterbi识别器通过它来访问模型集合。
每个识别过程都需要一个PRecInfo对象来维护整个语音段(utterance)的识别状态信息,其中大部分信息是私有的,只有部分状态信息和全部剪枝参数可以在帧与帧之间被外部访问。下面看一下PRecInfo包含哪些信息。
/* Private recognition information PRecInfo. (Not visible outside HRec) */
/* Contains all status/network/allocation/pruning information for a */
/* single network. */
/* Members are grouped below as: input parameters, global per-frame */
/* status, pruning thresholds, memory heaps, path/align record */
/* bookkeeping, the instance list, and counters that exist only when */
/* SANITY is defined. Member order is part of the layout; do not reorder. */
struct precinfo {
/* Input parameters - Set once and unseen */
Observation *obs; /* Current Observation */
PSetInfo *psi; /* HMMSet information */
Network *net; /* Recognition network */
int nToks; /* Maximum tokens to propagate (0==1) */
Boolean models; /* Keep track of model history */
Boolean states; /* Keep track of state history */
float scale; /* LM (Net probs) scale factor */
LogFloat wordpen; /* Word insertion penalty */
float pscale; /* Pronunciation probs scale factor */
/* Private global info */
int frame; /* Current frame number */
int id; /* Unique observation identifier */
int prid; /* Unique pri identifier */
NetNode *genMaxNode; /* Most likely node in network */
NetNode *wordMaxNode; /* Most likely word end node in network */
Token genMaxTok; /* Most likely token */
Token wordMaxTok; /* Most likely word end token */
/* Pruning thresholds */
LogFloat genThresh; /* Cutoff from global beam */
LogFloat wordThresh; /* Cutoff for word end propagation */
LogFloat nThresh; /* Cutoff for non-best tokens */
LogFloat *qsa; /* Array for performing qsort */
int qsn; /* Number of elements in qsa */
/* Memory heaps */
MemHeap instHeap; /* Inst heap */
/* NOTE(review): stHeapNum is not a member of this struct; it is */
/* presumably defined elsewhere in HRec.c - confirm. */
MemHeap *stHeap; /* Array[0..stHeapNum-1] of heaps for states */
MemHeap rTokHeap; /* RelToken heap */
MemHeap pathHeap; /* Path heap */
MemHeap rPthHeap; /* NxtPath heap */
MemHeap alignHeap; /* Align heap */
/* Path record bookkeeping */
int npth; /* Current number of path records */
int cpth; /* Number of path records after last collection */
Path pYesRef; /* Head of PathYesRef linked list */
Path pNoRef; /* Head of PathNoRef linked list */
Path pYesTail; /* Tail of PathYesRef linked list */
Path pNoTail; /* Tail of PathNoRef linked list */
/* Align record bookkeeping (parallel to the path record members) */
int nalign; /* Current number of align records */
int calign; /* Number of align records after last collection */
Align aYesRef; /* Head of AlignYesRef linked list */
Align aNoRef; /* Head of AlignNoRef linked list */
Align aYesTail; /* Tail of AlignYesRef linked list */
Align aNoTail; /* Tail of AlignNoRef linked list */
/* Network instance list */
int nact; /* Number of active instances */
int tact; /* Cumulative number of active instances */
NetInst head; /* Head (oldest) of Inst linked list */
NetInst tail; /* Tail (newest) of Inst linked list */
NetInst *nxtInst; /* Inst used to select next in step sequence */
/* The members below exist only in builds that define SANITY, so the */
/* size of this struct differs between SANITY and non-SANITY builds. */
#ifdef SANITY
NetInst *start_inst; /* Inst that started a move */
int ipos; /* Current inst position */
int pnlen; /* Number of records in PathNoRef list */
int pylen; /* Number of records in PathYesRef list */
int anlen; /* Number of records in AlignNoRef list */
int aylen; /* Number of records in AlignYesRef list */
#endif
};