上一篇博客中用HCompV工具通过全局数据来初始化HMM模型参数,生成了proto和vFloor两个文件。同时手工制作了hmmdefs模型文件,在这个文件中列举了每一个单音素模型的参数,它们是完全相同的。
现在一步一步看看HCompV是如何达成目标的。
如前面博客指出的,可以通过visual studio跟踪调试HCompV。
首先列出大体的流程涉及的函数:
分别是:
1)InitShell // 对命令行参数进行处理,这个函数在多个HTKTools之间都用到,它主要把参数保存到savedCommandLine这里,后续对它进行读写处理;比如系统的参数设置,一般是“-C|S|V|D”,并把余下的参数保存到arglist二维字符数组中,计数变量为argcount;
2)然后是一堆初始化工作
/* Start-up sequence run after InitShell: one Init call per library module
   (presumably the HTK memory, label, maths, signal-processing, waveform,
   audio, VQ and model modules, judging by the names -- confirm against
   the HTK library headers). */
InitMem();
InitLabel();
InitMath();
InitSigP();
InitWave();
InitAudio();
InitVQ();
InitModel();
/* InitParm is the only initialiser here whose return value is checked;
   a result below SUCCESS is reported through HError with code 2000. */
if(InitParm()<SUCCESS)
HError(2000,"HCompV: InitParm failed");
/* No arguments left and no info printed yet: show the usage text. */
if (!InfoPrinted() && NumArgs() == 0)
ReportUsage();
/* Nothing to process, so leave with exit status 0. */
if (NumArgs() == 0) Exit(0);
3)核心的处理函数是从CreateHMMSet开始的。
下面来详细分析下这个函数做了哪些任务,首先在代码中函数声明为
/* EXPORT->CreateHMMSet: create the basic HMMSet structure */
void CreateHMMSet(HMMSet *hset, MemHeap *heap, Boolean allowTMods);
它接受三个参数,分别是HMMSet结构指针、MemHeap结构指针和布尔值allowTMods。因此这两个结构大体长什么样子需要先有所了解,否则后面的操作、赋值、计算都难以看明白。而allowTMods是个开关量。
/* HMMSet: top-level record describing a set of HMMs.  It groups the heap
   the set is allocated from, the macro and pointer hash tables, the
   global options (vector size, stream widths, parameter/duration/
   covariance kinds), summary counts, and the transform/adaptation
   bookkeeping fields. */
typedef struct _HMMSet{
MemHeap *hmem; /* memory heap for this HMM Set */
Boolean *firstElem; /* first element added to hmem during MakeHMMSet*/
char *hmmSetId; /* identifier for the hmm set */
MILink mmfNames; /* List of external file names */
int numLogHMM; /* Num of logical HMM's */
int numPhyHMM; /* Num of distinct physical HMM's */
int numFiles; /* total number of ext files */
int numMacros; /* num macros used in this set */
MLink * mtab; /* Array[0..MACHASHSIZE-1]OF MLink */
PtrMap ** pmap; /* Array[0..PTRHASHSIZE-1]OF PtrMap* */
Boolean allowTMods; /* true if HMMs can have Tee Models */
Boolean optSet; /* true if global options have been set */
short vecSize; /* dimension of observation vectors */
short swidth[SMAX]; /* [0]=num streams,[i]=width of stream i */
ParmKind pkind; /* kind of obs vector components */
DurKind dkind; /* kind of duration model (model or state) */
CovKind ckind; /* cov kind - only global in V1.X */
HSetKind hsKind; /* kind of HMM set */
TMixRec tmRecs[SMAX]; /* array[1..S]of tied mixture record */
int numStates; /* Number of states in HMMSet */
int numSharedStates; /* Number of shared states in HMMSet */
int numMix; /* Number of mixture components in HMMSet */
int numSharedMix; /* Number of shared mixtures in HMMSet */
int numTransP; /* Number of distinct transition matrices */
int ckUsage[NUMCKIND]; /* Number of components using given ckind */
InputXForm *xf; /* Input transform of HMMSet */
AdaptXForm *semiTied; /* SemiTied transform associated with model set */
short projSize; /* dimension of vector to update */
/* Adaptation information accumulates */
Boolean attRegAccs; /* have the set of accumulates been attached */
Boolean attXFormInfo; /* have the set of adapt info been attached */
/* NOTE(review): the next comment repeats attXFormInfo's wording; the name
   suggests a separate kind of adapt info -- confirm what it flags. */
Boolean attMInfo; /* have the set of adapt info been attached */
AdaptXForm *curXForm; /* presumably the current adaptation transform -- TODO confirm */
AdaptXForm *parentXForm; /* presumably the parent of curXForm -- TODO confirm */
/* Added to support LogWgts */
Boolean logWt; /* Component weights are stored as Logs */
/* Added to support delayed loading of the semi-tied transform */
char *semiTiedMacro; /* macroname of semi-tied transform */
} HMMSet;
看每个数据项的注释,大体意思应该比较明白了。其中第一项就是HMMSet需要涉及的内存空间,用来保存模型参数,也就是这个函数第二个形参的目的。
/* MemHeap: bookkeeping record for one memory heap.
   Several field comments below carry two readings separated by '|': the
   first counts in elements, the second in bytes (elemSize fixed at 1).
   NOTE(review): the column headings saying which heap type each reading
   belongs to are not present in this listing -- confirm against the
   header that declares MemHeap. */
typedef struct {
char *name; /* name of this memory heap */
HeapType type; /* type of this heap */
float growf; /* succ blocks grow as 1+growf */
size_t elemSize; /* size of each elem | 1 always */
size_t minElem; /* init #elems per blk | init #bytes per blk */
size_t maxElem; /* max #elems per block | max #bytes per blk */
size_t curElem; /* current #elems per blk | curr #bytes per blk */
size_t totUsed; /* total #elems used | total #bytes used */
size_t totAlloc; /* total #elems alloc'ed | total #bytes alloc'd */
BlockP heap; /* linked list of blocks */
Boolean protectStk; /* MSTAK only, prevents disposal below Stack Top */
}MemHeap;
该HMMSet定义了hmm模型的个数、状态数、观察向量的维度、转移概率矩阵的个数等等。在CreateHMMSet函数里只是初始化这样的结构体,而具体值需要后面根据模型文件来构建模型时填充。
/* Create the (still empty) HMM set on the global stack; tee models are
   not allowed for this tool (allowTMods == FALSE). */
CreateHMMSet(&hset,&gstack,FALSE);
pathPattern[0]='\0';

/* Consume every leading switch argument.  Each switch must be a single
   letter; a switch that takes a value first checks the type of the next
   argument and reports error 2019 through HError when it does not match. */
while (NextArg() == SWITCHARG) {
   s = GetSwtArg();
   if (strlen(s)!=1)
      HError(2019,"HCompV: Bad switch %s; must be single letter",s);
   switch(s[0]){
   case 'f':   /* variance floor scale, range-checked to [0,100] */
      if (NextArg() != FLOATARG)
         HError(2019,"HCompV: Variance floor scale expected");
      vFloorScale = GetChkedFlt(0.0,100.0,s);
      break;
   case 'l':   /* segment label */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: Segment label expected");
      segLab = GetStrArg();
      break;
   case 'm':   /* also update the means */
      meanUpdate = TRUE;
      break;
   case 'o':   /* output file name */
      /* Type check added for consistency with every other string option. */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: Output file name expected");
      outfn = GetStrArg();
      break;
   case 'v':   /* minimum variance level, range-checked to [0,100] */
      if (NextArg() != FLOATARG)
         HError(2019,"HCompV: Minimum variance level expected");
      minVar = GetChkedFlt(0.0,100.0,s);
      break;
   case 'k':   /* speaker pattern; must contain a '%' */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: speaker pattern expected");
      /* NOTE(review): unbounded copy; the size of spPattern is not
         visible here -- confirm it can hold any argument string. */
      strcpy(spPattern,GetStrArg());
      if (strchr(spPattern,'%')==NULL)
         HError(2019,"HCompV: Speaker mask invalid");
      break;
   case 'c':   /* CMV output directory; switches the tool to CMV mode */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: CMV output dir expected");
      /* NOTE(review): unbounded copy into cmDir -- see the 'k' case. */
      strcpy(cmDir,GetStrArg());
      DoCMV = TRUE;
      break;
   case 'p':   /* path pattern; must contain a '%' */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: path pattern expected");
      /* NOTE(review): unbounded copy into pathPattern -- see the 'k' case. */
      strcpy(pathPattern,GetStrArg());
      if (strchr(pathPattern,'%')==NULL)
         HError(2019,"HCompV: Path mask invalid");
      break;
   case 'q':   /* output flags (letters n, m, v per the message below) */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: output flags (nmv)");
      /* NOTE(review): unbounded copy into oflags -- see the 'k' case. */
      strcpy(oflags,GetStrArg());
      break;
   case 'B':   /* save output in binary form */
      saveBinary = TRUE;
      break;
   case 'F':   /* data file format; an ALIEN format only raises warning -2089 */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: Data File format expected");
      if((dff = Str2Format(GetStrArg())) == ALIEN)
         HError(-2089,"HCompV: Warning ALIEN Data file format set");
      break;
   case 'G':   /* label file format; an ALIEN format only raises warning -2089 */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: Label File format expected");
      if((lff = Str2Format(GetStrArg())) == ALIEN)
         HError(-2089,"HCompV: Warning ALIEN Label file format set");
      break;
   case 'H':   /* HMM macro file to add to the set */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: HMM macro file name expected");
      AddMMF(&hset,GetStrArg());
      break;
   case 'I':   /* master label file */
      if (NextArg() != STRINGARG)
         HError(2019,"HCompV: MLF file name expected");
      LoadMasterFile(GetStrArg());
      break;
   case 'L':   /* label file directory */
      if (NextArg()!=STRINGARG)
         HError(2019,"HCompV: Label file directory expected");
      labDir = GetStrArg();
      break;
   case 'M':   /* output macro file directory */
      if (NextArg()!=STRINGARG)
         HError(2019,"HCompV: Output macro file directory expected");
      outDir = GetStrArg();
      break;
   case 'T':   /* trace flags, accepted range 0..077 (octal) */
      if (NextArg() != INTARG)
         HError(2019,"HCompV: Trace value expected");
      trace = GetChkedInt(0,077,s);
      break;
   case 'X':   /* label file extension */
      if (NextArg()!=STRINGARG)
         HError(2019,"HCompV: Label file extension expected");
      labExt = GetStrArg();
      break;
   default:
      HError(2019,"HCompV: Unknown switch %s",s);
   }
}
/* if not doing CMV, do standard HCompV */
if (DoCMV == FALSE){
   /* First positional argument: the source HMM definition file. */
   if (NextArg()!=STRINGARG)
      HError(2019,"HCompV: Source HMM file name expected");
   hmmfn = GetStrArg();
   Initialise();
   /* Every remaining argument is a training data file; at least one is
      required because the check runs before the loop condition. */
   do {
      if (NextArg()!=STRINGARG)
         HError(2019,"HCompV: Training data file name expected");
      datafn = GetStrArg();
      LoadFile(datafn);
   } while (NumArgs()>0);
   /* Write the accumulated statistics into the model, fix its gConsts,
      then save it. */
   SetCovs();
   FixGConsts(hmmLink);
   SaveModel(outfn);
   if (trace&T_TOP)
      printf("Output written to directory %s\n",(outDir==NULL)?"./":outDir);
   /* A variance floor file is only written when a positive scale was given. */
   if (vFloorScale>0.0)
      PutVFloor();
}
while循环语句就是在处理命令行的参数,不断地读取由InitShell处理后剩下的参数。在HCompV命令中,有-f 0.01 -m -M dir proto等7个参数,处理完毕后,设置了全局变量vFloorScale(方差下限的缩放系数)、meanUpdate(布尔值,表示同时更新均值)、outDir(模型的输出目录)和hmmfn(描述模型的文件名)。
接着,默认情况下DoCMV为false,因此进入if第一个分支,设置hmmfn值为proto,它就是我们之前为HMM模型描述的结构。然后执行Initialise()函数。
4)Initialise()
函数前面的注释 /* Initialise: load HMMs and create accumulators */
下面把这个函数的主要调用关系罗列出来,然后分析它们各自都干了什么。
/* Initialise: load the HMM named by hmmfn and create the accumulators.
   Steps, in order:
     1. create a one-model set named after the base of hmmfn and load its
        definition from the file's directory/extension;
     2. create the input-data heap iStack;
     3. look up the loaded model and keep it in hmmLink;
     4. allocate per-stream mean and (full or diagonal) covariance
        accumulators on gstack;
     5. build the observation holder obs and, when a segment label was
        given, resolve it to segId. */
void Initialise(void)
{
   char hmmBase[MAXSTRLEN];
   char hmmDir[MAXSTRLEN];
   char hmmExt[MAXSTRLEN];
   Boolean eSep;
   int width;
   int stream;

   /* Load HMM defs */
   if (MakeOneHMM(&hset, BaseOf(hmmfn, hmmBase)) < SUCCESS)
      HError(2028,"Initialise: MakeOneHMM failed");
   if (LoadHMMSet(&hset, PathOf(hmmfn, hmmDir), ExtnOf(hmmfn, hmmExt)) < SUCCESS)
      HError(2028,"Initialise: LoadHMMSet failed");
   SetParmHMMSet(&hset);

   /* Stack-type heap used to hold the input data */
   CreateHeap(&iStack,"InBuf", MSTAK, 1, 0.5, 100000, LONG_MAX);

   /* Locate the physical HMM through its 'h' macro */
   hmmId = GetLabId(hmmBase, FALSE);
   macroLink = FindMacroName(&hset, 'h', hmmId);
   hmmLink = (HLink)macroLink->structure;

   /* Decide, per stream, whether a full covariance is needed */
   CheckVarianceKind( );

   /* One mean accumulator and one covariance accumulator per stream;
      streams are numbered from 1, swidth[0] holds the stream count. */
   stream = 1;
   while (stream <= hset.swidth[0]) {
      width = hset.swidth[stream];
      accs[stream].meanSum = CreateVector(&gstack, width);
      ZeroVector(accs[stream].meanSum);
      if (!fullcNeeded[stream]) {
         /* diagonal case: plain variance vectors */
         accs[stream].squareSum.var = CreateSVector(&gstack, width);
         accs[stream].fixed.var = CreateSVector(&gstack, width);
         ZeroVector(accs[stream].squareSum.var);
      }
      else {
         /* full case: triangular matrices */
         accs[stream].squareSum.inv = CreateSTriMat(&gstack, width);
         accs[stream].fixed.inv = CreateSTriMat(&gstack, width);
         ZeroTriMat(accs[stream].squareSum.inv);
      }
      stream++;
   }

   /* Object that will hold each input observation */
   SetStreamWidths(hset.pkind, hset.vecSize, hset.swidth, &eSep);
   obs = MakeObservation(&gstack, hset.swidth, hset.pkind, FALSE, eSep);

   if (segLab != NULL)
      segId = GetLabId(segLab, TRUE);
}
可以看出首先调用MakeOneHMM,看看它完成了哪些操作。
/* EXPORT->MakeOneHMM: Create a singleton for the HMM hname */
/* Initialises hset through InitHMMSet for the single model hname.
   Returns SUCCESS when InitHMMSet does not report a status below
   SUCCESS; otherwise resets hset and returns FAIL. */
ReturnStatus MakeOneHMM(HMMSet *hset, char *hname)
{
   if (InitHMMSet(hset, hname, TRUE) >= SUCCESS)
      return(SUCCESS);

   /* initialisation failed: reset the set before reporting */
   ResetHMMSet(hset);
   return(FAIL);
}
它实际上只是调用了InitHMMSet函数完成初始化,此时还没有读取模型文件;proto这个hmm模型文件是在随后的LoadHMMSet中读取的。proto文件的内容如下:
~o <VecSize> 39 <MFCC_0_D_A>
~h "proto"
<BeginHMM>
<NumStates> 5
<State> 2
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 3
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 4
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<TransP> 5
0.0 1.0 0.0 0.0 0.0
0.0 0.6 0.4 0.0 0.0
0.0 0.0 0.6 0.4 0.0
0.0 0.0 0.0 0.7 0.3
0.0 0.0 0.0 0.0 0.0
<EndHMM>
其实这个hmm原型文件描述了HMM主要的参数,包括5个状态,观察向量的维度是39,它是语音的MFCC以及一阶、二阶差分参数(MFCC_0_D_A)。这个原型文件只描述了一个hmm模型,名字就是"proto"。
在InitHMMSet函数中调用了CreateHMM函数完成单个模型的构建。模型名称为“proto”,并保存到HMMSet中,但实际上这时这个模型的参数还没有设定,只是一个有了名字的空模型。
在接下来的循环语句中,LoadFile函数读取每个MFCC文件,并计算特征向量的均值和方差,通过累加的方式。
SetCovs()函数完成方差的计算,并复制给每个状态,来初始化状态的概率密度函数。
/* SetCovs: set covariance values in hmm */
/* Calls CalcCovs, then walks states 2..numStates-1 of hmmLink, every
   stream of each state and every mixture component of each stream.
   For each component whose vector is not yet flagged as seen:
     - the mean is overwritten from accs[stream].meanSum, but only when
       meanUpdate is set;
     - the covariance is overwritten from accs[stream].fixed, in the form
       that matches the component's covariance kind.
   Updated vectors are flagged via TouchV (presumably so a shared vector
   is written only once -- confirm), and all flags are cleared at the end. */
void SetCovs(void)
{
   int state, stream, mix;
   StateElem *stateElem;
   StreamElem *streamElem;
   MixtureElem *mixElem;
   MixPDF *pdf;

   CalcCovs();   /* after this, meanSum holds the mean */

   if (trace&T_TOP) {
      printf("Updating HMM ");
      if (meanUpdate)
         printf("Means and ");
      printf("Covariances\n");
   }

   stateElem = hmmLink->svec+2;
   for (state=2; state < hmmLink->numStates; state++, stateElem++) {
      streamElem = stateElem->info->pdf+1;
      for (stream=1; stream <= hset.swidth[0]; stream++, streamElem++) {
         mixElem = streamElem->spdf.cpdf+1;
         for (mix=1; mix <= streamElem->nMix; mix++, mixElem++) {
            pdf = mixElem->mpdf;

            /* copy the global mean into this component */
            if (meanUpdate && !IsSeenV(pdf->mean)) {
               CopyVector(accs[stream].meanSum, pdf->mean);
               TouchV(pdf->mean);
            }

            /* covariance already handled for this component */
            if (IsSeenV(pdf->cov.var))
               continue;

            if (pdf->ckind == FULLC)
               CopyMatrix(accs[stream].fixed.inv, pdf->cov.inv);
            else if (fullcNeeded[stream])
               /* component is not full, but only a full estimate exists:
                  take its diagonal */
               TriDiag2Vector(accs[stream].fixed.inv, pdf->cov.var);
            else
               CopyVector(accs[stream].fixed.var, pdf->cov.var);
            TouchV(pdf->cov.var);
         }
      }
   }

   ClearSeenFlags(&hset,CLR_ALL);
}