Lattice是非常重要的数据结构,它是语音识别系统所支持的高层语法的抽象表示,一般由预先定义好的语法(gram)生成。
/* Lattice: one level of a word lattice, plus the header information
   recorded alongside it. */
typedef struct lattice {
   /* --- ownership and validity --- */
   MemHeap *heap;              /* Heap the lattice uses */
   LatFormat format;           /* Indicates which fields are valid */
   Vocab *voc;                 /* Dictionary the lattice is based on */

   /* --- graph storage --- */
   int nn;                     /* Number of nodes */
   int na;                     /* Number of arcs */
   LNode *lnodes;              /* Array of lattice nodes */
   LArc *larcs;                /* Array of lattice arcs */

   /* --- sub-lattice bookkeeping --- */
   LabId subLatId;             /* Lattice identifier (for SubLats only) */
   SubLatDef *subList;         /* List of sublats in this lattice level */
   SubLatDef *refList;         /* List of all SubLats referring to this lat */
   struct lattice *chain;      /* Linked list used for various jobs */

   /* --- names of the files involved (NULL == unknown) --- */
   char *utterance;            /* Utterance file name */
   char *vocab;                /* Dictionary file name */
   char *hmms;                 /* MMF file name */
   char *net;                  /* Network file name */

   /* --- scale factors and penalties --- */
   float acscale;              /* Acoustic scale factor */
   float lmscale;              /* LM scale factor */
   LogFloat wdpenalty;         /* Word insertion penalty */
   float prscale;              /* Pronunciation scale factor */
   HTime framedur;             /* Frame duration in 100ns units */
   float logbase;              /* Base of logarithm for likelihoods in
                                  lattice files
                                  (1.0 = default (e), 0.0 = no logs) */
   float tscale;               /* Time scale factor
                                  (default: 1, i.e. seconds) */

   Ptr hook;                   /* User definable hook */
} Lattice;
这个lattice应该包含哪些信息,才能完整地描述一个词格网络?得有字典对象(它的wtab包含了所有词)、节点信息、边的信息(个数和实体列表),还得包含这个词格网络是从哪些信息得到的:utterance文件名、发音字典文件名、描述HMM的文件名,以及描述Network的文件名。
函数ReadOneLattice返回一个Lattice的指针。这个函数里,首先会读取Net文件中有多少个节点N(即nn)和多少条边L(即na),然后构建一个空的Lattice,并为lattice的lnodes和larcs分配合适的空间(大小取决于nn和na)。然后依次读取文件,并且把节点和边的标号作为下标来访问(不重要,这是C语言特性决定的)。
/*
 * ReadOneLattice: Read (one level) of lattice from file.
 *
 *   src      - source the lattice text is read from
 *   heap     - heap the lattice, its node array and its arc array are
 *              allocated from
 *   voc      - dictionary stored in the lattice as lat->voc
 *   shortArc - when true the arc array is sized with LArc_S records,
 *              otherwise with full LArc records
 *   add2Dict - not referenced in the portion of the function shown here
 *
 * Returns the newly allocated lattice.  Returns NULL, after reporting
 * through HRError, when the header did not supply a positive node count
 * (N=) and a positive arc count (L=).
 *
 * NOTE: this listing is abridged; the places where code was left out are
 * marked with "elided" comments.  Every local is kept because the elided
 * code uses them.
 */
Lattice *ReadOneLattice(Source *src, MemHeap *heap, Vocab *voc,
                        Boolean shortArc, Boolean add2Dict)
{
   int i,s,e,n,v=0,nn,na;
   Lattice *lat;
   LNode *ln;
   LArc *la;
   Word wordId;
   double time,aclike,lmlike;
   double prlike;
   char nbuf[132],vbuf[132],*ptr,ntype,del;
#define DBUFLEN 4096
   char dbuf[DBUFLEN];
   double lmscl=1.0, lmpen=0.0, acscl=1.0, prscl=1.0;
   float logbase = 1.0, tscale = 1.0;
   char *uttstr,*lmnstr,*vocstr,*hmmstr,*sublatstr,*tag;
   SubLatDef *subLatId = NULL;

   lat = (Lattice *) New(heap,sizeof(Lattice));
   lat->heap=heap; lat->subLatId=NULL; lat->chain=NULL;
   lat->voc=voc; lat->refList=NULL; lat->subList=NULL;

   /* Initialise default header values */
   nn=0;na=0; uttstr=lmnstr=vocstr=hmmstr=sublatstr=NULL;

   /* Process lattice header: it ends at the first end-of-line that is
      reached once both N and L have been seen */
   while((ptr=GetNextFieldName(nbuf,&del,src))) {
      if (nbuf[0]=='\n') {
         if (na != 0 && nn != 0) break;
      }
      else if (strlen(ptr)==1) {
         /* Single-letter field names: only N and L matter here */
         ntype=*ptr;
         switch(ntype) {
         case 'N':
            nn=GetIntField('N',del,vbuf,src);
            break;
         case 'L':
            na=GetIntField('L',del,vbuf,src);
            break;
         default:
            GetFieldValue(NULL,src,0);
            break;
         }
      }
      else {
         if (!strcmp(ptr,"UTTERANCE")) {
            GetFieldValue(vbuf,src,0);
            uttstr=CopyString(heap,vbuf);
         }
         /* ... handling of the other header fields elided ... */
         else
            GetFieldValue(NULL,src,0);
      }
   }

   /* The counts size the allocations below, so refuse a header that left
      either of them unset (0) or negative */
   if (nn <= 0 || na <= 0) {
      HRError(8250,"ReadLattice: Need to know number of nodes and arcs [%d,%d]",nn,na);
      Dispose(heap, lat);
      return(NULL);
   }

   /* Initialise lattice based on header information */
   lat->nn=nn;
   lat->na=na;
   lat->utterance=uttstr;lat->vocab=vocstr;lat->hmms=hmmstr;
   lat->net=lmnstr;lat->lmscale=lmscl;lat->wdpenalty=lmpen;
   lat->acscale = acscl;
   lat->logbase = logbase;
   lat->tscale = tscale;
   lat->framedur=0;
   lat->prscale=prscl;

   /* Presence of SUBLAT=id string indicates more to come */
   lat->subList=NULL; lat->chain=NULL;
   if (sublatstr!=NULL) lat->subLatId = GetLabId(sublatstr,TRUE);
   else lat->subLatId = NULL;

   /* Allocate and initialise nodes/arcs */
   lat->lnodes=(LNode *) New(heap, sizeof(LNode)*nn);
   if (shortArc)
      lat->larcs=(LArc *) New(heap, sizeof(LArc_S)*na);
   else
      lat->larcs=(LArc *) New(heap, sizeof(LArc)*na);

   /* Nodes start with no hook, no attached arcs and a zero score */
   for(i=0, ln=lat->lnodes; i<nn; i++, ln++) {
      ln->hook=NULL;
      ln->pred=NULL;
      ln->foll=NULL;
      ln->score=0.0;
   }
   /* Arcs start unconnected: no end points and no list links */
   for(i=0, la=lat->larcs; i<na; i++, la=NextLArc(lat,la)) {
      la->lmlike=0.0;
      la->start=la->end=NNODE;
      la->farc=la->parc=NARC;
   }

   /* ... remainder of the function elided ... */
   return lat;
}
这个函数太长了,我分段解析。到目前为止,这个函数返回的只是一个含有nn个LNode和na个LArc的lattice,各节点和边的字段都还只是初始值(如NULL)。也就是说,此时只读取了下面Net文件的头两行。
看一下Net的大体格式。
VERSION=1.0
N=31 L=62
I=0 W=SENT-END
I=1 W=YOUNG
I=2 W=!NULL
I=3 W=STEVE
I=4 W=LEE
I=5 W=PHIL
I=6 W=WOOD
I=7 W=DAVE
I=8 W=TYLER
I=9 W=JULIAN
I=10 W=LAW
I=11 W=SUE
I=12 W=CALL
I=13 W=!NULL
I=14 W=PHONE
I=15 W=ZERO
I=16 W=!NULL
I=17 W=OH
I=18 W=NINE
I=19 W=EIGHT
I=20 W=SEVEN
I=21 W=SIX
I=22 W=FIVE
I=23 W=FOUR
I=24 W=THREE
I=25 W=TWO
I=26 W=ONE
I=27 W=DIAL
I=28 W=SENT-START
I=29 W=!NULL
I=30 W=!NULL
J=0 S=2 E=0
J=1 S=16 E=0
J=2 S=3 E=1
J=3 S=13 E=1
J=4 S=1 E=2
J=5 S=4 E=2
J=6 S=6 E=2
J=7 S=8 E=2
J=8 S=10 E=2
J=9 S=13 E=3
J=10 S=5 E=4
J=11 S=13 E=4
J=12 S=13 E=5
J=13 S=7 E=6
J=14 S=13 E=6
J=15 S=13 E=7
J=16 S=9 E=8
J=17 S=13 E=8
J=18 S=13 E=9
J=19 S=11 E=10
J=20 S=13 E=10
J=21 S=13 E=11
J=22 S=28 E=12
J=23 S=12 E=13
J=24 S=14 E=13
J=25 S=28 E=14
J=26 S=16 E=15
J=27 S=27 E=15
J=28 S=15 E=16
J=29 S=17 E=16
J=30 S=18 E=16
J=31 S=19 E=16
J=32 S=20 E=16
J=33 S=21 E=16
J=34 S=22 E=16
J=35 S=23 E=16
J=36 S=24 E=16
J=37 S=25 E=16
J=38 S=26 E=16
J=39 S=16 E=17
J=40 S=27 E=17
J=41 S=16 E=18
J=42 S=27 E=18
J=43 S=16 E=19
J=44 S=27 E=19
J=45 S=16 E=20
J=46 S=27 E=20
J=47 S=16 E=21
J=48 S=27 E=21
J=49 S=16 E=22
J=50 S=27 E=22
J=51 S=16 E=23
J=52 S=27 E=23
J=53 S=16 E=24
J=54 S=27 E=24
J=55 S=16 E=25
J=56 S=27 E=25
J=57 S=16 E=26
J=58 S=27 E=26
J=59 S=28 E=27
J=60 S=30 E=28
J=61 S=0 E=29
可以看出来,我们这次提供的Net有31个节点,62条边。ReadOneLattice函数会逐一把这些节点和边构建到Lattice中,并且关联起来,形成一个图。从一个节点,可以找到连接它的所有边,再由这些边找到它们的起点和终点。
/* LNode: one lattice node together with the heads of the two arc lists
   attached to it. */
typedef struct lnode {
   int n;                      /* Sorted order */

   /* --- what the node stands for --- */
   Word word;                  /* Word represented by arc
                                  (labels may be on nodes) */
   char *tag;                  /* Semantic tag for this node */
   short v;                    /* Pronunciation variant number */
   SubLatDef *sublat;          /* SubLat for node
                                  (if word==lat->voc->subLatWord) */
   HTime time;                 /* Time of word boundary at node */

   /* --- connectivity --- */
   ArcId foll;                 /* Linked list of arcs following node */
   ArcId pred;                 /* Linked list of arcs preceding node */

   /* --- scratch fields --- */
   double score;               /* Field used for pruning */
   Ptr hook;                   /* User definable hook */
} LNode;
看一下这个节点的结构:它包含所指向的Word、进入该节点的所有边(pred链表)以及从该节点出去的所有边(foll链表)。
再看一下边的结构:
/* LArc: one lattice arc, linked into the following-arc list of its start
   node and the preceding-arc list of its end node. */
typedef struct larc {
   /* --- end points --- */
   NodeId start;               /* Node at start of word */
   NodeId end;                 /* Node at end of word */

   LogFloat lmlike;            /* Language model likelihood of word */

   /* --- list links --- */
   ArcId farc;                 /* Next arc following start node */
   ArcId parc;                 /* Next arc preceding end node */

   /* --- acoustic information --- */
   LogFloat aclike;            /* Acoustic likelihood of word */
   short nAlign;               /* Number of alignment records in word */
   LAlign *lAlign;             /* Array[0..nAlign-1] of alignment records */

   float score;                /* Field used for pruning/sorting */
   LogFloat prlike;            /* Pronunciation likelihood of arc */
} LArc;
包含的信息有:边的开始节点start、结束节点end,以及两个链表指针——farc指向开始节点的输出边链表中的下一条边,parc指向结束节点的输入边链表中的下一条边。
Net文件里指定边与节点的连接信息,通过这些可以构建lattice的LNode和LArc之间的关系。
/* Body loop of ReadOneLattice (fragment): handles one line per iteration
   until GetNextFieldName returns NULL or a "." field name is seen.  'I'
   lines fill in a node, 'J' lines fill in an arc and link it to its two
   end nodes; any other line is read and ignored.
   Every index taken from the file is range-checked against lat->nn /
   lat->na before it is used as an array offset.  The locals nn and na are
   counted down below, so they cannot serve as the bounds. */
do {
   if ((ptr=GetNextFieldName(nbuf,&del,src)) == NULL)
      break;
   /* Recognised line types have only one character names */
   if (strlen(ptr)==1)
      ntype=*ptr;
   else
      ntype=0;
   if (ntype == '.') {
      ptr = NULL;
      break;
   }
   switch(ntype) {
   case '\n': break;
   case 'I':
      n=GetIntField('I',del,vbuf,src);
      if (n < 0 || n >= lat->nn) {
         Dispose(heap, lat);
         HRError(8251,"ReadLattice: Lattice does not contain node %d",n);
         return(NULL);
      }
      ln=lat->lnodes+n;
      /* Defaults for fields the line does not mention */
      time=0.0;wordId=voc->nullWord;tag=NULL;v=-1;
      while((ptr=GetNextFieldName(nbuf,&del,src)) != NULL) {
         if (nbuf[0]=='\n') break;
         else {
            if (strlen(ptr)>=1)
               ntype=*ptr;
            else
               ntype=0;
            switch(ntype) {
            case 't':
               time=GetFltField('t',del,vbuf,src);
               time *= tscale;
               lat->format |= HLAT_TIMES;
               break;
            case 'W':
               GetFieldValue(vbuf,src,0);
               wordId=GetWord(voc,GetLabId(vbuf,add2Dict),add2Dict);
               if (wordId==NULL){
                  Dispose(heap, lat);
                  HRError(8251,"ReadLattice: Word %s not in dict",vbuf);
                  return(NULL);
               }
               break;
            default:
               GetFieldValue(NULL,src,0);
               break;
            }
         }
      }
      /* A node carrying a real word clears the HLAT_ALABS flag */
      if (wordId != voc->nullWord)
         lat->format &= ~HLAT_ALABS;
      ln->time=time;
      ln->word=wordId;
      ln->tag=tag;
      ln->v=v;
      if (wordId == voc->subLatWord)
         ln->sublat = subLatId;
      else
         ln->sublat = NULL;
      ln->hook=ln;
      nn--;
      break;
   case 'J':
      /* Field letter matches the line type, as in the other Get*Field
         calls */
      n=GetIntField('J',del,vbuf,src);
      if (n < 0 || n >= lat->na) {
         Dispose(heap, lat);
         HRError(8251,"ReadLattice: Lattice does not contain arc %d",n);
         return(NULL);
      }
      la=NumbLArc(lat,n);
      s=e=v=-1; wordId=NULL; aclike=lmlike=0.0;
      prlike=0.0;
      while ((ptr=GetNextFieldName(nbuf,&del,src))) {
         if (nbuf[0]=='\n') break;
         else {
            if (strlen(ptr)>=1) ntype=*ptr;
            else ntype=0;
            switch(ntype)
               {
               case 'S':
                  s=GetIntField('S',del,vbuf,src);
                  break;
               case 'E':
                  e=GetIntField('E',del,vbuf,src);
                  break;
               default:
                  GetFieldValue(NULL,src,0);
                  break;
               }
         }
      }
      /* s and e stay at -1 when the line has no S= / E= field, so both
         must be checked before they index lat->lnodes */
      if (s < 0 || s >= lat->nn) {
         Dispose(heap, lat);
         HRError(8251,"ReadLattice: Lattice does not contain start node %d",s);
         return(NULL);
      }
      if (e < 0 || e >= lat->nn) {
         Dispose(heap, lat);
         HRError(8251,"ReadLattice: Lattice does not contain end node %d",e);
         return(NULL);
      }
      la->start=lat->lnodes+s;
      la->end=lat->lnodes+e;
      la->lmlike=lmlike;
      /* While HLAT_ALABS is still set, an end node holding the null word
         takes the arc's word and variant */
      if ((lat->format&HLAT_ALABS) && la->end->word == voc->nullWord){
         la->end->word=wordId;
         la->end->v = v;
      }
      /* Push the arc onto the front of the start node's following list
         and of the end node's preceding list */
      la->farc=la->start->foll;
      la->parc=la->end->pred;
      la->start->foll=la;
      la->end->pred=la;
      if (!shortArc) {
         la->aclike=aclike;
         la->prlike=prlike;
      }
      na--;
      break;
   default:
      /* Unrecognised line: read and discard every field up to end-of-line */
      GetFieldValue(NULL,src,0);
      while ((ptr=GetNextFieldName(nbuf,&del,src))) {
         if (nbuf[0]=='\n') break;
         else GetFieldValue(NULL,src,0);
      }
      break;
   }
}
while(ptr != NULL);
这个循环代码,就是处理Net中接下来的所有行信息。根据首字母是“I”还是“J”来判断建立LNode还是LArc,并通过“=”后面的序号,从lat的lnodes和larcs序列中索引相应的节点和边对象,然后填充信息。