//Generate Word according the segmentation route bool CSegment::GenerateWord(int**nSegRoute, int nIndex) { unsigned int i=0,k=0; int j,nStartVertex,nEndVertex,nPOS; char sAtom[WORD_MAXLENGTH],sNumCandidate[100],sCurWord[100]; ELEMENT_TYPE fValue; while(nSegRoute[nIndex][i]!=-1&&nSegRoute[nIndex][i+1]!=-1&&nSegRoute[nIndex][i]<nSegRoute[nIndex][i+1]) { nStartVertex=nSegRoute[nIndex][i]; j=nStartVertex;//Set the start vertex nEndVertex=nSegRoute[nIndex][i+1];//Set the end vertex nPOS=0; m_graphSeg.m_segGraph.GetElement(nStartVertex,nEndVertex,&fValue,&nPOS); sAtom[0]=0; while(j<nEndVertex) {//Generate the word according the segmentation route strcat(sAtom,m_graphSeg.m_sAtom[j]); j++; } m_pWordSeg[nIndex][k].sWord[0]=0;//Init the result ending strcpy(sNumCandidate,sAtom); //找出连续的数字串 while(sAtom[0]!=0&&(IsAllNum((unsigned char*)sNumCandidate)||IsAllChineseNum(sNumCandidate))) {//Merge all seperate continue num into one number //sAtom[0]!=0: add in 2002-5-9 strcpy(m_pWordSeg[nIndex][k].sWord,sNumCandidate); //Save them in the result segmentation i++;//Skip to next atom now sAtom[0]=0; while(j<nSegRoute[nIndex][i+1]) {//Generate the word according the segmentation route strcat(sAtom,m_graphSeg.m_sAtom[j]); j++; } strcat(sNumCandidate,sAtom); } unsigned int nLen=strlen(m_pWordSeg[nIndex][k].sWord); if(nLen==4&&CC_Find("第上成±—+∶·./",m_pWordSeg[nIndex][k].sWord)||nLen==1&&strchr("+-./",m_pWordSeg[nIndex][k].sWord[0])) {//Only one word strcpy(sCurWord,m_pWordSeg[nIndex][k].sWord);//Record current word i--; } elseif(m_pWordSeg[nIndex][k].sWord[0]==0)//Have never entering the while loop { strcpy(m_pWordSeg[nIndex][k].sWord,sAtom); //Save them in the result segmentation strcpy(sCurWord,sAtom);//Record current word } else {//It is a num if(strcmp("--",m_pWordSeg[nIndex][k].sWord)==0||strcmp("—",m_pWordSeg[nIndex][k].sWord)==0||m_pWordSeg[nIndex][k].sWord[0]=='-'&&m_pWordSeg[nIndex][k].sWord[1]==0)//The delimiter "--" { nPOS=30464;//'w'*256;Set the POS with 'w' i--;//Not num, back to previous word } else {//Adding time suffix char sInitChar[3]; unsigned int nCharIndex=0;//Get first char sInitChar[nCharIndex]=m_pWordSeg[nIndex][k].sWord[nCharIndex]; if(sInitChar[nCharIndex]<0) { nCharIndex+=1; sInitChar[nCharIndex]=m_pWordSeg[nIndex][k].sWord[nCharIndex]; } nCharIndex+=1; sInitChar[nCharIndex]='