#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../DiskBuf.h"
#pragma once
// IsHz(x): true when the byte value x lies in 0x81..0xA0 or 0xAA..0xFE
// (0xA1..0xA9 is excluded -- presumably the GBK symbol rows; TODO confirm).
// IndexMaker tests the first byte of a two-byte character with it.
//
// HzCode(x): ((x & 0x7F) << 8) + (x >> 8). IndexMaker passes the two bytes of
// a character as one 16-bit value and uses the result as an index into its
// 0x8000-entry table.
//
// Both macros are skipped when IsHz is already defined.
#ifndef IsHz
#define IsHz(x) (((x)>=0x81 && (x)<=0xA0)||((x)>=0xAA && (x)<=0xFE))
#define HzCode(x) ((((x)&0x7F)<<8)+((x)>>8))
#endif
// _IndexBuffer_Size: number of unsigned long slots IndexMaker reserves for
// each table entry's cache; a smaller value is used when _DEBUG is defined.
#ifdef _DEBUG
#define _IndexBuffer_Size 0x80
#else
#define _IndexBuffer_Size 0x1000
#endif
class IndexMaker
... {
private:
struct IndexMakerBuf //词库缓冲数组
...{
char nWords[3]; //[WordMaxLen];
unsigned long _Last_FileNum;
unsigned long * _Data;
unsigned long * DataPtr;
unsigned long * DataEpr;
} _WordsIndex[0x8000];
unsigned long *_Diskbuf;
unsigned long _filenum; //文件号
char _mOutput_Path[512]; //索引输出路径
char* _mOutPath_EndPtr; //指向索引路径末端
public:
long __Error;
//function
~IndexMaker(void)
...{
if (_Diskbuf)
...{
free(_Diskbuf);
}
}
IndexMaker(char *OutPutDir)//cache set:default=840, unsigned long CacheSet
...{
_filenum=1;
__Error=NULL;
strcpy(_mOutput_Path,OutPutDir);
_mOutPath_EndPtr=_mOutput_Path+strlen(_mOutput_Path);
unsigned long *tbuf=_Diskbuf=(unsigned long *)calloc(0x8000*_IndexBuffer_Size,4);
if (!_Diskbuf) ...{ __Error=_Error_Malloc;return ;}
memset(_WordsIndex,0,sizeof(_WordsIndex));
for(unsigned long i=0;i<0x8000;i++)
...{
IndexMakerBuf *tWi=_WordsIndex+i;
*(unsigned short *)(tWi->nWords)=(unsigned short)(((i>>8)|0x80)+((i&0xFF)<<8));
tWi->nWords[2]=NULL;
tWi->_Last_FileNum=NULL;
tWi->DataPtr=tWi->_Data=tbuf;
tbuf+=_IndexBuffer_Size;
tWi->DataEpr=tbuf;
}
}
long _MakeIndex(DiskBuffer *m_Buffer)
...{
char *m_FileText=NULL;
while (m_Buffer->_PopData(&m_FileText,&_filenum)) _fenci(m_FileText);
for(unsigned long i=0;i<0x8000;i++)
...{
if (_WordsIndex[i].DataPtr!=_WordsIndex[i]._Data) _Write2File(_WordsIndex+i);
}
return __Error;
}
private:
void _Write2File(IndexMakerBuf *nDataCache)
...{
strcpy(_mOutPath_EndPtr,nDataCache->nWords);//char name
FILE *outFile;
if (NULL==(outFile=fopen(_mOutput_Path,"ab+")))
...{
printf("error write to file %s %lx ",_mOutput_Path,*_mOutPath_EndPtr);
}
else
...{
fwrite(nDataCache->_Data,4,(nDataCache->DataPtr)-(nDataCache->_Data),outFile);
fclose(outFile);
}
nDataCache->DataPtr=nDataCache->_Data;
}
void _fenci(char *mTextCharPtr)
...{
mTextCharPtr--;//起始位置减一,使第一个位置为一而不是零
char * bptChar=mTextCharPtr;
while (*(bptChar))
...{
if ((*bptChar)<0)
...{
if (IsHz(*(unsigned char *)(bptChar)))
...{
//_Write2Cache(_WordsIndex+HzCode(*(unsigned short *)(bptChar)),(unsigned long)(bptChar-mTextCharPtr));
IndexMakerBuf *tDataCache=_WordsIndex+HzCode(*(unsigned short *)(bptChar));
unsigned long pushData=(unsigned long)(bptChar-mTextCharPtr);
//void _Write2Cache(IndexMakerBuf *tDataCache,unsigned long pushData)
//{
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
if (tDataCache->_Last_FileNum!=_filenum)
...{
*(tDataCache->DataPtr)=_filenum;
tDataCache->_Last_FileNum=_filenum;
tDataCache->DataPtr++;
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
}
*(tDataCache->DataPtr)=pushData;
tDataCache->DataPtr++;
//}
//end function _Write2Cache
}
bptChar+=2;
}
else bptChar++;
}
//*//_WriteZero();
//for(unsigned long i=1;i<0x7F00;i++) //Powered by barenx
IndexMakerBuf *nDataCacheEpr=_WordsIndex+0x7F00;
for(IndexMakerBuf *nDataCache=_WordsIndex+1;nDataCache<nDataCacheEpr;nDataCache++)
...{
if (nDataCache->_Last_FileNum==_filenum)
...{
if (nDataCache->DataPtr==nDataCache->DataEpr) _Write2File(nDataCache);
*(nDataCache->DataPtr)=NULL;
(nDataCache->DataPtr)++;
}
}
//end _WriteZero();*/
}
} ;
#pragma once
// IsHz(x): true when the byte value x lies in 0x81..0xA0 or 0xAA..0xFE
// (0xA1..0xA9 is excluded -- presumably the GBK symbol rows; TODO confirm).
// IndexMaker tests the first byte of a two-byte character with it.
//
// HzCode(x): ((x & 0x7F) << 8) + (x >> 8). IndexMaker passes the two bytes of
// a character as one 16-bit value and uses the result as an index into its
// 0x8000-entry table.
//
// Both macros are skipped when IsHz is already defined.
#ifndef IsHz
#define IsHz(x) (((x)>=0x81 && (x)<=0xA0)||((x)>=0xAA && (x)<=0xFE))
#define HzCode(x) ((((x)&0x7F)<<8)+((x)>>8))
#endif
// _IndexBuffer_Size: number of unsigned long slots IndexMaker reserves for
// each table entry's cache; a smaller value is used when _DEBUG is defined.
#ifdef _DEBUG
#define _IndexBuffer_Size 0x80
#else
#define _IndexBuffer_Size 0x1000
#endif
class IndexMaker
... {
private:
struct IndexMakerBuf //词库缓冲数组
...{
char nWords[3]; //[WordMaxLen];
unsigned long _Last_FileNum;
unsigned long * _Data;
unsigned long * DataPtr;
unsigned long * DataEpr;
} _WordsIndex[0x8000];
unsigned long *_Diskbuf;
unsigned long _filenum; //文件号
char _mOutput_Path[512]; //索引输出路径
char* _mOutPath_EndPtr; //指向索引路径末端
public:
long __Error;
//function
~IndexMaker(void)
...{
if (_Diskbuf)
...{
free(_Diskbuf);
}
}
IndexMaker(char *OutPutDir)//cache set:default=840, unsigned long CacheSet
...{
_filenum=1;
__Error=NULL;
strcpy(_mOutput_Path,OutPutDir);
_mOutPath_EndPtr=_mOutput_Path+strlen(_mOutput_Path);
unsigned long *tbuf=_Diskbuf=(unsigned long *)calloc(0x8000*_IndexBuffer_Size,4);
if (!_Diskbuf) ...{ __Error=_Error_Malloc;return ;}
memset(_WordsIndex,0,sizeof(_WordsIndex));
for(unsigned long i=0;i<0x8000;i++)
...{
IndexMakerBuf *tWi=_WordsIndex+i;
*(unsigned short *)(tWi->nWords)=(unsigned short)(((i>>8)|0x80)+((i&0xFF)<<8));
tWi->nWords[2]=NULL;
tWi->_Last_FileNum=NULL;
tWi->DataPtr=tWi->_Data=tbuf;
tbuf+=_IndexBuffer_Size;
tWi->DataEpr=tbuf;
}
}
long _MakeIndex(DiskBuffer *m_Buffer)
...{
char *m_FileText=NULL;
while (m_Buffer->_PopData(&m_FileText,&_filenum)) _fenci(m_FileText);
for(unsigned long i=0;i<0x8000;i++)
...{
if (_WordsIndex[i].DataPtr!=_WordsIndex[i]._Data) _Write2File(_WordsIndex+i);
}
return __Error;
}
private:
void _Write2File(IndexMakerBuf *nDataCache)
...{
strcpy(_mOutPath_EndPtr,nDataCache->nWords);//char name
FILE *outFile;
if (NULL==(outFile=fopen(_mOutput_Path,"ab+")))
...{
printf("error write to file %s %lx ",_mOutput_Path,*_mOutPath_EndPtr);
}
else
...{
fwrite(nDataCache->_Data,4,(nDataCache->DataPtr)-(nDataCache->_Data),outFile);
fclose(outFile);
}
nDataCache->DataPtr=nDataCache->_Data;
}
void _fenci(char *mTextCharPtr)
...{
mTextCharPtr--;//起始位置减一,使第一个位置为一而不是零
char * bptChar=mTextCharPtr;
while (*(bptChar))
...{
if ((*bptChar)<0)
...{
if (IsHz(*(unsigned char *)(bptChar)))
...{
//_Write2Cache(_WordsIndex+HzCode(*(unsigned short *)(bptChar)),(unsigned long)(bptChar-mTextCharPtr));
IndexMakerBuf *tDataCache=_WordsIndex+HzCode(*(unsigned short *)(bptChar));
unsigned long pushData=(unsigned long)(bptChar-mTextCharPtr);
//void _Write2Cache(IndexMakerBuf *tDataCache,unsigned long pushData)
//{
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
if (tDataCache->_Last_FileNum!=_filenum)
...{
*(tDataCache->DataPtr)=_filenum;
tDataCache->_Last_FileNum=_filenum;
tDataCache->DataPtr++;
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
}
*(tDataCache->DataPtr)=pushData;
tDataCache->DataPtr++;
//}
//end function _Write2Cache
}
bptChar+=2;
}
else bptChar++;
}
//*//_WriteZero();
//for(unsigned long i=1;i<0x7F00;i++) //Powered by barenx
IndexMakerBuf *nDataCacheEpr=_WordsIndex+0x7F00;
for(IndexMakerBuf *nDataCache=_WordsIndex+1;nDataCache<nDataCacheEpr;nDataCache++)
...{
if (nDataCache->_Last_FileNum==_filenum)
...{
if (nDataCache->DataPtr==nDataCache->DataEpr) _Write2File(nDataCache);
*(nDataCache->DataPtr)=NULL;
(nDataCache->DataPtr)++;
}
}
//end _WriteZero();*/
}
} ;