汉字字索引

#include  " ../DiskBuf.h "
#pragma  once


// Fallback definitions, used only when IsHz has not been defined elsewhere.
//
// IsHz(x):   nonzero when the byte value x lies in 0x81..0xA0 or 0xAA..0xFE
//            (0xA1..0xA9 is excluded). Presumably these are the lead-byte
//            ranges of the double-byte Chinese encoding being indexed --
//            confirm against the data source.
// HzCode(x): takes a 16-bit value whose low byte is the first byte of the
//            character and whose high byte is the second byte, and returns
//            ((first & 0x7F) << 8) + second.
//
// Both macros evaluate their argument more than once; pass side-effect-free
// expressions only.
#ifndef IsHz
#define IsHz(x) (!((x) < 0x81 || (x) > 0xFE || ((x) > 0xA0 && (x) < 0xAA)))
#define HzCode(x) (((x) >> 8) + (((x) & 0x7F) << 8))
#endif

// Number of entries in each per-character cache slice (IndexMaker advances
// its buffer pointer by this amount for every slot). The value is 0x80 when
// _DEBUG is defined and 0x1000 otherwise; presumably the smaller debug size
// is meant to exercise the flush-to-file path more often.
#ifndef _DEBUG
#define _IndexBuffer_Size 0x1000
#else
#define _IndexBuffer_Size 0x80
#endif

class  IndexMaker
{
private:
    
struct IndexMakerBuf    //词库缓冲数组
    {
        
char nWords[3];        //[WordMaxLen];
        unsigned long _Last_FileNum;
        unsigned 
long * _Data;
        unsigned 
long * DataPtr;
        unsigned 
long * DataEpr;
    }
 _WordsIndex[0x8000];

    unsigned 
long *_Diskbuf;
    unsigned 
long _filenum;            //文件号
    char _mOutput_Path[512];        //索引输出路径
    char* _mOutPath_EndPtr;            //指向索引路径末端
public:
    
long __Error;
    
//function
    ~IndexMaker(void)
    
{
        
if (_Diskbuf)
        
{
            free(_Diskbuf);
        }

    }


    IndexMaker(
char *OutPutDir)//cache set:default=840, unsigned long CacheSet
    {
        _filenum
=1;
        __Error
=NULL;

        strcpy(_mOutput_Path,OutPutDir);
        _mOutPath_EndPtr
=_mOutput_Path+strlen(_mOutput_Path);

        unsigned 
long *tbuf=_Diskbuf=(unsigned long *)calloc(0x8000*_IndexBuffer_Size,4);
        
if (!_Diskbuf) { __Error=_Error_Malloc;return ;}
        
        memset(_WordsIndex,
0,sizeof(_WordsIndex));
        
for(unsigned long i=0;i<0x8000;i++)
        
{
            IndexMakerBuf 
*tWi=_WordsIndex+i;
            
*(unsigned short *)(tWi->nWords)=(unsigned short)(((i>>8)|0x80)+((i&0xFF)<<8));
            tWi
->nWords[2]=NULL;
            tWi
->_Last_FileNum=NULL;
            tWi
->DataPtr=tWi->_Data=tbuf;
            tbuf
+=_IndexBuffer_Size;
            tWi
->DataEpr=tbuf;
        }

    }
        

    
long _MakeIndex(DiskBuffer *m_Buffer)
    
{
        
char *m_FileText=NULL;
        
while (m_Buffer->_PopData(&m_FileText,&_filenum)) _fenci(m_FileText);
        
for(unsigned long i=0;i<0x8000;i++)
        
{
            
if (_WordsIndex[i].DataPtr!=_WordsIndex[i]._Data) _Write2File(_WordsIndex+i);
        }

        
return __Error;
    }

private:
    
void _Write2File(IndexMakerBuf *nDataCache)
    
{
        strcpy(_mOutPath_EndPtr,nDataCache
->nWords);//char name
        FILE *outFile;
        
if (NULL==(outFile=fopen(_mOutput_Path,"ab+")))
        
{
            printf(
"error write to file %s %lx ",_mOutput_Path,*_mOutPath_EndPtr);
        }

        
else
        
{
            fwrite(nDataCache
->_Data,4,(nDataCache->DataPtr)-(nDataCache->_Data),outFile);
            fclose(outFile);
        }

        nDataCache
->DataPtr=nDataCache->_Data;
    }


    
void _fenci(char *mTextCharPtr)
    
{
        mTextCharPtr
--;//起始位置减一,使第一个位置为一而不是零
        char * bptChar=mTextCharPtr;
        
while (*(bptChar)) 
        
{
            
if ((*bptChar)<0)
            
{
                
if (IsHz(*(unsigned char *)(bptChar)))
                
{
                    
//_Write2Cache(_WordsIndex+HzCode(*(unsigned short *)(bptChar)),(unsigned long)(bptChar-mTextCharPtr));
                    IndexMakerBuf *tDataCache=_WordsIndex+HzCode(*(unsigned short *)(bptChar));
                    unsigned 
long pushData=(unsigned long)(bptChar-mTextCharPtr);
                    
//void _Write2Cache(IndexMakerBuf *tDataCache,unsigned long pushData)
                    
//{
                        if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
                        
if (tDataCache->_Last_FileNum!=_filenum)
                        
{
                            
*(tDataCache->DataPtr)=_filenum;
                            tDataCache
->_Last_FileNum=_filenum;
                            tDataCache
->DataPtr++;
                            
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
                        }

                        
*(tDataCache->DataPtr)=pushData;
                        tDataCache
->DataPtr++;
                    
//}
                    
//end function _Write2Cache
                }

                bptChar
+=2;
            }

            
else bptChar++;
        }

        
//*//_WriteZero();
        
//for(unsigned long i=1;i<0x7F00;i++) //Powered by barenx
        IndexMakerBuf *nDataCacheEpr=_WordsIndex+0x7F00;
        
for(IndexMakerBuf *nDataCache=_WordsIndex+1;nDataCache<nDataCacheEpr;nDataCache++)
        
{
            
if (nDataCache->_Last_FileNum==_filenum)
            
{
                
if (nDataCache->DataPtr==nDataCache->DataEpr) _Write2File(nDataCache);
                
*(nDataCache->DataPtr)=NULL;
                (nDataCache
->DataPtr)++;
            }

        }

        
//end _WriteZero();*/
    }

}
;

Powered by barenx
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值