void IndexFiles(char* pszDataPath, char* pszIndexPath, const bool clearIndex)
{
IndexWriter* writer = NULL;
lucene::analysis::standard::StandardAnalyzer an;
writer = _CLNEW IndexWriter(pszIndexPath ,&an, true);
writer->setMaxFieldLength(IndexWriter::DEFAULT_MAX_FIELD_LENGTH);
writer->setUseCompoundFile(false);
uint64_t str = lucene::util::Misc::currentTimeMillis();
indexDocs(writer, pszDataPath);
writer->optimize();
writer->close();
}
/**
 * Adds the file(s) at `directory` to the index through `writer`.
 *
 * If `directory` can be opened with opendir(), every entry except "." and ".."
 * is visited: sub-directories are handled by a recursive call, anything else
 * is turned into a Document by FileDocument() and added to the writer.
 * If opendir() fails, `directory` itself is indexed as a single file.
 *
 * Entries whose full path would not fit in CL_MAX_DIR bytes, or that cannot be
 * stat'ed, are reported on stderr and skipped.
 *
 * @param writer     Index writer that receives the documents.
 * @param directory  Path of the directory (or single file) to index.
 */
void indexDocs(IndexWriter* writer, char* directory)
{
    DIR* dir = opendir(directory);
    if ( dir == NULL )
    {
        // Not openable as a directory: index the path itself as one file.
        printf( "adding: %s\n", directory);
        Document* doc = FileDocument( directory );
        writer->addDocument( doc );
        _CLDELETE(doc);
        return;
    }

    // Build "<directory><delimiter>" once; each entry name is written after it.
    char path[CL_MAX_DIR];
    const size_t dirLen = strlen(directory);
    const size_t prefixLen = dirLen + strlen(PATH_DELIMITERA);
    if ( prefixLen >= sizeof(path) )
    {
        fprintf( stderr, "skipping (path too long): %s\n", directory);
        closedir(dir);
        return;
    }
    strcpy(path, directory);
    strcpy(path + dirLen, PATH_DELIMITERA);

    char* pathP = path + prefixLen;                  // where entry names go
    const size_t room = sizeof(path) - prefixLen;    // bytes left, incl. terminator

    struct fileStat buf;
    for ( struct dirent* fl = readdir(dir); fl != NULL; fl = readdir(dir) )
    {
        if ( strcmp(fl->d_name, ".") == 0 || strcmp(fl->d_name, "..") == 0 )
            continue;

        // Refuse names that would overflow the fixed-size path buffer.
        if ( strlen(fl->d_name) >= room )
        {
            fprintf( stderr, "skipping (path too long): %s%s\n", path, fl->d_name);
            pathP[0] = 0;
            continue;
        }
        strcpy(pathP, fl->d_name);

        // Only trust buf when the stat call succeeded.
        if ( fileStat(path,&buf) != 0 )
        {
            fprintf( stderr, "skipping (cannot stat): %s\n", path);
            continue;
        }

        if ( buf.st_mode & S_IFDIR )
        {
            indexDocs(writer, path );
        }
        else
        {
            Document* doc = FileDocument( path );
            writer->addDocument(doc);
            _CLDELETE(doc);
        }
    }
    closedir(dir);
}
/**
 * Builds the Document for a single file.
 *
 * The document always gets a stored, untokenized "path" field holding `f`.
 * If the file can be opened, its contents are read in chunks and added as a
 * stored, tokenized "contents" field with term vectors and offsets.
 *
 * @param f  Path of the file to describe.
 * @return   Newly allocated Document; the caller releases it (the callers in
 *           this file use _CLDELETE).
 */
Document* FileDocument(const char* f)
{
    Document* doc = _CLNEW Document();

    TCHAR tf[CL_MAX_DIR];
    STRCPY_AtoT(tf,f,CL_MAX_DIR);
    // NOTE(review): STRCPY_AtoT's termination behaviour is not visible here;
    // force a terminator so an over-long path cannot leave tf unterminated.
    tf[CL_MAX_DIR-1] = 0;
    doc->add( *_CLNEW Field(_T("path"), tf, Field::STORE_YES | Field::INDEX_UNTOKENIZED ) );

    FILE* fh = fopen(f,"r");
    if ( fh == NULL )
        return doc;   // unreadable file: path-only document

    StringBuffer str;

    // Pre-size the buffer from the file size, but only if fstat succeeded;
    // otherwise st_size would be uninitialized.
    struct stat filestat;
    if ( fstat(fileno(fh), &filestat) == 0 && filestat.st_size > 0 )
        str.reserve(filestat.st_size);

    // Read at most 1023 bytes per pass so both buffers keep room for a terminator.
    char abuf[1024];
    TCHAR tbuf[1024];
    size_t r;
    while ( (r = fread(abuf,1,1023,fh)) > 0 )
    {
        abuf[r]=0;
        STRCPY_AtoT(tbuf,abuf,r);
        tbuf[r]=0;
        str.append(tbuf);
    }
    fclose(fh);

    doc->add( *_CLNEW Field(_T("contents"),str.getBuffer(), Field::STORE_YES | Field::INDEX_TOKENIZED|Field::TERMVECTOR_WITH_OFFSETS) );
    return doc;
}