好久没有写博客了,今天写一点(其实也不太想写)。最近在做搜索引擎相关的东西,需要对数据库中的数据建立索引。Lucene可以很方便地对文本文件(包括PDF等文件)建立索引,当然数据库中的数据也可以。
废话不说,贴代码吧:
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.patent.common.connectionManage.ConnectionManage;
import com.patent.common.connectionManage.ResultManage;
/**
 * Builds a Lucene index on disk from patent records read out of the database.
 *
 * <p>Typical use: construct an instance, call {@link #getResult(int)} once for
 * every page of records that should be indexed, then call {@link #close()} to
 * commit and release the on-disk index.
 *
 * <p>The on-disk writer is opened with {@code create == true}, so any index that
 * already exists at the target location is replaced.
 *
 * @author 陈建国
 */
public class IndexManage {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "E:/temp/index/";

    /** On-disk index directory; {@code null} if opening failed or after {@link #close()}. */
    private Directory fsdDirectory = null;
    /** Writer for the on-disk index; {@code null} if opening failed or after {@link #close()}. */
    private IndexWriter iwriter_fsd = null;
    /** Source of the database connections handed to {@code ResultManage}. */
    private ConnectionManage connManage = null;

    /**
     * Opens the index at the default location ({@code E:/temp/index/}).
     *
     * <p>An {@link IOException} raised while opening the index is printed to
     * standard error and the instance is left unusable; see
     * {@link #getResult(int)}.
     */
    public IndexManage() {
        this(DEFAULT_INDEX_PATH);
    }

    /**
     * Opens (and re-creates) the index at the given location.
     *
     * <p>An {@link IOException} raised while opening the index is printed to
     * standard error and the instance is left unusable; see
     * {@link #getResult(int)}.
     *
     * @param indexPath file-system path of the index directory
     */
    public IndexManage(String indexPath) {
        try {
            fsdDirectory = FSDirectory.getDirectory(indexPath);
            iwriter_fsd = new IndexWriter(fsdDirectory, new IKAnalyzer(), true,
                    IndexWriter.MaxFieldLength.LIMITED);
            connManage = new ConnectionManage();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes one page of records and merges it into the on-disk index.
     *
     * <p>The rows are first written to a temporary in-memory index, which is
     * then added to the on-disk index in a single step. The in-memory index,
     * its writer and the result set are always released before returning.
     * I/O and SQL failures are printed to standard error; the page is then
     * not (or only partially) merged.
     *
     * @param currectPage page number passed through to {@code ResultManage}
     *        (presumably selects which slice of rows is fetched; verify against
     *        that class)
     * @throws IllegalStateException if the index could not be opened by the
     *         constructor, or if {@link #close()} has already been called
     */
    public void getResult(int currectPage) {
        if (iwriter_fsd == null || connManage == null) {
            throw new IllegalStateException(
                    "Index is not open: initialisation failed or close() was already called");
        }

        Directory ramDirectory = new RAMDirectory();
        IndexWriter ramWriter = null;
        ResultSet rows = null;
        try {
            ramWriter = new IndexWriter(ramDirectory, new IKAnalyzer(), true,
                    IndexWriter.MaxFieldLength.LIMITED);
            ResultManage rsManage = new ResultManage(connManage.getConnection(), currectPage);
            rows = rsManage.getResultSet();
            while (rows.next()) {
                ramWriter.addDocument(toDocument(rows));
            }
            // Closing commits the pending documents and releases the write lock,
            // so the in-memory index is complete before it is merged.
            ramWriter.close();
            ramWriter = null;
            iwriter_fsd.addIndexes(new Directory[] { ramDirectory });
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeResultSet(rows);
            closeWriter(ramWriter);
            closeDirectory(ramDirectory);
        }
    }

    /**
     * Commits and closes the on-disk index.
     *
     * <p>The writer is closed even when the commit fails, so the index write
     * lock is not left behind. Calling this method more than once is harmless.
     * Failures are printed to standard error.
     */
    public void close() {
        if (iwriter_fsd != null) {
            try {
                iwriter_fsd.commit();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                closeWriter(iwriter_fsd);
                iwriter_fsd = null;
            }
        }
        closeDirectory(fsdDirectory);
        fsdDirectory = null;
    }

    /** Builds the Lucene document for the row the result set is currently positioned on. */
    private static Document toDocument(ResultSet row) throws SQLException {
        Document doc = new Document();
        doc.add(textField("PatentNo", row.getString("PatentNo")));
        doc.add(textField("PublicationDate", row.getString("PublicationDate")));
        doc.add(textField("Title", row.getString("Title")));
        doc.add(textField("Inventors", row.getString("Inventors")));
        doc.add(textField("ANA", row.getString("ANA")));
        // The indexed serial number is the SerialNo column followed by SeriesCode.
        doc.add(textField("SerialNo",
                nullToEmpty(row.getString("SerialNo")) + nullToEmpty(row.getString("SeriesCode"))));
        doc.add(textField("Filed", row.getString("Filed")));
        doc.add(textField("USCurrentClass", row.getString("USCurrentClass")));
        doc.add(textField("InternlClass", row.getString("InternlClass")));
        doc.add(textField("Abstract", row.getString("Abstract")));
        return doc;
    }

    /** Creates a stored, analyzed field; a SQL NULL value is indexed as the empty string. */
    private static Field textField(String name, String value) {
        // Field rejects null values, so a NULL column would otherwise abort the page.
        return new Field(name, nullToEmpty(value), Field.Store.YES, Field.Index.ANALYZED);
    }

    /** Returns the empty string for {@code null}, otherwise the value itself. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Closes the result set if it is non-null, printing any failure. */
    private static void closeResultSet(ResultSet rs) {
        if (rs == null) {
            return;
        }
        try {
            rs.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Closes the writer if it is non-null, printing any failure. */
    private static void closeWriter(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the directory if it is non-null, printing any failure. */
    private static void closeDirectory(Directory directory) {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
其实说白了,就是将数据库中的数据取出来,然后逐条构建Document并写入索引。不过今天遇到一个大问题:要取百万级别的数据时到底该怎么办?感觉速度好慢啊。