最近对 Lucene 的检索进行了肤浅的学习。先是把论坛里大部分关于 Lucene 的帖子看了一下,大致了解了 Lucene 之后决定深入学习。在自己测试的时候,发现对大表创建索引时耗费的时间实在太长,想通过多线程来解决:根据一个表的总记录数来决定创建几个线程来创建索引,结果是报错:
D:\lucene\index\_a.fnm (系统找不到指定的文件。)
Lock obtain timed out: SimpleFSLock@D:\lucene\index\write.lock
之类
难道 Lucene 创建索引耗费时间太长的问题真的没有解决?因为是初学,没有去研究源代码,只是在自己的博客上发发感慨。无意中看到我这篇文章的朋友不要笑我肤浅啊,鼓了勇气才敢来写第一篇博客的。
我测试的代码:
主函数:
java 代码
- package luceneTest;
- import java.io.File;
- import java.sql.Connection;
- import java.sql.PreparedStatement;
- import java.sql.ResultSet;
- import java.sql.SQLException;
- import java.sql.Statement;
- import dataConnectionPool.DataConnectionPool;
- public class TMain {
- private String find = " select id,name from author";
- private Connection con;
- private Statement stmt;
- private File indexFile;
- private int count ;
- public TMain(File indexFile)
- {
- this.indexFile = indexFile;
- this.count = countD();
- }
- public int countD()
- {
- int count=0;
- try {
- con = DataConnectionPool.getBasicDataSource().getConnection();
- stmt = con.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE,ResultSet.CONCUR_READ_ONLY);
- ResultSet rs = stmt.executeQuery(find);
- while(rs.next())
- {
- rs.last(); // 移动到最后一行
- count = rs.getRow(); // 获得当前行号:此处即为最大记录数
- }
- System.out.println(" 表的总记录数 :"+count);
- } catch (SQLException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- return count;
- }
- public void create() throws InterruptedException
- {
- int num = count/10;
- for(int i = 1;i<=num;i++)
- {
- System.out.println("main start :"+((i-1)*10+1));
- CDataIndex cd = new CDataIndex(((i-1)*10+1),10,indexFile);
- cd.start();
- }
- }
- public void print(File indexFile, String markInfo)
- {
- FData fd = new FData(indexFile,markInfo);
- fd.findData();
- }
- public static void main(String[] args) {
- // TODO Auto-generated method stub
- File indexFile = new File("D:/lucene/index");
- String markInfo = "you";
- TMain tm = new TMain(indexFile);
- try {
- tm.create();
- tm.print(indexFile, markInfo);
- } catch (InterruptedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
调用一个建立索引的类:
java 代码
- package luceneTest;
- import java.io.File;
- import java.io.IOException;
- import java.sql.Connection;
- import java.sql.PreparedStatement;
- import java.sql.ResultSet;
- import java.sql.SQLException;
- import java.util.Date;
- import org.apache.lucene.analysis.cjk.CJKAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.FSDirectory;
- import dataConnectionPool.DataConnectionPool;
- public class CDataIndex extends Thread {
- private Connection con;
- private String find = " select id,name from author";
- private PreparedStatement ps;
- private int start = 0;
- private int num = 0;
- private File indexFile;
- private boolean mark = false; //通过这个来控制是用增量索引还是全局索引
- public CDataIndex(int start,int num,File indexFile)
- {
- this.start = start;
- this.num = num;
- this.indexFile = indexFile;
- }
- public void run()
- {
- System.out.println(" 开始索引!");
- Date beginDate = new Date();
- try {
- FSDirectory fsd = FSDirectory.getDirectory(indexFile.getAbsolutePath(), mark);
- if(IndexReader.isLocked(fsd)){ //这
- IndexReader.unlock(fsd);;
- }
- IndexWriter writerF = new IndexWriter(fsd,new CJKAnalyzer());
- // RAMDirectory ramD = new RAMDirectory();
- // IndexWriter writerR = new IndexWriter(ramD,new StandardAnalyzer());
- con = DataConnectionPool.getBasicDataSource().getConnection();
- ps = con.prepareStatement(find,ResultSet.TYPE_SCROLL_INSENSITIVE,ResultSet.CONCUR_READ_ONLY);
- ResultSet rs = ps.executeQuery();
- System.out.println(" start :"+start);
- rs.absolute(start);
- rs.previous();
- while(rs.next()&&num!=0)
- {
- Document doc = new Document();
- doc.add(new Field("id",rs.getString(1),Field.Store.YES, Field.Index.UN_TOKENIZED));
- doc.add(new Field("content",rs.getString(2), Field.Store.YES,Field.Index.TOKENIZED ));
- //writerR.optimize();
- //writerR.addDocument(doc);
- writerF.optimize();
- writerF.addDocument(doc);
- num--;
- }
- // writerR.optimize();
- // writerR.close();
- writerF.close();
- // writerF.addIndexes(new Directory[]{ramD});
- Date endDate = new Date();
- System.out.println("索引耗去的时间(毫秒) :"
- + (endDate.getTime() - beginDate.getTime()));
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (SQLException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }