lucene搜索引擎（文件索引、数据库索引）

最新推荐文章于 2024-10-15 10:14:15 发布

rick_123

最新推荐文章于 2024-10-15 10:14:15 发布

阅读量1.4k

点赞数

分类专栏：lucene 搜索引擎　文章标签：lucene、数据库、搜索引擎、string、exception、null

本文链接：https://blog.csdn.net/rick_123/article/details/6637065

版权

lucene 搜索引擎专栏收录该内容

7 篇文章 1 订阅

订阅专栏

lucene搜索引擎（文件索引、数据库索引）

第一步：搜集数据并创建索引文件，供搜索时检索使用。

//1

private static IndexWriter writer; // shared (static) writer for the index being built; assigned by createWriter

/**
 * Creates the shared {@code writer} for the given index location and applies
 * the tuning settings used while bulk-building the index.
 *
 * @param IdxScrDir index location handed to the IndexWriter constructor
 * @param bool      third IndexWriter constructor argument
 *                  (presumably the "create new index" flag - confirm against the Lucene version in use)
 * @return {@code true} when the writer was created and configured,
 *         {@code false} when an I/O level failure was reported (stack trace is printed)
 */
private boolean createWriter(String IdxScrDir, boolean bool) {
  boolean ready;
  try {
   writer = new IndexWriter(IdxScrDir, new StandardAnalyzer(), bool);
   writer.setUseCompoundFile(false);
   writer.setMergeFactor(100);
   writer.setMaxMergeDocs(1000);
   writer.setMaxFieldLength(Integer.MAX_VALUE);
   writer.setMaxBufferedDocs(Integer.MAX_VALUE);
   ready = true;
  } catch (IOException e) {
   // Corrupt-index and lock-obtain failures are IOException subtypes in
   // Lucene 2.x, so one handler covers all three original cases.
   e.printStackTrace();
   ready = false;
  }
  return ready;
}

//2

/**
 * Extracts the bracketed placeholder names from a path template.
 *
 * The template is split on {@code ]}; for every resulting piece the text
 * after its first {@code [} is kept, and pieces without a {@code [} become
 * {@code null}. Example: {@code "/news/[date]/[id].html"} yields
 * {@code {"date", "id", null}}.
 *
 * @param path path template, must not be {@code null}
 * @return one entry per split piece (entries may be {@code null}), or
 *         {@code null} when the template contains no {@code ]} at all
 */
private String[] getFeildByPath(String path){
  // No closing bracket anywhere: the template has no placeholders.
  if (!path.contains("]")) {
   return null;
  }
  String[] pieces = path.split("]");
  for (int idx = 0; idx < pieces.length; idx++) {
   String piece = pieces[idx];
   int open = piece.indexOf('[');
   pieces[idx] = (open == -1) ? null : piece.substring(open + 1);
  }
  return pieces;
}

/**
 * Reads rows from the database and adds one Lucene document per row to the
 * shared {@code writer}.
 *
 * For every entry of {@code st} the configured statement is executed, the
 * entry's path template has its {@code [column]} placeholders filled from the
 * current row, and a document with the fields {@code id}, {@code title},
 * {@code content}, {@code path} and {@code type} is written. The content
 * column is stripped of HTML markup before it is indexed.
 *
 * @param st table descriptions to index
 * @return {@code true} when every table was processed, {@code false} as soon
 *         as any exception occurs while reading or indexing (stack trace is printed)
 */
private boolean createsqlDocument(searchtables[] st) {
  for (int i = 0; i < st.length; i++) {
   String pathTemplate = st[i].getpath();
   System.out.println(pathTemplate);
   // Placeholder column names found in the template; individual entries may be null.
   String[] s = getFeildByPath(pathTemplate);
   // NOTE(review): the value of gettablename() is executed as the SQL text - confirm it holds a full query.
   String sql = st[i].gettablename();
   try {
    ResultSet rs = Global.getInstance().getDataSource(st[i].getdbname()).ExecuteSQL(sql);
    System.out.println(rs.getCount());
    System.out.println(rs.getSize());
    for (int j = 0; j < rs.getCount(); j++) {
     // Start from the raw template for every row and substitute its placeholders.
     String urlpath = st[i].getpath();
     if (s != null) {
      for (int j2 = 0; j2 < s.length; j2++) {
       if (s[j2] != null) {
        ResultSetMetaData rsmd = rs.getMetaData(s[j2]);
        // 93 equals java.sql.Types.TIMESTAMP - presumably the column type code; verify against ResultSetMetaData.
        if (rsmd.getType() == 93) {
         // Re-format the leading "yyyy?MM?dd" part of the value as "yyyy/MM/dd".
         String date = rs.GetCellValue(j, s[j2]);
         String newdate = date.substring(0, 4) + "/" + date.substring(5, 7) + "/" + date.substring(8, 10);
         urlpath = urlpath.replace("[" + s[j2] + "]", newdate);
        } else {
         urlpath = urlpath.replace("[" + s[j2] + "]", rs.GetCellValue(j, s[j2]));
        }
       }
      }
     }
     Document doc = new Document();
     // Strings are immutable: the stripped text has to be captured, otherwise
     // the raw HTML would be indexed. A null cell becomes "" so that building
     // the Field does not fail for rows without content.
     String content = stripHtmlMarkup(rs.GetCellValue(j, st[i].getcontents()));
     doc.add(new Field("id", rs.GetCellValue(j, st[i].gettitleid()), Field.Store.YES, Field.Index.TOKENIZED));
     doc.add(new Field("title", rs.GetCellValue(j, st[i].gettitle()), Field.Store.YES, Field.Index.TOKENIZED));
     doc.add(new Field("content", content, Field.Store.YES, Field.Index.TOKENIZED));
     doc.add(new Field("path", urlpath, Field.Store.YES, Field.Index.TOKENIZED));
     doc.add(new Field("type", rs.GetCellValue(j, st[i].gettypename()), Field.Store.YES, Field.Index.TOKENIZED));
     writer.addDocument(doc);
    }
   } catch (Exception e) {
    e.printStackTrace();
    return false;
   }
  }
  return true;
}

/**
 * Removes the head section, script blocks, remaining tags and short character
 * entities from an HTML fragment.
 *
 * @param html raw cell value, may be {@code null}
 * @return the stripped text, or {@code ""} for {@code null} or empty input
 */
private String stripHtmlMarkup(String html) {
  if (html == null || html.equals("")) {
   return "";
  }
  return html
    .replaceAll("<head>.*</head>|<script>.*?</script>", "")
    .replaceAll("<.*?>|&.{2,5};", "");
}

//3

/**
* Optimizes the index by calling {@code optimize()} on the shared
* {@code writer}. The writer must already have been assigned by
* {@code createWriter}.
*
* @throws CorruptIndexException propagated from {@code writer.optimize()}
* @throws IOException propagated from {@code writer.optimize()}
*/

private void optimizeWriter() throws CorruptIndexException, IOException {
writer.optimize();
}
/**
* Closes the index by calling {@code close()} on the shared {@code writer}.
* The writer must already have been assigned by {@code createWriter}.
*
* @throws CorruptIndexException propagated from {@code writer.close()}
* @throws IOException propagated from {@code writer.close()}
*/

private void colseWriter() throws CorruptIndexException, IOException {
writer.close();
}

//4

/**
 * Builds the index in one call: opens the writer, indexes the database rows,
 * optimizes the index and closes the writer.
 *
 * The writer is closed in a {@code finally} block so that it is released even
 * when indexing or optimizing throws, and any exception makes the method
 * report failure instead of returning a stale success flag.
 *
 * @param dirpath index location, forwarded to {@code createWriter}
 * @param bool    flag forwarded to {@code createWriter}
 * @param st      table descriptions, forwarded to {@code createsqlDocument}
 * @return {@code true} only when the writer was opened, all rows were indexed
 *         and the index was optimized and closed without an exception
 */
public boolean createIndex(String dirpath,boolean bool,searchtables[] st){
  boolean flag = false;
  try {
   if (createWriter(dirpath, bool)) {
    try {
     flag = createsqlDocument(st);
     // Optimize even when indexing reported failure, matching the previous flow.
     optimizeWriter();
    } finally {
     // Always release the writer once it has been opened.
     colseWriter();
    }
   }
  } catch (Exception e) {
   e.printStackTrace();
   flag = false;
  }
  return flag;
}