第一个lucene例子

最新推荐文章于 2024-09-03 15:27:57 发布

sdwenfj

最新推荐文章于 2024-09-03 15:27:57 发布

阅读量416

点赞数 1

文章标签： lucene exception string url query 数据库

本文链接：https://blog.csdn.net/sdwenfj/article/details/4843521

版权

目前尚待解决的问题：英文单字的模糊查询（目前只支持整词检索）

package com.test.search;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.Token;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.compass.core.util.reader.StringReader;

public class LuceneTest {
private static String indexFilePath = "e://lucene//";

//private static String indexFilePathClass = "e://lucene//class//";
//private static String indexFilePathSchool = "e://lucene//school";

/**
 * Creates a helper that works on the given index root directory.
 *
 * <p>The path is stored in the class-wide static field, so it is shared by
 * every instance of this class.
 *
 * @param filepath root directory under which the per-type index folders live
 */
public LuceneTest(String filepath) {
  LuceneTest.indexFilePath = filepath;
}

/**
 * Demo entry point: runs one search against the "class" index and then prints
 * a completion message. Any failure is reported through its stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
  try {
    LuceneTest demo = new LuceneTest(indexFilePath);

    // Build the sample indexes once before the first search:
    //   demo.initIndex();
    // Other sample queries (several words may be separated by spaces):
    //   demo.search("student", "w"); // tokenization still unsolved; whole terms only
    //   demo.search("school", "东");
    demo.search("class", "一");

    // IndexWriter calls for maintaining single records:
    //   indexWriter.deleteDocuments(new Term("title", "1"));          // delete
    //   indexWriter.addDocument(document);                            // add
    //   indexWriter.updateDocument(new Term("title", "1"), document); // update

    System.out.println("检索完毕！");
  } catch (Exception failure) {
    failure.printStackTrace();
  }
}

/**
 * Runs a wildcard search on the {@code title} field of one index and prints the
 * stored {@code title}, {@code content} and {@code url} of every hit.
 *
 * <p>The index is read from {@code indexFilePath + File.separator + searchType}.
 * The keyword is wrapped as {@code *keyword*} and matched against the indexed
 * title terms. The searcher and the directory are always closed, even when the
 * search or a document fetch fails.
 *
 * @param searchType index category, used as the sub-directory name
 *                   (student, class or school)
 * @param serchWord  keyword to look for in the title field
 * @throws IllegalArgumentException if either argument is {@code null}
 * @throws Exception if the index cannot be opened or read
 */
public void search(String searchType, String serchWord) throws Exception {
  // A null would otherwise be concatenated as the literal text "null" into the
  // index path or the wildcard pattern and silently search the wrong thing.
  if (searchType == null || serchWord == null) {
    throw new IllegalArgumentException("searchType and serchWord must not be null");
  }

  Directory directory = new SimpleFSDirectory(new File(indexFilePath
      + File.separator + searchType));
  try {
    // Read-only searcher over the single index directory of this category.
    // Only one directory is searched, so no MultiSearcher wrapper is needed.
    IndexSearcher searcher = new IndexSearcher(directory, true);
    try {
      Query query = new WildcardQuery(new Term("title", "*" + serchWord + "*"));

      int startindex = 0; // first hit to print; a front end can use it for paging
      int maxpage = 300; // maximum number of hits printed per page

      // Request enough hits to cover the page that starts at startindex.
      TopDocs h = searcher.search(query, startindex + maxpage);

      // scoreDocs holds only the hits actually returned, so its length is the
      // safe upper bound for the loop.
      for (int i = startindex; i < h.scoreDocs.length; i++) {
        Document doc = searcher.doc(h.scoreDocs[i].doc);
        System.out.println(doc.get("title"));
        System.out.println(doc.get("content"));
        System.out.println(doc.get("url"));
      }
    } finally {
      searcher.close();
    }
  } finally {
    directory.close();
  }
}

/**
 * Closes the given index writer. An exception raised while closing is not
 * propagated; its stack trace is printed instead.
 *
 * @param indexWriter writer to close
 */
public void closeIndex(IndexWriter indexWriter) {
  try {
    indexWriter.close();
  } catch (Exception closeFailure) {
    closeFailure.printStackTrace();
  }
}

/**
* 初始化索引，在系统第一次启动时用，以后只对其做更新、删除等操作，
* 系统再重启后也不需要，因为索引已经以文件的形式存储在的索引文件中，永久有效
*
* @return
*/
public String initIndex() {
  /添加学生
  String title = "wenfujun";
  String content = "文付军和文亚龙: congratulations on your success!";
  String url = "http://wenfujun_url";
  addIndex(title, content, url, "student");

  title = "wenbo";
  content = "文博和文昊: congratulations on your success!";
  url = "http://wenbo_url";
  addIndex(title, content, url, "student");

  /添加班级
  title = "05界一班";
  content = "一年级一班!";
  url = "http://wenfujun_url";
  addIndex(title, content, url, "class");

  title = "03界二班";
  content = "一年级二班";
  url = "http://wenbo_url";
  addIndex(title, content, url, "class");

  //添加学校
  title = "山东省东明县第二高级中学";
  content = "东明二中是个很不错的中学，是我的母校";
  url = "http://wenfujun_url";
  addIndex(title, content, url, "school");

  title = "山东省菏泽市第一中高级中学";
  content = "山东菏泽一中的升学率非常高，教学质量那个高啊!!!";
  url = "http://wenbo_url";
  addIndex(title, content, url, "school");
  return null;
}

/**
 * Deletes every document whose {@code title} field contains the term
 * {@code "1"} from the index located directly at {@code indexFilePath}.
 *
 * <p>The writer is always closed afterwards so that the deletion is committed
 * and the index write lock is released. Failures are not propagated; their
 * stack trace is printed.
 *
 * @return always {@code null}
 */
public String deleteIndex() {
  try {
    IndexWriter writer = this.getWriter(indexFilePath);
    try {
      writer.deleteDocuments(new Term("title", "1"));
    } finally {
      // Without close() the change is never committed and the lock stays held.
      writer.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return null;
}

/**
 * Replaces the documents matching the term {@code title:"1"} in the index
 * located directly at {@code indexFilePath} with a new document built from the
 * given values.
 *
 * <p>{@code title} and {@code content} are stored and analyzed (searchable);
 * {@code url} is stored only and cannot be searched. The writer is always
 * closed afterwards so that the update is committed and the index write lock
 * is released. Failures are not propagated; their stack trace is printed.
 *
 * @param title   value of the title field
 * @param content value of the content field
 * @param url     value of the url field
 * @return always {@code null}
 */
public String updateIndex(String title, String content, String url) {
  // A Document corresponds to a database row, a Field to a column.
  Document document = new Document();
  document.add(new Field("url", url, Field.Store.YES, Field.Index.NO));
  document.add(new Field("content", content, Field.Store.YES,
      Field.Index.ANALYZED));
  document.add(new Field("title", title, Field.Store.YES,
      Field.Index.ANALYZED));
  try {
    IndexWriter writer = this.getWriter(indexFilePath);
    try {
      writer.updateDocument(new Term("title", "1"), document);
    } finally {
      // Without close() the change is never committed and the lock stays held.
      writer.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return null;
}

/**
 * Adds one document to the index of the given type, stored under
 * {@code indexFilePath + File.separator + indexType}.
 *
 * <p>{@code title} and {@code content} are stored and analyzed (searchable);
 * {@code url} is stored only and cannot be searched. The writer is closed in
 * every case, so a failed add does not leave the index write lock behind.
 *
 * @param title     value of the title field
 * @param content   value of the content field
 * @param url       value of the url field
 * @param indexType index category (student, class or school)
 * @return {@code true} if the document was added, {@code false} if any step
 *         failed (the stack trace is printed)
 */
public boolean addIndex(String title, String content, String url,
    String indexType) {
  try {
    // A Document corresponds to a database row, a Field to a column.
    Document document = new Document();
    document.add(new Field("url", url, Field.Store.YES, Field.Index.NO));
    document.add(new Field("content", content, Field.Store.YES,
        Field.Index.ANALYZED));
    document.add(new Field("title", title, Field.Store.YES,
        Field.Index.ANALYZED));

    IndexWriter indexWriter = this.getWriter(indexFilePath
        + File.separator + indexType);
    try {
      indexWriter.addDocument(document);
      indexWriter.optimize();
    } finally {
      // Always release the write lock, even when adding or optimizing fails.
      indexWriter.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
    return false;
  }
  return true;
}

/**
 * Checks whether a Lucene index exists in the given directory.
 *
 * <p>The directory handle opened for the check is always closed again.
 *
 * @param indexDir path of the directory to inspect
 * @return the result of {@code IndexReader.indexExists}; if the check itself
 *         fails, the stack trace is printed and {@code true} is returned
 */
public static boolean indexExist(String indexDir) {
  // Kept as the fallback value when the check throws.
  boolean isExt = true;
  try {
    Directory directory = new SimpleFSDirectory(new File(indexDir));
    try {
      isExt = IndexReader.indexExists(directory);
    } finally {
      directory.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return isExt;
}

/**
 * Opens an {@code IndexWriter} on the given index directory using a
 * {@code StandardAnalyzer}.
 *
 * <p>A new index is created when the directory has no {@code segments.gen}
 * file; otherwise the existing index is opened for further changes. The caller
 * is responsible for closing the returned writer.
 *
 * @param indexFilePath index directory; callers pass the path that already
 *                      includes the type (student, class or school) where needed
 * @return a writer on that directory
 * @throws CorruptIndexException     if the existing index is corrupt
 * @throws LockObtainFailedException if the index write lock cannot be obtained
 * @throws IOException               on any other low-level I/O failure
 */
private IndexWriter getWriter(String indexFilePath)
    throws CorruptIndexException, LockObtainFailedException,
    IOException {
  // Alternative analyzer that was tried: new ChineseAnalyzer()
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

  // segments.gen serves as the marker that an index already lives here.
  File segmentsMarker = new File(indexFilePath + File.separator + "segments.gen");
  // The boolean passed to IndexWriter is its "create" flag: true starts a
  // fresh index, false opens the existing one.
  boolean createNew = !segmentsMarker.exists();

  Directory target = new SimpleFSDirectory(new File(indexFilePath));
  IndexWriter.MaxFieldLength fieldLimit = new IndexWriter.MaxFieldLength(999999999);
  return new IndexWriter(target, analyzer, createNew, fieldLimit);
}

/**
 * Rebuilds the index in {@code e://lucene//} from scratch with two sample
 * documents. No longer used; superseded by {@code addIndex()}.
 *
 * <p>The index is first created empty and then reopened to append the
 * documents. The second writer and the directory are closed even when adding a
 * document fails, so no write lock is left behind.
 *
 * @throws Exception if the index cannot be created or written
 */
public void index() throws Exception {
  Directory directory = new SimpleFSDirectory(new File("e://lucene//"));
  try {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Pass 1: create == true wipes or creates the index under e://lucene//.
    IndexWriter writer1 = new IndexWriter(directory, analyzer, true,
        new IndexWriter.MaxFieldLength(25000));
    writer1.close();

    // Pass 2: create == false keeps the index just created and appends to it.
    IndexWriter writer2 = new IndexWriter(directory, analyzer, false,
        new IndexWriter.MaxFieldLength(25000));
    try {
      // Store.YES keeps the text in the index; Index.ANALYZED makes it searchable.
      Document doc = new Document();
      doc.add(new Field("title", "title:文付军和文亚龙",
          Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("content",
          "content:文付军和文亚龙: congratulations on your success!",
          Field.Store.YES, Field.Index.ANALYZED));
      // Further fields could be added here.
      writer2.addDocument(doc);

      // Second sample document, built the same way.
      doc = new Document();
      doc.add(new Field("title", "title:文博和文昊", Field.Store.YES,
          Field.Index.ANALYZED));
      doc.add(new Field("content",
          "content:文博和文昊: congratulations on your success!",
          Field.Store.YES, Field.Index.ANALYZED));
      writer2.addDocument(doc);
      // Further documents could be added here.
    } finally {
      writer2.close();
    }
  } finally {
    directory.close();
  }
}

/**
 * Searches the index at {@code indexFilePath} for documents whose
 * {@code title} matches {@code keyword} and whose {@code type} field matches
 * {@code type} (both conditions are required), then prints the stored
 * {@code title} and {@code type} of up to 30 hits. Currently unused.
 *
 * <p>Nothing happens when either argument is {@code null} or empty. Failures
 * are not propagated; their stack trace is printed. The searcher is always
 * closed once it has been opened.
 *
 * @param type    value to match in the type field
 * @param keyword value to match in the title field
 */
public void executeSearch(String type, String keyword) {
  if (type == null || "".equals(type) || keyword == null
      || keyword.equals("")) {
    return;
  }
  try {
    // Query strings and the fields they apply to, position by position.
    String key[] = { keyword, type };
    String fields[] = { "title", "type" };

    // MUST on both entries: a hit has to satisfy both conditions.
    BooleanClause.Occur flags[] = { BooleanClause.Occur.MUST,
        BooleanClause.Occur.MUST };
    // Alternative analyzer that was tried: new ChineseAnalyzer()
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    Query query = MultiFieldQueryParser.parse(key, fields, flags,
        analyzer);

    IndexSearcher searcher = new IndexSearcher(indexFilePath);
    try {
      int startindex = 0; // first hit to print; a front end can use it for paging
      int maxpage = 30; // maximum number of hits printed per page

      // Request enough hits to cover the page that starts at startindex.
      TopDocs h = searcher.search(query, startindex + maxpage);

      // scoreDocs holds only the hits actually returned, so its length is the
      // safe upper bound for the loop.
      for (int i = startindex; i < h.scoreDocs.length; i++) {
        Document doc = searcher.doc(h.scoreDocs[i].doc);
        System.out.println(doc.get("title"));
        System.out.println(doc.get("type"));
      }
    } finally {
      searcher.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}

/*
* public ArrayList<BaseItem> getDate(String sql) throws SQLException {
* ArrayList<BaseItem> item = new ArrayList<BaseItem>(); ConnBase dataConn =
* new ConnBase();//数据库连接 conn = dataConn.DBconn(); ps =
* conn.prepareStatement(sql); rs = ps.executeQuery(); //
* jdbcTemplate.execute(sql); while (rs.next()) { BaseItem i = new
* BaseItem(); i.setTitle(rs.getString("title")); // 对应你的Blog表里的title
* i.setContent(rs.getString("content")); // 取表里的博客内容
* i.setUr("SingleArticle_lucene.action?id=" + rs.getInt("blogId")); // 如 a.
* action ?id=8 item.add(i); } // 把数据库里的数据取出来 return item; }
*/

}

sdwenfj

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
第一个lucene例子

目前需要解决的问题是：解决英文单字模糊查询 package com.test.search;import java.io.File;import java.io.IOException;import java.util.ArrayList;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.
复制链接

扫一扫