本文介绍了lucene的CRUD以及分页,并且着重介绍用于索引建立以及查询的两个类。本文在重点代码后附有注释,对于有lucene基础的读者会有所帮助;如果此前没有接触过lucene,估计看起来会比较吃力。
1:去官网下载相应的core包以及各种查询和分词器的包
2:以百度搜索为例:一条搜索结果有标题,内容,链接,作者,我们建立一个实体类
public class Article {
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getLink() {
return link;
}
public void setLink(String link) {
this.link = link;
}
private int id;
private String title;
private String author;
private String content;
private String link;
}
3:工具类
public class LuceneUtils {
private static Directory directory=null;
private static IndexWriterConfig config=null;
private static Version matchVersion=null;
private static Analyzer analyzer=null;
static{
try {
directory=FSDirectory.open(new File(“D://index/news”));//索引存放硬盘的位置
matchVersion=Version.LUCENE_44;//lucene的版本
analyzer=new StandardAnalyzer(matchVersion);//标准分词器,也是单字分词器
config=new IndexWriterConfig(matchVersion, analyzer);//索引建立的配置
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
*
*
* @return返回用于操作索引的对象...
* @throws IOException
*/
public static IndexWriter getIndexWriter() throws IOException{
IndexWriter indexWriter=new IndexWriter(directory,config);
return indexWriter;
}
/**
*
* 返回用于读取索引的对象..
* @return
* @throws IOException
*/
public static IndexSearcher getIndexSearcher() throws IOException{
IndexReader indexReader=DirectoryReader.open(directory);
IndexSearcher indexSearcher=new IndexSearcher(indexReader);
return indexSearcher;
}
/**
* 返回lucene 当前使用的版本信息...
*
* @return
*/
public static Version getMatchVersion() {
return matchVersion;
}
/**
*
*
* 返回lucene 使用的分词器...
* @return
*/
public static Analyzer getAnalyzer() {
return analyzer;
}
}
/**
 * Conversion helpers for {@code Article}.
 *
 * @author Administrator
 */
public class ArticleUtils {
    /**
     * Builds a Lucene document from an article by copying each property into
     * a stored field: {@code id} as an int field, {@code author} and
     * {@code link} as string fields, {@code title} and {@code content} as
     * text fields.
     *
     * @param article the article whose values are copied
     * @return a new document carrying the article's five fields
     */
    public static Document articleToDocument(Article article) {
        Document result = new Document();
        result.add(new IntField("id", article.getId(), Store.YES));
        result.add(new StringField("author", article.getAuthor(), Store.YES));
        result.add(new StringField("link", article.getLink(), Store.YES));
        result.add(new TextField("title", article.getTitle(), Store.YES));
        result.add(new TextField("content", article.getContent(), Store.YES));
        return result;
    }
}
4:DAO层,增删改查以及分页
/**
 * Data-access object for the article index: add, delete, update and paged
 * search. All modifications go through an {@code IndexWriter}; every writer
 * and reader opened here is closed before the method returns, even on failure.
 */
public class LuceneDao {
    /**
     * Adds one article to the index.
     *
     * @param article the article to index
     * @throws IOException if the index cannot be opened or written
     */
    public void addIndex(Article article) throws IOException {
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();
        try {
            Document doc = ArticleUtils.articleToDocument(article);
            indexWriter.addDocument(doc);
        } finally {
            // Always release the writer, otherwise it stays open after a failure.
            indexWriter.close();
        }
    }

    /**
     * Deletes every document whose given field contains the given term
     * (comparable to {@code delete from table where condition}).
     *
     * @param fieldName  name of the field to match
     * @param fieldValue term value to match in that field
     * @throws IOException if the index cannot be opened or written
     */
    public void delIndex(String fieldName, String fieldValue) throws IOException {
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();
        try {
            indexWriter.deleteDocuments(new Term(fieldName, fieldValue));
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Replaces the documents matching a term with a single new document:
     * the matching documents are deleted, then the given article is added.
     *
     * @param fieldName  name of the field identifying the documents to replace
     * @param fieldValue term value identifying the documents to replace
     * @param article    the article that becomes the replacement document
     * @throws IOException if the index cannot be opened or written
     */
    public void updateIndex(String fieldName, String fieldValue, Article article) throws IOException {
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();
        try {
            Term term = new Term(fieldName, fieldValue);
            Document doc = ArticleUtils.articleToDocument(article);
            // First argument selects what to replace, second is the new content.
            indexWriter.updateDocument(term, doc);
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Searches the {@code title} and {@code content} fields and returns one
     * page of results. Page 1 is {@code (0, 10)}, page 2 is {@code (10, 10)},
     * page 3 is {@code (20, 10)}, and so on.
     *
     * @param keywords query text, parsed with the shared analyzer
     * @param start    zero-based offset of the first hit to return; must not be negative
     * @param rows     maximum number of hits to return
     * @return the articles on the requested page; empty if the offset is past the last hit
     * @throws IllegalArgumentException if {@code start} is negative
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public List<Article> findIndex(String keywords, int start, int rows) throws Exception {
        if (start < 0) {
            // Previously surfaced as an opaque array-index or Lucene error.
            throw new IllegalArgumentException("start must not be negative: " + start);
        }
        IndexSearcher indexSearcher = LuceneUtils.getIndexSearcher();
        try {
            // Fields the query is matched against.
            String[] fields = {"title", "content"};
            // Alternative: a single-term query, e.g. new TermQuery(new Term("author", ...)).
            // The multi-field parser expands the keywords to title:keywords content:keywords.
            QueryParser queryParser = new MultiFieldQueryParser(
                    LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());
            Query query = queryParser.parse(keywords);
            // Fetch the top (start + rows) hits; the requested page is the tail of that range.
            TopDocs topDocs = indexSearcher.search(query, start + rows);
            System.out.println("总记录数==total==" + topDocs.totalHits);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            List<Article> articlelist = new ArrayList<Article>();
            // Fewer hits than requested may exist, so stop at whichever bound is smaller.
            int endResult = Math.min(scoreDocs.length, start + rows);
            for (int i = start; i < endResult; i++) {
                // docID is Lucene's internal number for the matching document.
                int docID = scoreDocs[i].doc;
                System.out.println("编号的标识===" + docID);
                Document document = indexSearcher.doc(docID);
                Article article = new Article();
                article.setId(Integer.parseInt(document.get("id")));
                article.setTitle(document.get("title"));
                article.setContent(document.get("content"));
                article.setLink(document.get("link"));
                article.setAuthor(document.get("author"));
                articlelist.add(article);
            }
            return articlelist;
        } finally {
            // The searcher wraps a reader opened just for this call; close it to free its files.
            indexSearcher.getIndexReader().close();
        }
    }
}
5:测试类
/**
 * JUnit-driven walkthrough of {@code LuceneDao}: bulk add, delete by term,
 * update by term, and a first-page search.
 */
public class JunitTest {
    private LuceneDao luceneDao = new LuceneDao();

    /** Indexes thirty articles with ids 0 through 29 and otherwise identical content. */
    @Test
    public void addIndex() throws IOException {
        int nextId = 0;
        while (nextId < 30) {
            Article sample = new Article();
            sample.setId(nextId);
            sample.setTitle("lucene 的一个全文检索服务器.");
            sample.setContent("lucene 的一个全文检索服务器");
            sample.setAuthor("java");
            sample.setLink("www.baidu.com");
            luceneDao.addIndex(sample);
            nextId++;
        }
    }

    /** Deletes the documents whose title contains the term "lucene". */
    @Test
    public void testDel() throws IOException {
        luceneDao.delIndex("title", "lucene");
    }

    /**
     * Update removes every document matched by the term and then creates
     * a single new document in their place.
     */
    @Test
    public void testUpdate() throws IOException {
        Article replacement = new Article();
        replacement.setId(9527);
        replacement.setAuthor("地方的");
        replacement.setTitle("对付对付对付");
        replacement.setContent("对付对付对付");
        replacement.setLink("http://www.baidu.com");
        luceneDao.updateIndex("title", "lucene", replacement);
    }

    /** Searches for "lucene" and prints every field of the first ten hits. */
    @Test
    public void testsearcher() throws Exception {
        // title and content are text fields; the analyzer in use tokenizes character by character.
        List<Article> hits = luceneDao.findIndex("lucene", 0, 10);
        for (int i = 0; i < hits.size(); i++) {
            Article hit = hits.get(i);
            System.out.println(hit.getId());
            System.out.println(hit.getAuthor());
            System.out.println(hit.getLink());
            System.out.println(hit.getContent());
            System.out.println(hit.getTitle());
        }
    }
}
至此一个简单的lucene CRUD以及分页就介绍完毕