我只提供入门级的教材,后续的优化、连接数据库等,还需要你们自己去扩展。授人以鱼不如授人以渔。
准备:
IKAnalyzer3.2.8.jar
lucene-analyzers-3.5.0.jar
lucene-core-3.5.0.jar
lucene-highlighter-3.5.0.jar
lucene-memory-3.5.0.jar
如果你使用maven的话:
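<!-- 这里我使用的是 3.5 的包,maven 仓库最低为 4.0.0,所以我直接把 3.5 的 jar 加到了 lib 目录下;另外 IKAnalyzer 包在仓库里没有,需要手动加到仓库里,我不喜欢麻烦。注意:下面的示例代码基于 3.5 的 API,如果改用下面 4.0.0 的依赖,需要相应调整代码(例如 org.apache.lucene.queryParser 包在 4.0 中改为 org.apache.lucene.queryparser.classic)。 -->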
<!-- 注:IKAnalyzer包在仓库里没有,需要手动加到仓库里,手动打包:http://blog.csdn.net/u013829202/article/details/71632636 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-memory</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-sandbox</artifactId>
<version>4.0.0</version>
</dependency>
代码:
package com.xl.ssm.lucene.lucene;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene getting-started example: builds an on-disk index from a small
 * in-memory data set and then runs a multi-field query against it.
 *
 * <p>Jars used: IKAnalyzer3.2.8.jar, lucene-analyzers-3.5.0.jar, lucene-core-3.5.0.jar,
 * lucene-highlighter-3.5.0.jar, lucene-memory-3.5.0.jar.
 *
 * @author 黑夜
 */
public class MyLucene2 {
    /** Location on disk where the index is stored. */
    private File indexFile = new File("E:\\lucene\\luceneIndex");

    // Alternative (requires the IKAnalyzer jar): new IKAnalyzer(true)
    /**
     * Analyzer shared by indexing and searching. Per the original author's note, the
     * standard analyzer splits Chinese text into single characters (unigram search).
     */
    private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

    /** Names of the indexed fields to search; list only the fields you need to query. */
    private String[] queryString = {"username", "content"};

    /**
     * Rebuilds the index from the records returned by {@link #getStore()}.
     *
     * <p>The writer and the directory are always closed, even when indexing fails,
     * so the index write lock is not left behind.
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    public void createIndex() throws IOException {
        // Open the directory that holds the index files.
        Directory directory = FSDirectory.open(indexFile);
        try {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            // Always recreate the index (for testing). Other modes: APPEND appends to an
            // existing index; CREATE_OR_APPEND creates it if missing, otherwise appends.
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
            try {
                // Load the data to index.
                List<Student> list = getStore();

                // Build one document per record and time the loop.
                long time1 = System.currentTimeMillis();
                for (Student student : list) {
                    Document document = new Document();
                    document.add(new Field("id", String.valueOf(student.getId()), Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("username", student.getName(), Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("content", student.getContent(), Field.Store.YES, Field.Index.ANALYZED));
                    writer.addDocument(document);
                }
                long time2 = System.currentTimeMillis();

                System.out.println("创建了" + writer.numDocs() + "条索引");
                System.out.println("一共花了" + (time2 - time1) + "毫秒");
                System.out.println("==============================");
            } finally {
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Searches the index for a fixed query string across the fields in
     * {@code queryString} and prints the hit count and the {@code content} of each hit.
     *
     * <p>Failures are reported with {@code printStackTrace()} and not propagated.
     */
    public void search() {
        Directory directory = null;
        IndexReader indexReader = null;
        try {
            directory = FSDirectory.open(indexFile);
            indexReader = IndexReader.open(directory);
            // Create the searcher; a different analyzer will give different results.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, queryString, analyzer);
            // Text to search for.
            Query query = queryParser.parse("吗");
            TopDocs topDocs = indexSearcher.search(query, 10000);
            System.out.println("一共查到:" + topDocs.totalHits + "记录");
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;
            for (int i = 0; i < scoreDoc.length; i++) {
                // Lucene's internal document number.
                int doc = scoreDoc[i].doc;
                System.out.println("编号:" + doc);
                // Load the stored document by its internal number.
                Document mydoc = indexSearcher.doc(doc);
                System.out.println("结果:" + mydoc.get("content"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Either resource may still be null if opening the index failed.
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Demo entry point: rebuilds the index, then runs the sample search.
     *
     * @param args unused
     * @throws ParseException declared for compatibility; not thrown by the current code
     */
    public static void main(String[] args) throws ParseException {
        MyLucene2 lu = new MyLucene2();
        try {
            lu.createIndex();
            lu.search();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Supplies the records to index. Here they are hard-coded; in a real
     * application they could come from a database query.
     *
     * @return the sample records
     */
    public List<Student> getStore() {
        List<Student> list = new ArrayList<Student>();

        Student stu1 = new Student();
        stu1.setId(1);
        stu1.setName("这是一个例子");
        stu1.setContent("你好");

        Student stu2 = new Student();
        stu2.setId(2);
        stu2.setName("我也是一个例子");
        stu2.setContent("你好吗?");

        list.add(stu1);
        list.add(stu2);
        return list;
    }
}
/**
 * Simple entity class used as the sample data source for the indexing example.
 *
 * @author lh
 */
class Student {
    int id;
    String name;
    String content;

    /** Creates an empty student; populate it through the setters. */
    public Student() {}

    /**
     * Creates a fully populated student.
     *
     * @param id      numeric identifier
     * @param name    student name
     * @param content free-text content
     */
    public Student(int id, String name, String content) {
        // The original constructor body was empty and silently dropped all arguments.
        this.id = id;
        this.name = name;
        this.content = content;
    }

    /** @return the numeric identifier */
    public int getId() {
        return id;
    }

    /** @param id the numeric identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the student name */
    public String getName() {
        return name;
    }

    /** @param name the student name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the free-text content */
    public String getContent() {
        return content;
    }

    /** @param content the free-text content */
    public void setContent(String content) {
        this.content = content;
    }
}
接下来,我会写关于 Solr 和 Elasticsearch(ES)全文检索的文章,当然也是入门级的。