lucene的基本使用

一、创建项目(Maven或java项目)

二、加入jar包(maven的pom.xml)

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>name.yaohuan</groupId>
    <artifactId>lucene</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <dependencies>
        <!-- Lucene core: indexing and search engine -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Standard analyzers (tokenizers/filters) -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Classic QueryParser for parsing query strings -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- MySQL JDBC driver (data source for the index) -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.32</version>
        </dependency>
    </dependencies>
</project>


如果是非maven项目,则加入以下jar包


三、执行sql脚本,去创建一个数据库,以便于下面的操作,比如:下面这个图


四、pojo

package com.tf.pojo;
/** 
* @author tf
* @time 2017年11月13日
* @version 1.0
* 备注:
* 注意:
*/
/**
 * Simple data holder for one row of the {@code book} table: id, name,
 * price, picture path and description. Plain JavaBean with a no-arg
 * constructor and getter/setter pairs for every field.
 */
public class Book {
	private Integer id;
	private String name;
	private Float price;
	private String pic;
	private String description;

	/** Creates an empty book; populate it through the setters. */
	public Book() {
	}

	public Integer getId() {
		return id;
	}

	public void setId(Integer id) {
		this.id = id;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public Float getPrice() {
		return price;
	}

	public void setPrice(Float price) {
		this.price = price;
	}

	public String getPic() {
		return pic;
	}

	public void setPic(String pic) {
		this.pic = pic;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	/** Renders all fields in the standard "Book [id=..., ...]" form. */
	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder("Book [id=");
		sb.append(id);
		sb.append(", name=").append(name);
		sb.append(", price=").append(price);
		sb.append(", pic=").append(pic);
		sb.append(", description=").append(description);
		sb.append("]");
		return sb.toString();
	}

}



五、dao
package com.tf.dao;

import java.util.List;

import com.tf.pojo.Book;

/** 
* @author tf
* @time 2017年11月13日
* @version 1.0
* 备注:
* 注意:
*/
public interface BookDao {
	/**
	 * Loads every book from the data source.
	 *
	 * @return all books; implementations may return an empty list on failure
	 */
	public List<Book> queryBooks();
}



六、dao.impl
package com.tf.dao.impl;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;

import com.tf.dao.BookDao;
import com.tf.pojo.Book;

/** 
* @author tf
* @time 2017年11月13日
* @version 1.0
* 备注:
* 注意:
*/
/**
 * JDBC implementation of {@link BookDao}: reads every row of the
 * {@code book} table in the local MySQL {@code lucene} database.
 */
public class BookDaoImpl implements BookDao {

	/**
	 * Runs {@code select * from book} and maps each row to a {@link Book}.
	 *
	 * @return the list of all books; empty if the driver is missing or the
	 *         query fails (the exception is printed, not rethrown)
	 */
	@Override
	public List<Book> queryBooks() {
		Connection connection = null;
		PreparedStatement prepareStatement = null;
		ResultSet resultSet = null;
		List<Book> bookList = new ArrayList<Book>();
		try {
			// Load the MySQL JDBC driver
			Class.forName("com.mysql.jdbc.Driver");
			// Open the connection
			connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/lucene","root","123");
			// Prepare and execute the query
			String sql = "select * from book";
			prepareStatement = connection.prepareStatement(sql);
			resultSet = prepareStatement.executeQuery();
			// Map each row to a Book
			while (resultSet.next()) {
				Book book = new Book();
				book.setId(resultSet.getInt("id"));
				book.setName(resultSet.getString("name"));
				book.setPic(resultSet.getString("pic"));
				book.setPrice(resultSet.getFloat("price"));
				book.setDescription(resultSet.getString("description"));
				bookList.add(book);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Close in reverse order of acquisition: ResultSet, then Statement,
			// then Connection. (The original closed the Connection first, which
			// invalidates the other two.) Each close gets its own try so one
			// failure cannot skip the remaining closes.
			try {
				if (resultSet != null) {
					resultSet.close();
				}
			} catch (Exception e2) {
				e2.printStackTrace();
			}
			try {
				if (prepareStatement != null) {
					prepareStatement.close();
				}
			} catch (Exception e2) {
				e2.printStackTrace();
			}
			try {
				if (connection != null) {
					connection.close();
				}
			} catch (Exception e2) {
				e2.printStackTrace();
			}
		}
		return bookList;
	}

}




七、建立索引
创建测试类CreateIndex
1、查询数据源,创建document
2、创建分词器
分词:将field域中的内容一个个的分词
过滤:将分好的词进行过滤,比如去掉标点符号、大写转小写、词的型还原(复数转单数、过去式转成现在式)、停用词过滤
停用词:单独应用没有特殊意义的词。比如的、啊、等,英文中的this is a the等等
package com.tf.test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.tf.dao.BookDao;
import com.tf.dao.impl.BookDaoImpl;
import com.tf.pojo.Book;

/** 
* @author tf
* @time 2017年11月13日
* @version 1.0
* 备注:
* 注意:
*/
/**
 * Demo test class for basic Lucene 4.10.3 index maintenance: create,
 * search, delete (by term / all) and update against a filesystem index
 * at {@code d:\bookindex1711\}.
 */
public class CreateIndex {
	/**
	 * Builds the index: loads all books from the database, converts each
	 * one to a Document and writes them with an IndexWriter (IK analyzer
	 * for Chinese tokenization).
	 *
	 * @throws IOException if the index directory cannot be opened/written
	 */
	@Test
	public void createIndex() throws IOException{
		// Collect the source data
		BookDao bookDao = new BookDaoImpl();
		List<Book> bookList = bookDao.queryBooks();
		for (Book book : bookList) {
			System.out.println(book.getPic());
		}
		// Build one Document per book
		List<Document> doclist = new ArrayList<Document>();
		
		for (Book book : bookList) {
			Document doc = new Document();
			// id: stored, not tokenized, indexed
			Field id = new StringField("id",book.getId().toString(),Store.YES);
			// name: stored, tokenized, indexed
			Field name = new TextField("name", book.getName(), Store.YES);
			// Index-time boost so book id 4 ranks higher.
			// (Query-time alternative: call setBoost(100f) on the Query.)
			if (book.getId() == 4) {
				name.setBoost(100f);
			}
			// price: stored, not tokenized, indexed
			Field price = new FloatField("price", book.getPrice(), Store.YES);
			// pic: stored only, not indexed
			Field pic = new StoredField("pic", book.getPic());
			// description: not stored, tokenized, indexed
			Field description = new TextField("description", book.getDescription(),Store.NO);
			 
			doc.add(id);
			doc.add(name);
			doc.add(price);
			doc.add(pic);
			doc.add(description);
			
			doclist.add(doc);
		}
		// Analyzer: tokenizes field content, then filters the tokens
		// (punctuation removal, lowercasing, stemming, stop words).
//		Analyzer analyzer = new StandardAnalyzer();
		// Chinese analyzer
		Analyzer analyzer = new IKAnalyzer();
		// Open the index directory and write all documents
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
		File indexFile = new File("d:\\bookindex1711\\");
		FSDirectory directory = FSDirectory.open(indexFile);
		IndexWriter writer = new IndexWriter(directory, config);
		for (Document document : doclist) {
			writer.addDocument(document);
		}
		writer.close();
	}
	/**
	 * Searches the index with a parsed query and prints the hits.
	 *
	 * @throws ParseException if the query string is malformed
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void searchIndex() throws ParseException, IOException{
		// arg 1: default field to search; arg 2: analyzer for the query text
		QueryParser parser = new QueryParser("description", new StandardAnalyzer());
		Query query = parser.parse("description:spring AND mybatis");
		// Open the index directory
		File indexFile = new File("d:\\bookindex1711\\");
		FSDirectory directory = FSDirectory.open(indexFile);
		
		DirectoryReader reader = DirectoryReader.open(directory);
		IndexSearcher searcher = new IndexSearcher(reader);
		
		TopDocs topDocs = searcher.search(query, 10);
		int count = topDocs.totalHits;
		System.out.println("查询到:"+count+"条记录");
		ScoreDoc[] scoreDocs = topDocs.scoreDocs;
		
		List<Book> list = new ArrayList<>();
		for (ScoreDoc scoreDoc : scoreDocs) {
			Book book = new Book();
			// Lucene's internal document id
			int docId = scoreDoc.doc;
			Document doc = searcher.doc(docId);
			book.setId(Integer.valueOf(doc.get("id")));
			book.setName(doc.get("name"));
			book.setPrice(Float.valueOf(doc.get("price")));
			book.setPic(doc.get("pic"));
			// null here: description was indexed with Store.NO
			book.setDescription(doc.get("description"));
			list.add(book);
		}
		// Close the reader once the stored fields have been read
		// (the original leaked it, holding index files open).
		reader.close();
		for(Book book: list){
			System.out.println(book);
		}
	}
	/**
	 * Deletes every document whose "name" field contains the term "solr".
	 *
	 * @throws IOException if the index cannot be opened/written
	 */
	@Test
	public void deleteIndex() throws IOException{
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
		File indexFile = new File("d:\\bookindex1711\\");
		FSDirectory directory = FSDirectory.open(indexFile);
		IndexWriter writer = new IndexWriter(directory, config);
		Term term = new Term("name","solr");
		writer.deleteDocuments(term);
		writer.close();
	}
	/**
	 * Deletes every document in the index.
	 *
	 * @throws IOException if the index cannot be opened/written
	 */
	@Test
	public void deleteAll() throws IOException{
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
		File file = new File("d:\\bookindex1711\\");
		FSDirectory directory = FSDirectory.open(file);
		IndexWriter writer = new IndexWriter(directory,config);
		writer.deleteAll();
		writer.close();
	}
	/**
	 * Replaces every document matching name:"solr" with a new document.
	 *
	 * NOTE(review): updateDocument replaces the WHOLE document — the
	 * replacement here carries only a "name" field, so id/price/pic/
	 * description are lost for the updated documents. Intentional for the
	 * demo; confirm before reusing.
	 *
	 * @throws IOException if the index cannot be opened/written
	 */
	@Test
	public  void  updateIndex() throws IOException {
		Analyzer analyzer = new StandardAnalyzer();

		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
		File indecFile = new File("d:\\bookindex1711\\");
		FSDirectory directory = FSDirectory.open(indecFile);
		IndexWriter writer = new IndexWriter(directory, config);
		
		// The replacement document
		Document doc = new Document();
		Field name = new TextField("name", "JaveEE 实战",Store.YES); 
		doc.add(name);
		
		// Term selecting the documents to replace
		Term term = new Term("name", "solr");
		writer.updateDocument(term, doc);
		
		writer.close();
	}

}





八、数据查询
package com.tf.test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import com.tf.pojo.Book;

/** 
* @author tf
* @time 2017年11月14日
* @version 1.0
* 备注:
* 注意:
*/
/**
 * Demo test class for the main Lucene query types (TermQuery,
 * NumericRangeQuery, BooleanQuery, QueryParser, MultiFieldQueryParser)
 * against the filesystem index at {@code d:\bookindex1711\}.
 */
public class IndexSearch {
	
	/**
	 * Runs the given query against the index, maps the top 10 hits back to
	 * {@link Book} objects and prints them.
	 *
	 * @param query the query to execute
	 * @throws IOException if the index cannot be read
	 */
	private void doQuery(Query query) throws IOException {
		File indexFile = new File("d:\\bookindex1711\\");
		FSDirectory directory = FSDirectory.open(indexFile);
		
		IndexReader reader = DirectoryReader.open(directory);
		IndexSearcher searcher = new IndexSearcher(reader);
		
		TopDocs topDocs = searcher.search(query, 10);
		int count = topDocs.totalHits;
		System.out.println("查询到:"+count+"条记录");
		ScoreDoc[] scoreDocs = topDocs.scoreDocs;
		
		List<Book> list = new ArrayList<>();
		for (ScoreDoc scoreDoc : scoreDocs) {
			Book book = new Book();
			// Lucene's internal document id
			int docId = scoreDoc.doc;
			Document doc = searcher.doc(docId);
			book.setId(Integer.valueOf(doc.get("id")));
			book.setName(doc.get("name"));
			book.setPrice(Float.valueOf(doc.get("price")));
			book.setPic(doc.get("pic"));
			// null when description was indexed with Store.NO
			book.setDescription(doc.get("description"));
			list.add(book);
		}
		// Close the reader once the stored fields have been read
		// (the original leaked it, holding index files open).
		reader.close();
		for(Book book: list){
			System.out.println(book);
		}
	}
	
	/**
	 * Exact (untokenized) single-term match on the "name" field.
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void termQuerySearch() throws IOException {
		Query query = new TermQuery(new Term("name","spring"));
		doQuery(query);
	}
	
	/**
	 * Numeric range query: 50 &lt; price &lt;= 100.
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void numericRangeQuerySearch() throws IOException{
		Query query = NumericRangeQuery.newFloatRange("price",50F,100F, false, true);
		doQuery(query);
	}
	/**
	 * Combines sub-queries with boolean clauses.
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void booleanQuerySearch() throws IOException {
		BooleanQuery query = new BooleanQuery();
		Query query1 = new TermQuery(new Term("name","spring"));
		Query query2 = NumericRangeQuery.newFloatRange("price",50F,100F, false, false);
		/*
		 * MUST: AND
		 * MUST_NOT: must not contain
		 * SHOULD: OR
		 */
		query.add(query1,Occur.MUST);
		query.add(query2,Occur.MUST);
		doQuery(query);
	}
	/**
	 * Parses a query string ("description:spring AND mybatis") with the
	 * classic QueryParser.
	 * @throws Exception if parsing or reading fails
	 */
	@Test
	public void queryParseSearch() throws Exception{
		QueryParser parser = new QueryParser("description",new StandardAnalyzer());
		Query query = parser.parse("description:spring AND mybatis");
		doQuery(query);
	}
	/**
	 * Parses a query string over several fields at once.
	 * Operators in the string: + = AND, space = OR, - = NOT.
	 * @throws ParseException if the query string is malformed
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void multiFileQuerySearch() throws ParseException, IOException {
		String[] fields = {"name","description"};
		MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
		Query query = parser.parse("+name:spring description:spring");
		doQuery(query);
	}
	/**
	 * Relevance-ranked term search (hits come back ordered by score).
	 * @throws IOException if the index cannot be read
	 */
	@Test
	public void Search() throws IOException {
		Query query = new TermQuery(new Term("name","solr"));
		doQuery(query);
	}
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值