1.Lucene是什么
Lucene是Apache软件基金会旗下的一个开源全文检索引擎工具包。
2.什么是全文检索
全文检索就是先分词创建索引,再执行搜索的过程。
3.什么是分词
分词就是将一段文字切分成一个个独立的词语。
4.Lucene实现全文检索的流程
全文检索的流程分为两大部分:索引流程、搜索流程。
索引流程:采集数据--->构建文档对象--->创建索引(将文档写入索引库)。
搜索流程:创建查询--->执行搜索--->渲染搜索结果。
5.代码
5.1项目结构
5.2BookDao代码
1 package cn.hc.dao; 2 3 import cn.hc.pojo.Book; 4 import cn.hc.util.GetConn; 5 import org.apache.lucene.document.*; 6 7 import java.sql.Connection; 8 import java.sql.ResultSet; 9 import java.sql.SQLException; 10 import java.sql.Statement; 11 import java.util.ArrayList; 12 import java.util.List; 13 14 public class BookDao { 15 16 public List<Book> queryAll(){ 17 /* 18 1.获得连接 19 2.获得操作对象 20 3.执行sql语句 21 4.关闭 22 * */ 23 Connection conn = GetConn.getConn(); 24 try { 25 Statement statement = conn.createStatement(); 26 ResultSet resultSet = statement.executeQuery("select * from book;"); 27 List<Book> books=new ArrayList<>(); 28 while(resultSet.next()){ 29 Book book = new Book(); 30 book.setId(resultSet.getInt("id")); 31 book.setDescription(resultSet.getString("description")); 32 book.setPic(resultSet.getString("pic")); 33 book.setPrice(resultSet.getFloat("price")); 34 book.setName(resultSet.getString("name")); 35 books.add(book); 36 } 37 return books; 38 } catch (SQLException e) { 39 e.printStackTrace(); 40 return null; 41 } 42 43 } 44 45 public List<Document> getDocuments(List<Book> books){ 46 List<Document> documents = new ArrayList<>(); 47 for (Book book : books) { 48 Document document = new Document(); 49 Field id = new StoredField("id",book.getId().toString()); 50 Field name = new TextField("name", book.getName(), Field.Store.YES); 51 Field price = new FloatField("price",book.getPrice(), Field.Store.YES); 52 Field pic = new StoredField("pic", book.getPic()); 53 Field description = new TextField("description", book.getDescription(), Field.Store.NO); 54 document.add(id); 55 document.add(name); 56 document.add(pic); 57 document.add(price); 58 document.add(description); 59 documents.add(document); 60 } 61 return documents; 62 } 63 64 65 }
5.3 pojo包Book代码
1 package cn.hc.pojo; 2 3 public class Book { 4 private Integer id;//int (11), 5 private String name;//varchar (192), 6 private Float price;//float , 7 private String pic;//varchar (96), 8 private String description;//text 9 10 public Integer getId() { 11 return id; 12 } 13 14 public void setId(Integer id) { 15 this.id = id; 16 } 17 18 public String getName() { 19 return name; 20 } 21 22 public void setName(String name) { 23 this.name = name; 24 } 25 26 public Float getPrice() { 27 return price; 28 } 29 30 public void setPrice(Float price) { 31 this.price = price; 32 } 33 34 public String getPic() { 35 return pic; 36 } 37 38 public void setPic(String pic) { 39 this.pic = pic; 40 } 41 42 public String getDescription() { 43 return description; 44 } 45 46 public void setDescription(String description) { 47 this.description = description; 48 } 49 50 @Override 51 public String toString() { 52 return "Book{" + 53 "id=" + id + 54 ", name='" + name + '\'' + 55 ", price=" + price + 56 ", pic='" + pic + '\'' + 57 ", description='" + description + '\'' + 58 '}'; 59 } 60 }
5.4 util包GetConn
1 package cn.hc.util; 2 3 4 import com.mysql.cj.jdbc.Driver; 5 6 import java.sql.Connection; 7 import java.sql.DriverManager; 8 9 public class GetConn { 10 11 public static Connection getConn() { 12 // 连接4要素 13 //1.驱动 14 String driver = "com.mysql.cj.jdbc.Driver"; 15 //2.url 16 String url = "jdbc:mysql://localhost:3307/lucene?useUnicode=true&characterEncoding=utf-8&serverTimezone=GMT"; 17 // 3.username 18 String username = "root"; 19 //4.pwd 20 String password = "root"; 21 22 try { 23 Class.forName(driver); 24 /* Driver driver =new Driver(); 25 DriverManager.registerDriver(driver);*/ 26 Connection connection = DriverManager.getConnection(url, username, password); 27 return connection; 28 } catch (Exception e) { 29 e.printStackTrace(); 30 } finally { 31 } 32 33 return null; 34 } 35 36 37 }
5.5 util包DoSearch类
1 package cn.hc.util; 2 3 import org.apache.lucene.analysis.standard.StandardAnalyzer; 4 import org.apache.lucene.document.Document; 5 import org.apache.lucene.index.DirectoryReader; 6 import org.apache.lucene.index.IndexReader; 7 import org.apache.lucene.queryparser.classic.ParseException; 8 import org.apache.lucene.queryparser.classic.QueryParser; 9 import org.apache.lucene.search.IndexSearcher; 10 import org.apache.lucene.search.Query; 11 import org.apache.lucene.search.ScoreDoc; 12 import org.apache.lucene.search.TopDocs; 13 import org.apache.lucene.store.FSDirectory; 14 15 import java.io.File; 16 17 public class DoSearch { 18 19 public static void doSearch(Query query) throws Exception { 20 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 21 IndexReader reader = DirectoryReader.open(directory); 22 IndexSearcher searcher = new IndexSearcher(reader); 23 TopDocs topDocs = searcher.search(query, 10); 24 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 25 for (ScoreDoc scoreDoc : scoreDocs) { 26 //document 的id 27 int documentId = scoreDoc.doc; 28 Document document = searcher.doc(documentId); 29 System.out.println("编号:"+document.get("id")+",书名:"+document.get("name")); 30 31 } 32 reader.close(); 33 } 34 }
5.6 test包代码
1 package cn.hc.test; 2 3 import cn.hc.dao.BookDao; 4 import cn.hc.pojo.Book; 5 import cn.hc.util.DoSearch; 6 import cn.hc.util.GetConn; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.document.Document; 9 import org.apache.lucene.document.Field; 10 import org.apache.lucene.document.TextField; 11 import org.apache.lucene.index.*; 12 import org.apache.lucene.queryparser.classic.ParseException; 13 import org.apache.lucene.queryparser.classic.QueryParser; 14 import org.apache.lucene.search.*; 15 import org.apache.lucene.store.FSDirectory; 16 import org.apache.lucene.util.Version; 17 import org.junit.Test; 18 import org.wltea.analyzer.lucene.IKAnalyzer; 19 20 21 import java.io.File; 22 import java.io.IOException; 23 import java.sql.Connection; 24 import java.sql.ResultSet; 25 import java.sql.SQLException; 26 import java.sql.Statement; 27 import java.util.ArrayList; 28 import java.util.List; 29 30 public class SimpleTest { 31 32 @Test 33 public void queryAll(){ 34 /* 35 1.获得连接 36 2.获得操作对象 37 3.执行sql语句 38 4.关闭 39 * */ 40 Connection conn = GetConn.getConn(); 41 try { 42 Statement statement = conn.createStatement(); 43 ResultSet resultSet = statement.executeQuery("select * from book;"); 44 List<Book> books=new ArrayList<>(); 45 while(resultSet.next()){ 46 Book book = new Book(); 47 book.setId(resultSet.getInt("id")); 48 book.setDescription(resultSet.getString("description")); 49 book.setPic(resultSet.getString("pic")); 50 book.setPrice(resultSet.getFloat("price")); 51 book.setName(resultSet.getString("name")); 52 books.add(book); 53 } 54 for (Book book : books) { 55 System.out.println(book.getName()); 56 } 57 58 } catch (SQLException e) { 59 e.printStackTrace(); 60 61 } 62 63 } 64 // 创建索引库 65 @Test 66 public void createIndex(){ 67 BookDao bookDao = new BookDao(); 68 //创建分词解析器 69 StandardAnalyzer analyzer = new StandardAnalyzer(); 70 try { 71 //打开解析文档 72 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 73 //配置分词解析文件 
74 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); 75 //创建索引写入对象 76 IndexWriter writer = new IndexWriter(directory, config); 77 //添加document索引 78 writer.addDocuments(bookDao.getDocuments(bookDao.queryAll())); 79 writer.close(); 80 System.out.println("创建索引库成功"); 81 } catch (IOException e) { 82 e.printStackTrace(); 83 } 84 85 86 } 87 // 通过docId查询document 88 @Test 89 public void searchDocumentByIndex() throws Exception { 90 //创建解析方法 91 StandardAnalyzer analyzer = new StandardAnalyzer(); 92 //创建查询解析器 93 QueryParser queryParser = new QueryParser("name", analyzer); 94 //创建查询条件对象 95 Query query = queryParser.parse("name:python python"); 96 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 97 //创建索引读取对象 98 IndexReader reader = DirectoryReader.open(directory); 99 //创建搜索器 100 IndexSearcher searcher = new IndexSearcher(reader); 101 TopDocs topDocs = searcher.search(query, 10); 102 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 103 for (ScoreDoc scoreDoc : scoreDocs) { 104 //document 的id 105 int documentId = scoreDoc.doc; 106 Document document = searcher.doc(documentId); 107 System.out.println("编号:"+document.get("id")+",书名:"+document.get("name")); 108 109 110 } 111 reader.close(); 112 113 } 114 115 //删除索引和文档 116 @Test 117 public void deleteIndex() throws IOException { 118 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 119 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new StandardAnalyzer()); 120 IndexWriter writer = new IndexWriter(directory, config); 121 writer.deleteDocuments(new Term("name","java")); 122 writer.close(); 123 } 124 //删除所有 125 @Test 126 public void deleteAll() throws IOException { 127 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 128 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new StandardAnalyzer()); 129 IndexWriter writer = new IndexWriter(directory, config); 130 writer.deleteAll(); 131 writer.close(); 132 } 133 //更新document 134 @Test 135 
public void updateDocument() throws IOException { 136 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 137 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, new StandardAnalyzer()); 138 IndexWriter writer = new IndexWriter(directory, config); 139 Document document = new Document(); 140 Field field = new TextField("name", "python is good", Field.Store.YES); 141 document.add(field); 142 writer.updateDocument(new Term("name","java"),document); 143 writer.close(); 144 } 145 //精确查询 TermQuery 146 @Test 147 public void queryByTermQuery() throws Exception { 148 Query query = new TermQuery(new Term("name", "编程思想")); 149 DoSearch.doSearch(query); 150 } 151 152 //数字范围查询 NumericRangeQuery 153 @Test 154 public void queryByNumericRangeQuery() throws Exception { 155 Query query = NumericRangeQuery.newFloatRange("price", 60.0f, 80.0f, true, false); 156 DoSearch.doSearch(query); 157 } 158 //条件查询 BooleanQuery 159 @Test 160 public void queryByBooleanQuery() throws Exception { 161 Query price = NumericRangeQuery.newFloatRange("price", 60.0f, 80.0f, true, false); 162 Query name = new TermQuery(new Term("name", "solr")); 163 BooleanQuery query = new BooleanQuery(); 164 query.add(price, BooleanClause.Occur.MUST);//必须满足条件 165 query.add(name, BooleanClause.Occur.MUST_NOT);//限制条件,query没有值时无效,过滤 166 DoSearch.doSearch(query); 167 } 168 //使用中文分词器IKAnalyzer分词 169 @Test 170 public void createIndexByIKanalyzer() throws IOException { 171 BookDao bookDao = new BookDao(); 172 IKAnalyzer ikAnalyzer = new IKAnalyzer(); 173 FSDirectory directory = FSDirectory.open(new File("D:\\lucene")); 174 IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, ikAnalyzer); 175 IndexWriter writer = new IndexWriter(directory, config); 176 writer.addDocuments(bookDao.getDocuments(bookDao.queryAll())); 177 writer.close(); 178 } 179 180 }