Lucene入门及实际项目应用场景
1、lucene官网:http://lucene.apache.org/
2、导入的pom依赖:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven build descriptor for the demo web application (packaged as a WAR). -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.javaxl</groupId>
<artifactId>javaxl_lunece_freemarker</artifactId>
<packaging>war</packaging>
<version>0.0.1-SNAPSHOT</version>
<name>javaxl_lunece_freemarker Maven Webapp</name>
<url>http://maven.apache.org</url>
<!-- Dependency versions are centralised here and referenced below as ${...}. -->
<!-- NOTE(review): several pinned versions (e.g. log4j 1.2.x, fastjson 1.2.47) are old; check their security advisories before reuse. -->
<properties>
<httpclient.version>4.5.2</httpclient.version>
<jsoup.version>1.10.1</jsoup.version>
<!-- <lucene.version>7.1.0</lucene.version> -->
<lucene.version>5.3.1</lucene.version>
<ehcache.version>2.10.3</ehcache.version>
<junit.version>4.12</junit.version>
<log4j.version>1.2.16</log4j.version>
<mysql.version>5.1.44</mysql.version>
<fastjson.version>1.2.47</fastjson.version>
<struts2.version>2.5.16</struts2.version>
<servlet.version>4.0.1</servlet.version>
<jstl.version>1.2</jstl.version>
<standard.version>1.1.2</standard.version>
<tomcat-jsp-api.version>8.0.47</tomcat-jsp-api.version>
</properties>
<dependencies>
<!-- JUnit, test scope only -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.version}</version>
</dependency>
<!-- HttpClient support -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<!-- jsoup support -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<!-- Logging support (log4j) -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
</dependency>
<!-- Ehcache support -->
<dependency>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache</artifactId>
<version>${ehcache.version}</version>
</dependency>
<!-- fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- Struts 2 core -->
<dependency>
<groupId>org.apache.struts</groupId>
<artifactId>struts2-core</artifactId>
<version>${struts2.version}</version>
</dependency>
<!-- Servlet API, provided scope -->
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>${servlet.version}</version>
<scope>provided</scope>
</dependency>
<!-- Lucene modules, all on ${lucene.version}: core, query parser, smartcn analyzer, highlighter -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId>
<version>${lucene.version}</version> </dependency> -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${lucene.version}</version>
</dependency>
<!-- 5.3 JSTL and the standard taglib -->
<dependency>
<groupId>jstl</groupId>
<artifactId>jstl</artifactId>
<version>${jstl.version}</version>
</dependency>
<dependency>
<groupId>taglibs</groupId>
<artifactId>standard</artifactId>
<version>${standard.version}</version>
</dependency>
<!-- 5.4 tomcat-jsp-api -->
<dependency>
<groupId>org.apache.tomcat</groupId>
<artifactId>tomcat-jsp-api</artifactId>
<version>${tomcat-jsp-api.version}</version>
</dependency>
</dependencies>
<build>
<finalName>javaxl_lunece_freemarker</finalName>
<plugins>
<!-- Compile with Java 1.8 source/target and UTF-8 source encoding -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
</plugins>
</build>
</project>
:理解图片
一:测试:
1、生成索引
Demo1:
package com.yj.lucene;
/**
 * Index-creation demo: asks {@code IndexCreate} to index a data directory and
 * prints the count it returns together with the elapsed time.
 *
 * @author Administrator
 */
public class Demo1 {
    public static void main(String[] args) {
        // Directory where the index files will be written
        String indexDir = "E:\\temp\\lucene\\demo1";
        // Directory containing the source files to index
        String dataDir = "E:\\temp\\lucene\\demo1\\data";
        IndexCreate ic = null;
        try {
            ic = new IndexCreate(indexDir);
            long start = System.currentTimeMillis();
            int num = ic.index(dataDir);
            long end = System.currentTimeMillis();
            System.out.println("检索指定路径下"+num+"个文件,一共花费了"+(end-start)+"毫秒");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // ic is still null when the IndexCreate constructor itself failed;
            // dereferencing it here would raise a NullPointerException out of main.
            if (ic != null) {
                try {
                    ic.closeIndexWriter();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
方法:
IndexCreate(创建索引的方法)
package com.yj.lucene;
import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the files of a directory (the "hello world"
 * counterpart of Demo1.java).
 *
 * @author Administrator
 */
public class IndexCreate {
    /** Directory holding the index; kept so it can be closed together with the writer. */
    private final FSDirectory directory;
    private IndexWriter indexWriter;

    /**
     * Opens the index directory and creates the {@link IndexWriter}.
     *
     * @param indexDir path of the directory the index is written to
     * @throws Exception if the directory or the writer cannot be opened
     */
    public IndexCreate(String indexDir) throws Exception {
        directory = FSDirectory.open(Paths.get(indexDir));
        try {
            // Standard analyzer (the original author notes it targets English text)
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig conf = new IndexWriterConfig(analyzer);
            indexWriter = new IndexWriter(directory, conf);
        } catch (Exception e) {
            // Do not leak the directory handle when the writer cannot be created
            directory.close();
            throw e;
        }
    }

    /**
     * Closes the index writer and the underlying directory.
     *
     * @throws Exception if closing fails
     */
    public void closeIndexWriter() throws Exception {
        try {
            indexWriter.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular file directly inside {@code dataDir}.
     * Sub-directories are skipped; a missing or unreadable directory indexes nothing.
     *
     * @param dataDir directory whose files are indexed
     * @return the value of {@code IndexWriter.numDocs()} after indexing
     * @throws Exception if a file cannot be read or indexed
     */
    public int index(String dataDir) throws Exception {
        // listFiles() returns null when dataDir does not exist or is not a directory
        File[] files = new File(dataDir).listFiles();
        if (files != null) {
            for (File file : files) {
                // A directory cannot be opened with FileReader
                if (file.isFile()) {
                    indexFile(file);
                }
            }
        }
        return indexWriter.numDocs();
    }

    /**
     * Indexes a single file.
     *
     * @param file file to index
     * @throws Exception if the file cannot be read or added to the index
     */
    private void indexFile(File file) throws Exception {
        System.out.println("被索引文件的全路径:"+file.getCanonicalPath());
        // The reader is closed here even when addDocument fails before consuming it
        try (FileReader reader = new FileReader(file)) {
            indexWriter.addDocument(getDocument(file, reader));
        }
    }

    /**
     * Builds the document for a file: tokenized contents plus stored path and name.
     *
     * @param file   file being indexed
     * @param reader reader over the file contents
     * @return the document to add to the index
     * @throws Exception if the canonical path cannot be resolved
     */
    private Document getDocument(File file, FileReader reader) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", reader));
        // Field.Store.YES keeps the original value in the index so it can be read back
        doc.add(new TextField("fullPath", file.getCanonicalPath(), Field.Store.YES));
        doc.add(new TextField("fileName", file.getName(), Field.Store.YES));
        return doc;
    }
}
结果图:
2、查询索引测试
Demo2:
package com.yj.lucene;
/**
 * Index-search demo: runs a fixed keyword against a fixed index directory
 * through {@code IndexUse.search}.
 *
 * @author Administrator
 */
public class Demo2 {
    public static void main(String[] args) {
        final String keyword = "EarlyTerminating-Collector";
        final String indexPath = "E:\\temp\\lucene\\demo1";
        try {
            IndexUse.search(indexPath, keyword);
        } catch (Exception failure) {
            // Demo code: just report the failure
            failure.printStackTrace();
        }
    }
}
方法:( IndexUse)
package com.yj.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
/**
 * Searches a Lucene index (the "hello world" counterpart of Demo2.java).
 *
 * @author Administrator
 */
public class IndexUse {
    /**
     * Parses {@code q} against the "contents" field, prints the hit count and
     * search time, then prints the stored "fullPath" of up to ten hits.
     *
     * @param indexDir directory containing the index
     * @param q        query string
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources closes the directory, reader and analyzer even when
        // parsing or searching throws.
        try (FSDirectory indexDirectory = FSDirectory.open(Paths.get(indexDir));
             // An index reader is obtained from DirectoryReader, not constructed directly
             IndexReader indexReader = DirectoryReader.open(indexDirectory);
             Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser("contents", analyzer);
            Query query = queryParser.parse(q);
            long start = System.currentTimeMillis();
            // Fetch at most the ten best-scoring documents
            TopDocs topDocs = indexSearcher.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 "+q+" ,总共花费"+(end-start)+"毫秒"+"查询到"+topDocs.totalHits+"个记录");
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit by its document id
                Document doc = indexSearcher.doc(scoreDoc.doc);
                System.out.println("通过索引文件:"+doc.get("fullPath")+"拿数据");
            }
        }
    }
}
结果图:
3、构建索引:(增删改)
Demo3
package com.yj.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
/**
* 构建索引
* 对索引的增删改
* @author Administrator
*
*/
public class Demo3 {
private String ids[]={"1","2","3"};
private String citys[]={"qingdao","nanjing","shanghai"};
private String descs[]={
"Qingdao is a beautiful city.",
"Nanjing is a city of culture.",
"Shanghai is a bustling city."
};
private FSDirectory dir;
/**
* 每次都生成索引文件
* @throws Exception
*/
@Before
public void setUp() throws Exception {
dir = FSDirectory.open(Paths.get("E:\\temp\\lucene\\demo2\\indexDir"));
IndexWriter indexWriter = getIndexWriter();
for (int i = 0; i < ids.length; i++) {
Document doc = new Document();
doc.add(new StringField("id", ids[i], Field.Store.YES));
doc.add(new StringField("city", citys[i], Field.Store.YES));
doc.add(new TextField("desc", descs[i], Field.Store.NO));
indexWriter.addDocument(doc);
}
indexWriter.close();
}
/**
* 获取索引输出流
* @return
* @throws Exception
*/
private IndexWriter getIndexWriter() throws Exception{
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
return new IndexWriter(dir, conf );
}
/**
* 测试写了几个索引文件
* @throws Exception
*/
@Test
public void getWriteDocNum() throws Exception {
IndexWriter indexWriter = getIndexWriter();
System.out.println("索引目录下生成"+indexWriter.numDocs()+"个索引文件");
}
/**
* 打上标记,该索引实际并未删除
* @throws Exception
*/
@Test
public void deleteDocBeforeMerge() throws Exception {
IndexWriter indexWriter = getIndexWriter();
System.out.println("最大文档数:"+indexWriter.maxDoc());//3
indexWriter.deleteDocuments(new Term("id", "1"));//删除id为1的
indexWriter.commit();
System.out.println("最大文档数:"+indexWriter.maxDoc());//3
System.out.println("实际文档数:"+indexWriter.numDocs());//2
indexWriter.close();
}
/**
* 对应索引文件已经删除,但是该版本的分词会保留
* @throws Exception
*/
@Test
public void deleteDocAfterMerge() throws Exception {
// https://blog.csdn.net/asdfsadfasdfsa/article/details/78820030
// org.apache.lucene.store.LockObtainFailedException: Lock held by this virtual machine:indexWriter是单例的、线程安全的,不允许打开多个。
IndexWriter indexWriter = getIndexWriter();
System.out.println("最大文档数:"+indexWriter.maxDoc());//3
indexWriter.deleteDocuments(new Term("id", "1"));
indexWriter.forceMergeDeletes(); //强制删除
indexWriter.commit();
System.out.println("最大文档数:"+indexWriter.maxDoc());//2
System.out.println("实际文档数:"+indexWriter.numDocs());//2
indexWriter.close();
}
/**
* 测试更新索引
* 修改
* @throws Exception
*/
@Test
public void testUpdate()throws Exception{
IndexWriter writer=getIndexWriter();
Document doc=new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
doc.add(new StringField("city","qingdao",Field.Store.YES));
doc.add(new TextField("desc", "dsss is a city.", Field.Store.NO));
writer.updateDocument(new Term("id","1"), doc);
writer.close();
}
}
4、文档域加权:
Demo4:
package com.yj.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
/**
* 文档域加权:
* 意思:一般搜索会默认点击率最多的在前面,
* 而文档域加权就是强制让搜索的内容在前面显示,例如充钱玩家
* @author Administrator
*
*/
public class Demo4 {
private String ids[]={"1","2","3","4"};
private String authors[]={"Jack","Marry","John","Json"};
private String positions[]={"accounting","technician","salesperson","boss"};
private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};
private String contents[]={
"If possible, use the same JRE major version at both index and search time.",
"When upgrading to a different JRE major version, consider re-indexing. ",
"Different JRE major versions may implement different versions of Unicode,",
"For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"
};
private Directory dir;//索引文件目录
@Before
public void setUp()throws Exception {
dir = FSDirectory.open(Paths.get("E:\\temp\\lucene\\demo3\\indexDir"));
IndexWriter writer = getIndexWriter();
for (int i = 0; i < authors.length; i++) {
Document doc = new Document();
doc.add(new StringField("id", ids[i], Field.Store.YES));
doc.add(new StringField("author", authors[i], Field.Store.YES));
doc.add(new StringField("position", positions[i], Field.Store.YES));
TextField textField = new TextField("title", titles[i], Field.Store.YES);
// Json投钱做广告,把排名刷到第一了
if("boss".equals(positions[i])) {
textField.setBoost(2f);//设置权重,默认为1
}
doc.add(textField);
// TextField会分词,StringField不会分词
doc.add(new TextField("content", contents[i], Field.Store.NO));
writer.addDocument(doc);
}
writer.close();
}
private IndexWriter getIndexWriter() throws Exception{
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
return new IndexWriter(dir, conf);
}
@Test
public void index() throws Exception{
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
String fieldName = "title";//根据标题查询
String keyWord = "java";//查询含有java的
Term t = new Term(fieldName, keyWord);
Query query = new TermQuery(t);
TopDocs hits = searcher.search(query, 10);
System.out.println("关键字:‘"+keyWord+"’命中了"+hits.totalHits+"次");
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc.get("author"));//打印作者
}
}
}
5、查询:
Demo5,6:
package com.yj.lucene;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
/**
 * Query demos: exact term search ({@code TermQuery}) and query-expression
 * search ({@code QueryParser}).
 *
 * @author Administrator
 */
public class Demo5 {
    /**
     * Indexes the data directory through {@code IndexCreate} before every test.
     */
    @Before
    public void setUp() {
        // Directory where the index files will be written
        String indexDir = "E:\\temp\\lucene\\demo4";
        // Directory containing the source files to index
        String dataDir = "E:\\temp\\lucene\\demo4\\data";
        IndexCreate ic = null;
        try {
            ic = new IndexCreate(indexDir);
            long start = System.currentTimeMillis();
            int num = ic.index(dataDir);
            long end = System.currentTimeMillis();
            System.out.println("检索指定路径下" + num + "个文件,一共花费了" + (end - start) + "毫秒");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // ic is still null when the IndexCreate constructor failed
            if (ic != null) {
                try {
                    ic.closeIndexWriter();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the "contents" field for one exact term and prints the stored
     * path of every hit (at most 100).
     */
    @Test
    public void testTermQuery() {
        String indexDir = "E:\\temp\\lucene\\demo4";
        String fld = "contents";
        String text = "indexformattoooldexception";
        // Field name and the exact term to match
        Term t = new Term(fld, text);
        TermQuery tq = new TermQuery(t);
        // Directory and reader are closed automatically, also on failure
        try (FSDirectory indexDirectory = FSDirectory.open(Paths.get(indexDir));
             // An index reader is obtained from DirectoryReader, not constructed directly
             IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
            IndexSearcher is = new IndexSearcher(indexReader);
            TopDocs hits = is.search(tq, 100);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                System.out.println("文件"+doc.get("fullPath")+"中含有该关键字");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the same search through a {@code QueryParser}, which analyzes the
     * query text with a standard analyzer before matching.
     */
    @Test
    public void testQueryParser() {
        String indexDir = "E:\\temp\\lucene\\demo4";
        // The parser is bound to a default field and the analyzer used to tokenize the query
        QueryParser queryParser = new QueryParser("contents", new StandardAnalyzer());
        try (FSDirectory indexDirectory = FSDirectory.open(Paths.get(indexDir));
             IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
            IndexSearcher is = new IndexSearcher(indexReader);
            // Let the parser turn the keyword into a query
            TopDocs hits = is.search(queryParser.parse("indexformattoooldexception"), 100);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                System.out.println("文件"+doc.get("fullPath")+"中含有该关键字");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}
package com.yj.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
/**
* 指定数字范围查询
* 指定字符串开头字母查询(prefixQuery)
* @author Administrator
*
*/
public class Demo6 {
private int ids[]={1,2,3};
private String citys[]={"qingdao","nanjing","shanghai"};
private String descs[]={
"Qingdao is a beautiful city.",
"Nanjing is a city of culture.",
"Shanghai is a bustling city."
};
private FSDirectory dir;
/**
* 每次都生成索引文件
* @throws Exception
*/
@Before
public void setUp() throws Exception {
dir = FSDirectory.open(Paths.get("E:\\temp\\lucene\\demo2\\indexDir"));
IndexWriter indexWriter = getIndexWriter();
for (int i = 0; i < ids.length; i++) {
Document doc = new Document();
doc.add(new IntField("id", ids[i], Field.Store.YES));
doc.add(new StringField("city", citys[i], Field.Store.YES));
//YES:必须生成索引,NO:反之
doc.add(new TextField("desc", descs[i], Field.Store.YES));
indexWriter.addDocument(doc);
}
indexWriter.close();
}
/**
* 获取索引输出流
* @return
* @throws Exception
*/
private IndexWriter getIndexWriter() throws Exception{
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
return new IndexWriter(dir, conf );
}
/**
* 指定数字范围查询
* @throws Exception
*/
@Test
public void testNumericRangeQuery()throws Exception{
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(reader);
//两边都是闭区间
NumericRangeQuery<Integer> query=NumericRangeQuery.newIntRange("id", 1, 2, true, true);
TopDocs hits=is.search(query, 10);
for(ScoreDoc scoreDoc:hits.scoreDocs){
Document doc=is.doc(scoreDoc.doc);
System.out.println(doc.get("id"));
System.out.println(doc.get("city"));
System.out.println(doc.get("desc"));
}
}
/**
* 指定字符串开头字母查询(prefixQuery)
* @throws Exception
*/
@Test
public void testPrefixQuery()throws Exception{
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(reader);
PrefixQuery query=new PrefixQuery(new Term("city","n"));//按照头部查询,不常用
TopDocs hits=is.search(query, 10);
for(ScoreDoc scoreDoc:hits.scoreDocs){
Document doc=is.doc(scoreDoc.doc);
System.out.println(doc.get("id"));
System.out.println(doc.get("city"));
System.out.println(doc.get("desc"));
}
}
/**
* 组合查询,常用
* @throws Exception
*/
@Test
public void testBooleanQuery()throws Exception{
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(reader);
NumericRangeQuery<Integer> query1=NumericRangeQuery.newIntRange("id", 1, 2, true, true);
PrefixQuery query2=new PrefixQuery(new Term("city","N".toLowerCase()));//不区分大小写
BooleanQuery.Builder booleanQuery=new BooleanQuery.Builder();
//拼接
booleanQuery.add(query1,BooleanClause.Occur.MUST);
booleanQuery.add(query2,BooleanClause.Occur.MUST);
TopDocs hits=is.search(booleanQuery.build(), 10);
for(ScoreDoc scoreDoc:hits.scoreDocs){
Document doc=is.doc(scoreDoc.doc);
System.out.println(doc.get("id"));
System.out.println(doc.get("city"));
System.out.println(doc.get("desc"));
}
}
}
6、中文、高亮效果显示:
Demo7:
package com.yj.lucene;
import java.io.StringReader;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;
public class Demo7 {
private Integer ids[] = { 1, 2, 3 };
private String citys[] = { "青岛", "南京", "上海" };
// private String descs[]={
// "青岛是个美丽的城市。",
// "南京是个有文化的城市。",
// "上海市个繁华的城市。"
// };
private String descs[] = { "青岛是个美丽的城市。",
"南京是一个文化的城市南京,简称宁,是江苏省会,地处中国东部地区,长江下游,濒江近海。全市下辖11个区,总面积6597平方公里,2013年建成区面积752.83平方公里,常住人口818.78万,其中城镇人口659.1万人。[1-4] “江南佳丽地,金陵帝王州”,南京拥有着6000多年文明史、近2600年建城史和近500年的建都史,是中国四大古都之一,有“六朝古都”、“十朝都会”之称,是中华文明的重要发祥地,历史上曾数次庇佑华夏之正朔,长期是中国南方的政治、经济、文化中心,拥有厚重的文化底蕴和丰富的历史遗存。[5-7] 南京是国家重要的科教中心,自古以来就是一座崇文重教的城市,有“天下文枢”、“东南第一学”的美誉。截至2013年,南京有高等院校75所,其中211高校8所,仅次于北京上海;国家重点实验室25所、国家重点学科169个、两院院士83人,均居中国第三。[8-10]",
"上海市个繁华的城市。" };
private FSDirectory dir;
/**
* 每次都生成索引文件
*
* @throws Exception
*/
@Before
public void setUp() throws Exception {
dir = FSDirectory.open(Paths.get("E:\\temp\\lucene\\demo2\\indexDir"));
IndexWriter indexWriter = getIndexWriter();
for (int i = 0; i < ids.length; i++) {
Document doc = new Document();
doc.add(new IntField("id", ids[i], Field.Store.YES));
doc.add(new StringField("city", citys[i], Field.Store.YES));
doc.add(new TextField("desc", descs[i], Field.Store.YES));
indexWriter.addDocument(doc);
}
indexWriter.close();
}
/**
* 获取索引输出流
*
* @return
* @throws Exception
*/
private IndexWriter getIndexWriter() throws Exception {
// Analyzer analyzer = new StandardAnalyzer();
Analyzer analyzer = new SmartChineseAnalyzer();//设置中文分词器
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
return new IndexWriter(dir, conf);
}
/**
* luke查看索引生成
*
* @throws Exception
*/
@Test
public void testIndexCreate() throws Exception {
}
/**
* 测试高亮
*
* @throws Exception
*/
@Test
public void testHeight() throws Exception {
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
QueryParser parser = new QueryParser("desc", analyzer);
// Query query = parser.parse("南京文化");
Query query = parser.parse("南京文明");
TopDocs hits = searcher.search(query, 100);
// 查询得分项:查询到符合条件的内容
QueryScorer queryScorer = new QueryScorer(query);
// 得分项对应的内容片段
SimpleSpanFragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
// 高亮显示的样式
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span color='red'><b>", "</b></span>");
// 高亮显示对象
Highlighter highlighter = new Highlighter(htmlFormatter, queryScorer);
// 设置需要高亮显示对应的内容片段
highlighter.setTextFragmenter(fragmenter);
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = searcher.doc(scoreDoc.doc);
String desc = doc.get("desc");
if (desc != null) {
// tokenstream是从doucment的域(field)中抽取的一个个分词而组成的一个数据流,用于分词。
TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
System.out.println("高亮显示的片段:" + highlighter.getBestFragment(tokenStream, desc));
}
System.out.println("所有内容:" + desc);
}
}
}
二:实际项目应用场景:
首先,工具类如下:
BaseDao
package com.yj.blog.util;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
/**
 * Generic query helper. The DAO runs the SQL (optionally paginated) and hands
 * the open {@link ResultSet} to a caller-supplied {@link Callback}, which
 * decides how rows are turned into result objects.
 *
 * @author Administrator
 *
 * @param <T> element type of the returned list
 */
public class BaseDao<T> {
    /**
     * Row-mapping strategy supplied by the caller.
     *
     * @param <T> element type produced from the result set
     */
    protected interface Callback<T> {
        public List<T> foreach(ResultSet rs) throws SQLException, InstantiationException, IllegalAccessException;
    }

    /**
     * Executes {@code sql} and maps the rows with {@code callback}.
     * When {@code pageBean} is non-null and pagination is enabled, a count
     * query is run first to fill in the total, and a limit clause is appended
     * to the query.
     *
     * @param sql      the select statement
     * @param pageBean pagination settings, may be {@code null}
     * @param callback maps the result set to a list
     * @return the list produced by {@code callback}
     * @throws SQLException           if a database error occurs
     * @throws InstantiationException propagated from {@code callback}
     * @throws IllegalAccessException propagated from {@code callback}
     */
    public List<T> executeQuery(String sql, PageBean pageBean, Callback<T> callback)
            throws SQLException, InstantiationException, IllegalAccessException {
        // try-with-resources closes the result sets, statements and the connection
        // on every path, including when preparing or executing a statement fails.
        try (Connection con = DBAccess.getConnection()) {
            String querySql = sql;
            if (pageBean != null && pageBean.isPagination()) {
                try (PreparedStatement countPst = con.prepareStatement(getCountSql(sql));
                     ResultSet countRs = countPst.executeQuery()) {
                    if (countRs.next()) {
                        pageBean.setTotal(countRs.getObject(1).toString());
                    }
                }
                querySql = getPageSql(sql, pageBean);
            }
            try (PreparedStatement pst = con.prepareStatement(querySql);
                 ResultSet rs = pst.executeQuery()) {
                return callback.foreach(rs);
            }
        }
    }

    /**
     * Appends a {@code limit start,rows} clause so that only the current page is fetched.
     *
     * @param sql      the original select statement
     * @param pageBean supplies the start index and page size
     * @return the paginated statement
     */
    private String getPageSql(String sql, PageBean pageBean) {
        return sql + " limit " + pageBean.getStartIndex() + "," + pageBean.getRows();
    }

    /**
     * Wraps the statement in a {@code select count(1)} to obtain the total row count.
     *
     * @param sql the original select statement
     * @return the counting statement
     */
    private String getCountSql(String sql) {
        return "select count(1) from (" + sql + " ) t";
    }
}
DateUtil(日期工具类):
package com.yj.blog.util;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Date helper.
 *
 * @author user
 */
public class DateUtil {
    /**
     * Formats today's date as a {@code yyyy/MM/dd} path fragment.
     *
     * @return the current date, e.g. {@code 2019/07/15}
     * @throws Exception declared for callers; the body itself throws no checked exception
     */
    public static String getCurrentDatePath() throws Exception {
        SimpleDateFormat pathFormat = new SimpleDateFormat("yyyy/MM/dd");
        Date now = new Date();
        String datePath = pathFormat.format(now);
        return datePath;
    }

    /** Prints the current date path; failures are reported on standard error. */
    public static void main(String[] args) {
        try {
            String datePath = getCurrentDatePath();
            System.out.println(datePath);
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
数据库连接:(DBAccess )
package com.yj.blog.util;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;
/**
 * Static helpers for obtaining and closing JDBC objects. Connection settings
 * (driver, url, user, pwd) are read once through {@code PropertiesUtil}.
 */
public class DBAccess {
    private static String driver;
    private static String url;
    private static String user;
    private static String password;

    static { // runs once: read the settings and load the driver class
        try {
            driver = PropertiesUtil.getValue("driver");
            url = PropertiesUtil.getValue("url");
            user = PropertiesUtil.getValue("user");
            password = PropertiesUtil.getValue("pwd");
            Class.forName(driver);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a new database connection.
     *
     * @return an open connection; the caller is responsible for closing it
     * @throws RuntimeException wrapping the {@link SQLException} when connecting fails
     */
    public static Connection getConnection() {
        try {
            return DriverManager.getConnection(url, user, password);
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes a result set; {@code null} is ignored.
     *
     * @throws RuntimeException wrapping the {@link SQLException} when closing fails
     */
    public static void close(ResultSet rs) {
        if (null != rs) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Closes a statement; {@code null} is ignored.
     *
     * @throws RuntimeException wrapping the {@link SQLException} when closing fails
     */
    public static void close(Statement stmt) {
        if (null != stmt) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Closes a connection; {@code null} is ignored.
     *
     * @throws RuntimeException wrapping the {@link SQLException} when closing fails
     */
    public static void close(Connection conn) {
        if (null != conn) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Closes result set, statement and connection in that order. Each close is
     * attempted even when an earlier one fails, so one failure cannot leak the
     * remaining resources.
     *
     * @throws RuntimeException if any of the close operations fails
     */
    public static void close(Connection conn, Statement stmt, ResultSet rs) {
        try {
            close(rs);
        } finally {
            try {
                close(stmt);
            } finally {
                close(conn);
            }
        }
    }

    /** @return {@code true} when the configured driver is the Oracle driver */
    public static boolean isOracle() {
        return "oracle.jdbc.driver.OracleDriver".equals(driver);
    }

    /** @return {@code true} when the configured driver is the SQL Server driver */
    public static boolean isSQLServer() {
        return "com.microsoft.sqlserver.jdbc.SQLServerDriver".equals(driver);
    }

    /**
     * @return {@code true} when the configured driver is a MySQL driver
     *         (the Connector/J 5 class name or the Connector/J 8 one)
     */
    public static boolean isMysql() {
        return "com.mysql.jdbc.Driver".equals(driver)
                || "com.mysql.cj.jdbc.Driver".equals(driver);
    }

    /** Smoke test: opens and closes a connection and prints the detected database type. */
    public static void main(String[] args) {
        Connection conn = DBAccess.getConnection();
        DBAccess.close(conn);
        System.out.println("isOracle:" + isOracle());
        System.out.println("isSQLServer:" + isSQLServer());
        System.out.println("isMysql:" + isMysql());
        System.out.println("数据库连接(关闭)成功");
    }
}
EntityBaseDao(看自己开发运用baseDao)
package com.yj.blog.util;
import java.lang.reflect.Field;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
/**
 * DAO that maps each result row to an entity by reflection: every instance
 * field declared on the entity class is filled from the column of the same name.
 *
 * @param <T> entity type
 */
public class EntityBaseDao<T> extends BaseDao<T> {
    /**
     * Runs the query and converts each row into a new instance of {@code clz}.
     *
     * @param sql      the select statement
     * @param pageBean pagination settings, may be {@code null}
     * @param clz      entity class; needs an accessible no-argument constructor
     * @return one entity per row
     * @throws SQLException           if a database error occurs or a field has no matching column
     * @throws InstantiationException if the entity cannot be instantiated
     * @throws IllegalAccessException if the constructor or a field cannot be accessed
     */
    public List<T> executeQuery(String sql, PageBean pageBean, Class<?> clz) throws SQLException, InstantiationException, IllegalAccessException{
        return super.executeQuery(sql, pageBean, new Callback<T>() {
            @Override
            public List<T> foreach(ResultSet rs) throws SQLException, InstantiationException, IllegalAccessException {
                // Resolve the mapped fields once instead of once per row. Static and
                // synthetic fields (e.g. serialVersionUID) are not columns: looking
                // them up in the result set or assigning them would fail.
                List<Field> mapped = new ArrayList<>();
                for (Field field : clz.getDeclaredFields()) {
                    if (java.lang.reflect.Modifier.isStatic(field.getModifiers()) || field.isSynthetic()) {
                        continue;
                    }
                    field.setAccessible(true);
                    mapped.add(field);
                }
                List<T> list = new ArrayList<>();
                while (rs.next()) {
                    // Safe as long as the caller passes the class matching T
                    @SuppressWarnings("unchecked")
                    T t = (T) clz.newInstance();
                    for (Field field : mapped) {
                        field.set(t, rs.getObject(field.getName()));
                    }
                    list.add(t);
                }
                return list;
            }
        });
    }
}
JsonBaseDao:
package com.yj.blog.util;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * DAO that returns every row as a map from column name to value, and runs
 * parameterized updates whose values come from a request parameter map.
 */
public class JsonBaseDao extends BaseDao<Map<String,Object>> {
    /**
     * Runs the query and converts each row into a {@code Map} keyed by column name.
     *
     * @param sql      the select statement
     * @param pageBean pagination settings, may be {@code null}
     * @return one map per row
     * @throws SQLException           if a database error occurs
     * @throws InstantiationException declared by the callback contract
     * @throws IllegalAccessException declared by the callback contract
     */
    public List<Map<String,Object>> executeQuery(String sql, PageBean pageBean) throws SQLException, InstantiationException, IllegalAccessException{
        return super.executeQuery(sql, pageBean, new Callback<Map<String,Object>>() {
            @Override
            public List<Map<String,Object>> foreach(ResultSet rs) throws SQLException, InstantiationException, IllegalAccessException {
                List<Map<String,Object>> list = new ArrayList<>();
                // Column metadata gives the number and names of the columns
                ResultSetMetaData md = rs.getMetaData();
                int count = md.getColumnCount();
                while (rs.next()) {
                    Map<String,Object> map = new HashMap<>();
                    for (int i = 1; i <= count; i++) {
                        // NOTE(review): getColumnName may ignore SQL aliases on some drivers;
                        // getColumnLabel would honour them - confirm before switching.
                        map.put(md.getColumnName(i), rs.getObject(i));
                    }
                    list.add(map);
                }
                return list;
            }
        });
    }

    /**
     * Executes an insert/update/delete statement. The i-th placeholder of
     * {@code sql} is bound to the value looked up in {@code paMap} under
     * {@code attrs[i]}.
     *
     * @param sql   statement with {@code ?} placeholders
     * @param attrs parameter-map keys, in placeholder order
     * @param paMap request parameters passed from the page to the back end
     * @return the number of affected rows
     * @throws SQLException if a database error occurs
     * @throws NoSuchFieldException declared for callers; not thrown by this body
     * @throws SecurityException declared for callers; not thrown by this body
     * @throws IllegalArgumentException declared for callers; not thrown by this body
     * @throws IllegalAccessException declared for callers; not thrown by this body
     */
    public int executeUpdate(String sql, String[] attrs, Map<String,String[]> paMap) throws SQLException, NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException {
        // Connection and statement are closed on every path, including failures
        try (Connection con = DBAccess.getConnection();
             PreparedStatement pst = con.prepareStatement(sql)) {
            for (int i = 0; i < attrs.length; i++) {
                pst.setObject(i + 1, JsonUtils.getParamVal(paMap, attrs[i]));
            }
            return pst.executeUpdate();
        }
    }
}
JsonUtils:
package com.yj.blog.util;
import java.util.Arrays;
import java.util.Map;
/**
 * Helper for reading values out of a request parameter map.
 *
 * @author Administrator
 */
public class JsonUtils {
    /**
     * Returns the value stored under {@code key} as a single string, for use as a
     * query condition or statement parameter.
     *
     * @param paramMap parameter map sent from the page (e.g. {@code req.getParameterMap()})
     * @param key      name of the wanted parameter
     * @return the value; several values are joined with {@code ", "}; an empty string
     *         when the map is null or empty, or the key has no values
     */
    public static String getParamVal(Map<String,String[]> paramMap, String key) {
        if (paramMap == null || paramMap.isEmpty()) {
            return "";
        }
        String[] values = paramMap.get(key);
        if (values == null || values.length == 0) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                joined.append(", ");
            }
            joined.append(values[i]);
        }
        return joined.toString();
    }
}
LuceneUtil(一些Lucene的通用代码)
package com.yj.blog.util;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
/**
 * Factory and clean-up helpers for the Lucene objects used by the blog search:
 * directories, readers, searchers, writers and highlighters.
 *
 * @author Administrator
 */
public class LuceneUtil {
    /**
     * Opens the file-system directory that holds the index files.
     *
     * @param path location of the index on disk
     * @return the opened directory, or {@code null} when opening failed (the error is
     *         printed to standard error)
     */
    public static Directory getDirectory(String path) {
        Directory directory = null;
        try {
            directory = FSDirectory.open(Paths.get(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return directory;
    }

    /**
     * Creates a directory that keeps the index in memory.
     *
     * @return a new, empty {@code RAMDirectory}
     */
    public static Directory getRAMDirectory() {
        return new RAMDirectory();
    }

    /**
     * Opens a reader on an index directory.
     *
     * @param directory directory that contains the index
     * @return the reader, or {@code null} when opening failed (the error is printed to
     *         standard error)
     */
    public static DirectoryReader getDirectoryReader(Directory directory) {
        DirectoryReader reader = null;
        try {
            reader = DirectoryReader.open(directory);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return reader;
    }

    /**
     * Creates a searcher on top of an open reader.
     *
     * @param reader reader of the index to search
     * @return a new searcher
     */
    public static IndexSearcher getIndexSearcher(DirectoryReader reader) {
        return new IndexSearcher(reader);
    }

    /**
     * Creates a writer that opens the index in {@code CREATE_OR_APPEND} mode and
     * commits pending changes when it is closed.
     *
     * @param directory directory that receives the index
     * @param analyzer  analyzer used while indexing; a Chinese analyzer can be passed here
     * @return the writer, or {@code null} when it could not be created (the error is
     *         printed to standard error)
     */
    public static IndexWriter getIndexWriter(Directory directory, Analyzer analyzer)
    {
        IndexWriter iwriter = null;
        try {
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            // Commit automatically when the writer is closed.
            config.setCommitOnClose(true);
            iwriter = new IndexWriter(directory, config);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return iwriter;
    }

    /**
     * Closes an index writer and its directory. Null arguments are skipped and a
     * failure to close one resource does not prevent closing the other.
     *
     * @param indexWriter writer to close, may be {@code null}
     * @param directory   directory to close, may be {@code null}
     */
    public static void close(IndexWriter indexWriter, Directory directory) {
        closeAndReport(indexWriter);
        closeAndReport(directory);
    }

    /**
     * Closes an index reader and its directory. Null arguments are skipped and a
     * failure to close one resource does not prevent closing the other.
     *
     * @param reader    reader to close, may be {@code null}
     * @param directory directory to close, may be {@code null}
     */
    public static void close(DirectoryReader reader, Directory directory) {
        closeAndReport(reader);
        closeAndReport(directory);
    }

    /**
     * Builds a highlighter that wraps matched terms of {@code fieldName} in a red
     * {@code <span>} and cuts the text into fragments of 200 characters.
     *
     * @param query     query whose terms are highlighted
     * @param fieldName field the query terms must belong to
     * @return the configured highlighter
     */
    public static Highlighter getHighlighter(Query query, String fieldName)
    {
        Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Scorer fragmentScorer = new QueryTermScorer(query, fieldName);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(200));
        return highlighter;
    }

    /**
     * Closes one resource on a best-effort basis. The failure is reported the same
     * way the rest of this class reports errors instead of being silently dropped.
     */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
PageBean:(分页代码)
package com.yj.blog.util;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
/**
 * Paging state of one list request: current page, page size, total record count
 * and whether paging is enabled, plus the URL and parameters of the last query so
 * that paging links can repeat it.
 */
public class PageBean {
    /** Current page number, starting at 1. */
    private int page = 1;
    /** Page size. */
    private int rows = 10;
    /** Total number of records. */
    private int total = 0;
    /** Whether the query is paged at all. */
    private boolean pagination = true;
    /** Parameters of the last query. */
    private Map<String, String[]> paramMap;
    /** URL of the last query. */
    private String url;

    /**
     * Initializes the bean from the request parameters {@code page}, {@code rows}
     * and {@code pagination}, and remembers the request URL and parameter map.
     *
     * @param request current HTTP request
     */
    public void setRequest(HttpServletRequest request) {
        this.setPage(request.getParameter("page"));
        this.setRows(request.getParameter("rows"));
        this.setPagination(request.getParameter("pagination"));
        this.setUrl(request.getRequestURL().toString());
        this.setParamMap(request.getParameterMap());
    }

    public PageBean() {
        super();
    }

    public Map<String, String[]> getParamMap() {
        return paramMap;
    }

    public void setParamMap(Map<String, String[]> paramMap) {
        this.paramMap = paramMap;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    /**
     * Sets the page from a request parameter. Blank, non-numeric or non-positive
     * input leaves the current page unchanged.
     *
     * @param page page number as text, may be {@code null}
     */
    public void setPage(String page) {
        this.page = parseAtLeast(page, 1, this.page);
    }

    public int getRows() {
        return rows;
    }

    /**
     * Sets the page size; counterpart of {@link #setPage(int)} and {@link #setTotal(int)}.
     *
     * @param rows page size
     */
    public void setRows(int rows) {
        this.rows = rows;
    }

    /**
     * Sets the page size from a request parameter. Blank, non-numeric or
     * non-positive input leaves the current page size unchanged.
     *
     * @param rows page size as text, may be {@code null}
     */
    public void setRows(String rows) {
        this.rows = parseAtLeast(rows, 1, this.rows);
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    /**
     * Sets the total record count from text. Blank, non-numeric or negative input
     * leaves the current total unchanged.
     *
     * @param total record count as text, may be {@code null}
     */
    public void setTotal(String total) {
        this.total = parseAtLeast(total, 0, this.total);
    }

    public boolean isPagination() {
        return pagination;
    }

    public void setPagination(boolean pagination) {
        this.pagination = pagination;
    }

    /**
     * Disables paging when the parameter is exactly {@code "false"}; any other
     * value leaves the current setting unchanged.
     *
     * @param pagination request parameter value, may be {@code null}
     */
    public void setPagination(String pagination) {
        if ("false".equals(pagination)) {
            this.pagination = false;
        }
    }

    /**
     * Number of the last page.
     *
     * @return total divided by page size, rounded up; 0 when the page size is not
     *         positive (instead of failing with a division by zero)
     */
    public int getMaxPage() {
        if (this.rows <= 0) {
            return 0;
        }
        int max = this.total / this.rows;
        if (this.total % this.rows != 0) {
            max++;
        }
        return max;
    }

    /**
     * Number of the next page.
     *
     * @return current page plus one, capped at {@link #getMaxPage()}
     */
    public int getNextPage() {
        int nextPage = this.page + 1;
        if (nextPage > this.getMaxPage()) {
            nextPage = this.getMaxPage();
        }
        return nextPage;
    }

    /**
     * Number of the previous page.
     *
     * @return current page minus one, but never less than 1
     */
    public int getPreviousPage() {
        int previousPage = this.page - 1;
        if (previousPage < 1) {
            previousPage = 1;
        }
        return previousPage;
    }

    /**
     * Zero-based index of the first record of the current page.
     *
     * @return {@code (page - 1) * rows}
     */
    public int getStartIndex() {
        return (this.page - 1) * this.rows;
    }

    @Override
    public String toString() {
        return "PageBean [page=" + page + ", rows=" + rows + ", total=" + total + ", pagination=" + pagination + "]";
    }

    /**
     * Parses a number that arrives as text (typically a request parameter).
     * Returns {@code current} when the text is blank, not a number, or below
     * {@code min}, so malformed client input cannot break paging.
     */
    private static int parseAtLeast(String text, int min, int current) {
        if (StringUtils.isBlank(text)) {
            return current;
        }
        try {
            int value = Integer.parseInt(text.trim());
            return value >= min ? value : current;
        } catch (NumberFormatException ignored) {
            // Deliberately ignored: the documented fallback is the current value.
            return current;
        }
    }
}
properties工具类:
package com.yj.blog.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
 * Reads values from the classpath resource {@code /lucene.properties}.
 *
 * @author user
 */
public class PropertiesUtil {
    /** Classpath location of the configuration file. */
    private static final String RESOURCE = "/lucene.properties";

    /**
     * Looks up the value of a key. The file is read again on every call.
     *
     * @param key property name
     * @return the configured value, or {@code null} when the key is absent, the
     *         resource is missing from the classpath, or it cannot be read (the
     *         problem is reported on standard error)
     */
    public static String getValue(String key){
        Properties prop = new Properties();
        // try-with-resources closes the stream, which used to be leaked on every call.
        try (InputStream in = PropertiesUtil.class.getResourceAsStream(RESOURCE)) {
            if (in == null) {
                // Previously this case ended in a NullPointerException inside Properties.load.
                System.err.println("Configuration resource not found on classpath: " + RESOURCE);
                return null;
            }
            prop.load(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return prop.getProperty(key);
    }
}
StringUtils:(处理空值)
package com.yj.blog.util;
/**
 * Null-safe checks for empty text.
 */
public class StringUtils {
    // Private constructor: this class only offers static helpers and is not instantiated.
    private StringUtils() {
    }

    /**
     * Tells whether a string carries no text.
     *
     * @param s string to test, may be {@code null}
     * @return {@code true} when {@code s} is {@code null} or is empty after trimming
     */
    public static boolean isBlank(String s) {
        return s == null || s.trim().equals("");
    }

    /**
     * Tells whether a string carries text.
     *
     * @param s string to test, may be {@code null}
     * @return {@code true} when {@code s} is not {@code null} and is not empty after trimming
     */
    public static boolean isNotBlank(String s) {
        if (isBlank(s)) {
            return false;
        }
        return true;
    }
}
Dao层:
package com.javaxl.blog.dao;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import com.javaxl.blog.util.JsonBaseDao;
import com.javaxl.blog.util.PageBean;
import com.javaxl.blog.util.StringUtils;
/**
 * Data access for the crawled blog table {@code t_lucene_crawler_blog}.
 */
public class BlogDao extends JsonBaseDao{
    /**
     * Lists blogs, optionally restricted to titles that contain the given text.
     *
     * @param title    text the title must contain; blank or {@code null} lists everything
     * @param pageBean paging information, passed through to {@code JsonBaseDao.executeQuery}
     * @return one map per blog row
     * @throws InstantiationException declared by {@code JsonBaseDao.executeQuery}
     * @throws IllegalAccessException declared by {@code JsonBaseDao.executeQuery}
     * @throws SQLException           if the query fails
     */
    public List<Map<String,Object>> list(String title, PageBean pageBean) throws InstantiationException, IllegalAccessException, SQLException{
        String sql = "select * from t_lucene_crawler_blog where 1=1";
        if(StringUtils.isNotBlank(title)) {
            // The title comes from the search form. It is embedded in a SQL string
            // literal, so it must not be able to terminate that literal.
            sql += " and title like '%" + escapeSqlLiteral(title) + "%'";
        }
        return super.executeQuery(sql, pageBean);
    }

    /**
     * Inserts one blog row from request parameters.
     *
     * @param paMap request parameter map; the keys {@code id}, {@code title},
     *              {@code content} and {@code url} are bound in that order
     * @return number of inserted rows
     * @throws SQLException if the insert fails
     */
    public int save(Map<String,String[]> paMap) throws InstantiationException, IllegalAccessException, SQLException, NoSuchFieldException, SecurityException, IllegalArgumentException{
        String sql = "insert into t_lucene_crawler_blog values(?,?,?,?,0)";
        return super.executeUpdate(sql, new String[] {"id","title","content","url"}, paMap);
    }

    /**
     * Escapes text for use inside a single-quoted SQL string literal: backslashes
     * and single quotes are doubled.
     * NOTE(review): backslash doubling targets MySQL (the driver configured in
     * lucene.properties); binding the value as a statement parameter would be the
     * better fix once the query method accepts parameters.
     */
    private static String escapeSqlLiteral(String raw) {
        return raw.replace("\\", "\\\\").replace("'", "''");
    }
}
web层:
BlogActionBak :
package com.yj.blog.web;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;
import com.javaxl.blog.dao.BlogDao;
import com.javaxl.blog.util.PropertiesUtil;
import com.javaxl.blog.util.StringUtils;
/**
 * Struts action behind the blog list page. Without a search title it lists the
 * blogs from the database; with a title it searches the Lucene index
 * (IndexReader, IndexSearcher) and highlights the matched words (Highlighter).
 *
 * @author Administrator
 */
public class BlogActionBak {
    /** Search text submitted by the form. */
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Puts the list to display into the request attribute {@code blogList}.
     * Failures are printed and lead to a page without that attribute.
     *
     * @return always the result name {@code "blogList"}
     */
    public String execute() {
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            if (StringUtils.isBlank(title)) {
                List<Map<String, Object>> blogList = this.blogDao.list(title, null);
                request.setAttribute("blogList", blogList);
            } else {
                // Analyzer, directory and reader hold files and buffers; they are
                // closed here (in reverse order) even when the search fails.
                try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                     FSDirectory directory = FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath")));
                     IndexReader indexReader = DirectoryReader.open(directory)) {
                    IndexSearcher searcher = new IndexSearcher(indexReader);
                    // Match the entered sentence against the terms of the "title" field.
                    Query query = new QueryParser("title", analyzer).parse(title);
                    TopDocs topDocs = searcher.search(query, 100);
                    // Matched terms are wrapped in properly nested red, bold markup.
                    QueryScorer queryScorer = new QueryScorer(query);
                    Formatter formatter = new SimpleHTMLFormatter("<span style='color:red;'><b>", "</b></span>");
                    Highlighter highlighter = new Highlighter(formatter, queryScorer);

                    List<Map<String, Object>> blogList = new ArrayList<>();
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        Map<String, Object> map = new HashMap<>();
                        Document doc = searcher.doc(scoreDoc.doc);
                        map.put("id", doc.get("id"));
                        String storedTitle = doc.get("title");
                        String shownTitle = storedTitle;
                        if (StringUtils.isNotBlank(storedTitle)) {
                            String fragment = highlighter.getBestFragment(analyzer, "title", storedTitle);
                            // getBestFragment yields null when nothing could be
                            // highlighted; keep the plain title in that case.
                            if (fragment != null) {
                                shownTitle = fragment;
                            }
                        }
                        map.put("title", shownTitle);
                        map.put("url", doc.get("url"));
                        blogList.add(map);
                    }
                    request.setAttribute("blogList", blogList);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "blogList";
    }
}
构建lucene索引:
IndexStarter :
package com.yj.blog.web;
import java.io.IOException;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.javaxl.blog.dao.BlogDao;
import com.javaxl.blog.util.PropertiesUtil;
/**
 * Command-line entry point that builds the Lucene index for every blog row in the
 * database. Overall flow of the feature:
 * 1. build the index with an IndexWriter (this class),
 * 2. read the index and find the matching fragments,
 * 3. display the matches highlighted.
 *
 * @author Administrator
 */
public class IndexStarter {
    private static BlogDao blogDao = new BlogDao();

    /**
     * Reads all blogs through {@code BlogDao.list(null, null)} and adds one document
     * per row (fields {@code id}, {@code title}, {@code url}) to the index stored at
     * the configured {@code indexPath}. Errors are printed to standard error.
     * NOTE(review): documents are added, not replaced, so running this twice on the
     * same index directory presumably stores every blog twice — confirm.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexWriterConfig conf = new IndexWriterConfig(new SmartChineseAnalyzer());
        // try-with-resources closes the writer first and then the directory, which
        // was never closed before.
        try (Directory d = FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath")));
             IndexWriter indexWriter = new IndexWriter(d, conf)) {
            List<Map<String, Object>> list = blogDao.list(null, null);
            for (Map<String, Object> map : list) {
                Document doc = new Document();
                doc.add(new StringField("id", asText(map.get("id")), Field.Store.YES));
                // TextField tokenizes its value, so the title is searchable word by word.
                doc.add(new TextField("title", asText(map.get("title")), Field.Store.YES));
                doc.add(new StringField("url", asText(map.get("url")), Field.Store.YES));
                indexWriter.addDocument(doc);
            }
        } catch (IOException | InstantiationException | IllegalAccessException | SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a column value to field text. The values come from
     * {@code ResultSet.getObject}, so they are not necessarily strings (a numeric id
     * made the former {@code (String)} cast fail), and Lucene fields reject
     * {@code null}, which is mapped to the empty string.
     */
    private static String asText(Object value) {
        return value == null ? "" : value.toString();
    }
}
文件类:
lucene.properties:
# JDBC connection settings: connection URL, user name, password and driver class.
# NOTE(review): presumably loaded by DBAccess; the loading code is not shown here - confirm.
url=jdbc:mysql://localhost:3306/yj?autoReconnect=true
user=root
pwd=123
driver=com.mysql.jdbc.Driver
# Location of the Ehcache configuration file (NOTE(review): the consumer is not shown - confirm).
ehcacheXmlPath=C://blogCrawler/ehcache.xml
# Directory for blog images (NOTE(review): the consumer is not shown - confirm).
blogImages=C://blogCrawler/blogImages/
# Directory of the Lucene index; read via PropertiesUtil.getValue("indexPath")
# by IndexStarter (writing) and BlogActionBak (searching).
indexPath=C://blogCrawler/lucene
log4j.properties:
# Root logger: level INFO, output goes to the appenders "stdout" and "D".
log4j.rootLogger=INFO, stdout,D
# Console appender: writes to System.out with level, timestamp, caller location and message.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target = System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
# File appender "D": appends to log.log, rolls over at 100KB and keeps up to 100 backup files.
log4j.appender.D = org.apache.log4j.RollingFileAppender
log4j.appender.D.File = C://blogCrawler/bloglogs/log.log
log4j.appender.D.MaxFileSize=100KB
log4j.appender.D.MaxBackupIndex=100
log4j.appender.D.Append = true
log4j.appender.D.layout = org.apache.log4j.PatternLayout
log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
struts-base.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts PUBLIC
"-//Apache Software Foundation//DTD Struts Configuration 2.5//EN"
"http://struts.apache.org/dtds/struts-2.5.dtd">
<!-- Shared Struts settings and the abstract "base" package that the module packages extend. -->
<struts>
<!-- Request and response encoding. -->
<constant name="struts.i18n.encoding" value="UTF-8" />
<!-- Development conveniences: dev mode plus reloading of configuration and i18n resources.
     NOTE(review): these are development settings; confirm they are switched off for production. -->
<constant name="struts.devMode" value="true" />
<constant name="struts.configuration.xml.reload" value="true" />
<constant name="struts.i18n.reload" value="true" />
<!-- Enables dynamic method invocation. -->
<constant name="struts.enable.DynamicMethodInvocation" value="true" />
<package name="base" extends="struts-default" abstract="true">
<!-- The regex matches every method name, so any action method may be invoked.
     NOTE(review): consider restricting this list to the methods that are really needed. -->
<global-allowed-methods>regex:.*</global-allowed-methods>
</package>
</struts>
struts-sy.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts PUBLIC
"-//Apache Software Foundation//DTD Struts Configuration 2.5//EN"
"http://struts.apache.org/dtds/struts-2.5.dtd">
<!-- Action mappings of the "/sy" namespace. -->
<struts>
<package name="sy" extends="base" namespace="/sy">
<!-- Wildcard mapping: the part matched by * is used as the action method name ({1}).
     NOTE(review): the configured class is BlogAction, while the article only shows
     BlogActionBak - confirm which class is deployed. -->
<action name="/blog_*" class="com.yj.blog.web.BlogAction" method="{1}">
<!-- Result "blogList" renders the list page. -->
<result name="blogList">/blogList.jsp</result>
</action>
</package>
</struts>
struts.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts PUBLIC
"-//Apache Software Foundation//DTD Struts Configuration 2.5//EN"
"http://struts.apache.org/dtds/struts-2.5.dtd">
<!-- Root Struts configuration: only pulls in the other configuration files, in this order. -->
<struts>
<!-- Framework defaults. -->
<include file="struts-default.xml"></include>
<!-- Shared constants and the abstract "base" package. -->
<include file="struts-base.xml"></include>
<!-- Action mappings of the "/sy" namespace. -->
<include file="struts-sy.xml"></include>
</struts>
jsp页面:
<%@ page language="java" contentType="text/html; charset=UTF-8"
pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<%-- Blog list page: search form on top, then one table row per entry of the
     request attribute "blogList" (maps with the keys id, title and url). --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
</head>
<body>
<%-- Posts the search text as the parameter "title" to /sy/blog_list.action. --%>
<form action="${pageContext.request.contextPath}/sy/blog_list.action"
method="post">
博客标题:<input type="text" name="title"> <input type="submit"
value="确定">
</form>
<button id="add">添加</button>
<button id="refresh">刷新全局索引</button>
<table border="1" width="100%">
<tr>
<td>编号</td>
<td>名称</td>
<%-- This column shows the link to the blog; the former header "价格" (price)
     was a leftover from another list page. --%>
<td>链接</td>
<td>操作</td>
</tr>
<c:forEach items="${blogList }" var="blog">
<tr>
<td>${blog.id }</td>
<%-- The title is written unescaped on purpose: after a search it contains the
     highlight markup. NOTE(review): titles are crawled content - confirm they
     are sanitized before they are stored. --%>
<td>${blog.title }</td>
<td><a href="${blog.url }">${blog.title }</a></td>
<td>
<a href="">修改</a>
<a href="">删除</a>
</td>
</tr>
</c:forEach>
</table>
</body>
</html>
查询效果图:
未完待续…