package com.lin.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class LuceneUtil {
private Log log = LogFactory.getLog(LuceneUtil.class);
private IndexWriter writer;
private IndexReader reader;
private static Tika tika = new Tika();
/**
* 建立索引
* @param srcDriectory 需要建立索引的文件位置
* @param indexDirectory 索引放置位置
* @param analyzer 解析器
* @param version lucene版本
* @param openMode 打开方式(1.创建,2追加,3创建或追加)
* @throws IOException
* @throws TikaException
*/
@SuppressWarnings("deprecation")
public void diskIndex(File srcDriectory,File indexDirectory, Analyzer analyzer, Version version,OpenMode openMode )
throws IOException, TikaException {
if(!indexDirectory.exists()){
indexDirectory.mkdirs();
}
FSDirectory fsd = FSDirectory.open(indexDirectory);
IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
config.setOpenMode(openMode);
writer = new IndexWriter(fsd, config);
List<File> files = FileUtil.listFile(srcDriectory);
Document doc = null;
for (File file : files) {
doc = new Document();
doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
doc.add(new Field("path", file.getAbsolutePath(), Store.YES,
Index.NO));
doc.add(new Field("content", tikaParseFileToString(file), Store.YES,
Index.ANALYZED));
writer.addDocument(doc);
}
writer.commit();
}
/**
* 获取查询把柄
* @param indexDirectory
* @return
* @throws IOException
*/
public IndexSearcher getIndexSearch(File indexDiretory) throws IOException{
Directory directory = FSDirectory.open(indexDiretory);
return new IndexSearcher(reader.open(directory));
}
public String search(File indexDirectory,String word,Analyzer analyzer) throws IOException, ParseException{
IndexSearcher indexSearch = getIndexSearch(indexDirectory);
QueryParser parser = new QueryParser( "content",analyzer);
Query query = parser.parse(word);
TopDocs docs = indexSearch.search(query, 10);
ScoreDoc[] sds = docs.scoreDocs;
for(ScoreDoc sd:sds){
Document document = indexSearch.doc(sd.doc);
System.out.println("name==========="+document.get("name")+"path==========="+document.get("path")
);
}
return null;
}
public String tikaParseFileToString(File file) throws IOException, TikaException{
return tika.parseToString(file);
}
public static void main(String[] args)throws Exception {
//new LuceneUtil().diskIndex(new File("d:\\lucene"), new File("d:\\luceneIndex"), new IKAnalyzer(), Version.LUCENE_4_10_2, OpenMode.CREATE);
new LuceneUtil().search(new File("d:\\luceneIndex"),"接口",new IKAnalyzer());
Tika tika = new Tika();
String str = tika.parseToString(new FileInputStream("d:\\lucene\\IKAnalyzer中文分词器V2012_FF使用手册.pdf"));
System.out.println(str);
}
}
项目依赖使用maven:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.lin.project</groupId>
<artifactId>learn</artifactId>
<packaging>war</packaging>
<version>0.0.1-SNAPSHOT</version>
<name>mybatis Maven Webapp</name>
<url>http://maven.apache.org</url>
<properties>
<redis.clients.version>2.6.0</redis.clients.version>
<spring.data.redis.version>1.4.0.RELEASE</spring.data.redis.version>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.2.7</version>
</dependency>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis-spring</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-beans</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-tx</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-aop</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-webmvc</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context-support</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-orm</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>4.0.6.RELEASE</version>
</dependency>
<dependency>
<groupId>org.aspectj</groupId>
<artifactId>aspectjweaver</artifactId>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>jstl</groupId>
<artifactId>jstl</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>taglibs</groupId>
<artifactId>standard</artifactId>
<version>1.1.2</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.32</version>
</dependency>
<dependency>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz-jobs</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>${redis.clients.version}</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-redis</artifactId>
<version>${spring.data.redis.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.10.2</version>
</dependency>
<!-- <dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-app</artifactId>
<version>1.6</version>
</dependency> -->
</dependencies>
<build>
<finalName>learn</finalName>
</build>
</project>
添加额外tika和IKAnalyzer的jar包
http://pan.baidu.com/s/1o69fCeQ 提取码:122b
http://pan.baidu.com/s/1hq6AalY 提取码:k3xp