Lucene4.2.1示例,之前也做过3.6的示例。3.6的分词需要使用IKAnalyzer或者其他的分词,对中文的支持可能才会更好,但是4.2为我们提供了SmartChineseAnalyzer这个中文分词器。
下面是一个简单的示例程序,分别对应增删改查:
package com.xiva.test.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
public class IvFileIndex
{
    /** Files collected by the recursive scan; pre-sized to reduce resizing. */
    private static List<File> fileList = new ArrayList<File>(1024);

    /**
     * Recursively collects every regular file under {@code fileDir} into
     * {@link #fileList}.
     *
     * @param fileDir directory to scan; directories that cannot be listed
     *                (I/O error, not a directory) are skipped silently
     */
    public static void listAllFile(File fileDir)
    {
        File[] files = fileDir.listFiles();
        if (files == null)
        {
            // listFiles() returns null (not an empty array) on I/O error
            // or when fileDir is not a directory — guard against NPE.
            return;
        }
        for (File file : files)
        {
            if (file.isDirectory())
            {
                listAllFile(file);
            }
            else
            {
                fileList.add(file);
            }
        }
    }

    /**
     * Builds a full-text index over all {@code .java} files under
     * {@code F:\WorkSpace}, storing each file's path ("path", stored,
     * not analyzed) and its GBK-decoded content ("body", analyzed).
     * {@code OpenMode.CREATE} already truncates any existing index, so
     * no explicit {@code deleteAll()} is needed.
     *
     * @param args unused
     * @throws Exception on any indexing or I/O failure
     */
    public static void main(String[] args) throws Exception
    {
        File fileDir = new File("F:\\WorkSpace");
        File indexDir = new File("F:\\WorkSpace\\EclipseProjects\\luceneIndex");
        Analyzer luceneAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_42);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, luceneAnalyzer);
        config.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE);
        Directory fsDir = new SimpleFSDirectory(indexDir);
        IndexWriter indexWriter = new IndexWriter(fsDir, config);
        listAllFile(fileDir);
        long startTime = System.currentTimeMillis();
        try
        {
            // Add one Document per .java file.
            for (File txtFile : fileList)
            {
                if (!txtFile.isFile() || !txtFile.getName().endsWith(".java"))
                {
                    continue;
                }
                System.out.println(txtFile.getName());
                FileInputStream fis;
                try
                {
                    fis = new FileInputStream(txtFile);
                }
                catch (FileNotFoundException fnfe)
                {
                    // File vanished between the scan and the open — skip it.
                    continue;
                }
                try
                {
                    Document document = new Document();
                    // "path" doubles as the exact-match key used by the
                    // delete/update examples; "body" is tokenized text.
                    Field fieldPath = new StringField("path", txtFile.getPath(), Field.Store.YES);
                    Field fieldBody = new TextField("body", new BufferedReader(new InputStreamReader(fis, "GBK")));
                    document.add(fieldPath);
                    document.add(fieldBody);
                    indexWriter.addDocument(document);
                }
                finally
                {
                    // The reader is fully consumed inside addDocument, so the
                    // stream can be closed immediately afterwards.
                    fis.close();
                }
                System.out.println("被索引文件:" + txtFile.getCanonicalPath());
            }
            // Merge down to at most 10 segments to speed up later searches.
            indexWriter.forceMerge(10);
        }
        finally
        {
            // Always release the index write lock, even when indexing fails.
            indexWriter.close();
        }
        long endTime = System.currentTimeMillis();
        System.out.println("索引耗费时间:" + (endTime - startTime) + " 毫秒!");
    }
}
package com.xiva.test.lucene;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Deletes index entries whose stored {@code "path"} field matches files under
 * {@code E:\data\lucene} whose names end in {@code u.txt}. The index is opened
 * in APPEND mode, so it must already exist.
 *
 * @author xiva
 * @version [version, 2013-4-30]
 * @see [related classes/methods]
 * @since [product/module version]
 */
public class IvIndexDelete
{
    public static void main(String[] args) throws Exception
    {
        File fileDir = new File("E:\\data\\lucene");
        File indexDir = new File("E:\\data\\index");
        Analyzer luceneAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_42);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42,
                luceneAnalyzer);
        // APPEND: fail fast if the index is missing; we only remove documents.
        config.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.APPEND);
        Directory fsDir = new SimpleFSDirectory(indexDir);
        IndexWriter indexWriter = new IndexWriter(fsDir, config);
        File[] txtFiles = fileDir.listFiles();
        long startTime = System.currentTimeMillis();
        try
        {
            // listFiles() returns null when fileDir is missing or unreadable.
            if (txtFiles != null)
            {
                for (File txtFile : txtFiles)
                {
                    if (txtFile.isFile() && txtFile.getName().endsWith("u.txt"))
                    {
                        // Delete by the exact stored path term. The file's
                        // contents are never needed, so no stream is opened
                        // (the original opened a FileInputStream it never read).
                        indexWriter.deleteDocuments(new Term("path",
                                txtFile.getPath()));
                        System.out.println("被删除索引文件:" + txtFile.getCanonicalPath());
                    }
                }
            }
            indexWriter.forceMerge(10);
        }
        finally
        {
            // Always release the index write lock, even on failure.
            indexWriter.close();
        }
        //测试一下索引的时间
        long endTime = System.currentTimeMillis();
        System.out.println("删除索引耗费时间:" + (endTime - startTime) + " 毫秒!");
    }
}
package com.xiva.test.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Re-indexes files under {@code E:\data\lucene} whose names end in
 * {@code u.txt}: each matching file's document is replaced via
 * {@code updateDocument} keyed on the stored {@code "path"} field.
 */
public class IvIndexUpdate
{
    /**
     * Updates (delete-then-add) the index entry for every matching file.
     * The index is opened in APPEND mode and must already exist.
     *
     * @throws Exception on any indexing or I/O failure
     */
    public static void updateIndex() throws Exception
    {
        File fileDir = new File("E:\\data\\lucene");
        File indexDir = new File("E:\\data\\index");
        Analyzer luceneAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_42);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, luceneAnalyzer);
        config.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.APPEND);
        Directory fsDir = new SimpleFSDirectory(indexDir);
        IndexWriter indexWriter = new IndexWriter(fsDir, config);
        File[] txtFiles = fileDir.listFiles();
        long startTime = System.currentTimeMillis();
        try
        {
            // listFiles() returns null when fileDir is missing or unreadable.
            if (txtFiles != null)
            {
                for (File txtFile : txtFiles)
                {
                    if (!txtFile.isFile() || !txtFile.getName().endsWith("u.txt"))
                    {
                        continue;
                    }
                    FileInputStream fis;
                    try
                    {
                        fis = new FileInputStream(txtFile);
                    }
                    catch (FileNotFoundException fnfe)
                    {
                        // File vanished between listing and opening — skip it.
                        continue;
                    }
                    try
                    {
                        Document document = new Document();
                        // "path" is the exact-match key; "body" is analyzed text.
                        Field fieldPath = new StringField("path", txtFile.getPath(), Field.Store.YES);
                        Field fieldBody = new TextField("body", new BufferedReader(new InputStreamReader(fis, "GBK")));
                        document.add(fieldPath);
                        document.add(fieldBody);
                        // Atomically replaces any document with the same path term.
                        indexWriter.updateDocument(new Term("path", txtFile.getPath()), document);
                    }
                    finally
                    {
                        // The reader is consumed inside updateDocument.
                        fis.close();
                    }
                    System.out.println("被更新索引文件:" + txtFile.getCanonicalPath());
                }
            }
            indexWriter.forceMerge(10);
        }
        finally
        {
            // Always release the index write lock, even on failure.
            indexWriter.close();
        }
        long endTime = System.currentTimeMillis();
        System.out.println("更新索引耗费时间:" + (endTime - startTime) + " 毫秒!");
    }

    public static void main(String[] args) throws Exception
    {
        updateIndex();
    }
}
package com.xiva.test.lucene;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Searches the index built by IvFileIndex for the term "索引" in the
 * analyzed "body" field and prints the stored path of each hit.
 */
public class IvFileSearch
{
    public static void main(String[] args) throws IOException
    {
        String queryString = "索引";
        String field = "body";
        Query query = null;
        TopDocs docs = null;
        File indexDir = new File("F:\\WorkSpace\\EclipseProjects\\luceneIndex");
        IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir));
        try
        {
            IndexSearcher searcher = new IndexSearcher(reader);
            // StopFilterFactory factory = new StopFilterFactory();
            // factory.getStopWords()
            Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_42);
            try
            {
                long startTime = System.currentTimeMillis();
                QueryParser qp = new QueryParser(Version.LUCENE_42, field, analyzer);
                query = qp.parse(queryString);
                long endTime = System.currentTimeMillis();
                System.out.println("索引耗费时间:" + (endTime - startTime) + " 毫秒!");
            }
            catch (ParseException e)
            {
                e.printStackTrace();
            }
            // If parsing failed, query is still null — searching would NPE.
            if (query != null)
            {
                docs = searcher.search(query, 25);// 可以分页查询
                ScoreDoc scoreDocs[] = docs.scoreDocs;
                // Iterate over the returned page only: scoreDocs holds at most
                // 25 entries, while totalHits counts ALL matches — looping to
                // totalHits overruns the array when there are more than 25 hits.
                for (int i = 0; i < scoreDocs.length; i++)
                {
                    Document targetDoc = searcher.doc(scoreDocs[i].doc);
                    String path = targetDoc.get("path");
                    System.out.println("path:" + path);
                }
            }
        }
        finally
        {
            // Release the index files held open by the reader.
            reader.close();
        }
    }
}
PS:在进行数据库操作时,相信大家都有相关的方法去更新或者删除索引,比如及时更新或者使用定时扫描表的方法。数据库本身也具有全文索引的特性,比如Oracle和MSSQL。
对于文件的操作,我的解决方法是:可以采用 利用JNA对文件进行监听之观察者模式 这里给出的方法来更新或者删除索引。