Maven dependencies:
<dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>4.3.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>4.3.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>4.3.0</version> </dependency>
package com.tch.test.lucene.ram;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Demo that indexes every regular file of a source directory with Lucene and
 * then runs one fixed query against the resulting index.
 *
 * <p>The index is built twice by {@link #main(String[])}: once in an on-disk
 * {@link FSDirectory} and once in an in-memory {@link RAMDirectory}. Each
 * indexed document has two stored text fields: {@code content} (the file's
 * text) and {@code fileName} (the file's absolute path).
 *
 * <p>Resources are released with try/finally rather than try-with-resources
 * so the class keeps compiling on the Java level the rest of the file uses.
 */
public final class TestLucene {

    /** Analyzer shared by indexing and query parsing so both tokenize text the same way. */
    private static final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    /** Directory whose files are indexed by the no-argument demo entry points. */
    private static final String SOURCE_DIR = "D:\\lucene-test\\source";

    /** Directory that holds the on-disk index built by {@link #searchOnDisk()}. */
    private static final String INDEX_DIR = "D:\\lucene-test\\index";

    /** Charset used to decode the source files. */
    private static final String SOURCE_CHARSET = "GBK";

    /** Query text parsed against the {@code content} field by {@link #search(Directory)}. */
    private static final String QUERY_TEXT = "如何进行主题抓取";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 1000;

    /**
     * Runs the on-disk demo followed by the in-memory demo.
     *
     * @param args ignored
     * @throws Exception if closing one of the index directories fails
     */
    public static void main(String[] args) throws Exception {
        searchOnDisk();
        searchOnMemory();
    }

    /**
     * Builds the on-disk index from the default source directory, searches it
     * and closes the index directory.
     *
     * @throws IOException if closing the index directory fails
     */
    public static void searchOnDisk() throws IOException {
        Directory directory = indexOnDisk(SOURCE_DIR, INDEX_DIR);
        if (directory == null) {
            // The index directory could not be opened; indexOnDisk has already
            // printed the cause, and there is nothing to search or close.
            return;
        }
        try {
            search(directory);
        } finally {
            directory.close();
        }
    }

    /**
     * Builds the in-memory index from the default source directory, searches
     * it and closes the index directory.
     *
     * @throws IOException if closing the index directory fails
     */
    public static void searchOnMemory() throws IOException {
        Directory directory = indexOnMemory(SOURCE_DIR);
        try {
            search(directory);
        } finally {
            directory.close();
        }
    }

    /**
     * Runs the fixed demo query against the {@code content} field of the given
     * index and prints the stored {@code fileName} and {@code content} of each
     * hit to standard output.
     *
     * <p>I/O and query-parse failures are printed to standard error and not
     * propagated. The index reader is closed whether or not the search succeeds.
     *
     * @param directory the index to search; must not be {@code null}
     */
    public static void search(Directory directory) {
        DirectoryReader ireader = null;
        try {
            ireader = DirectoryReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Parse the query with the same analyzer that was used for indexing.
            QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer);
            Query query = parser.parse(QUERY_TEXT);
            ScoreDoc[] hits = isearcher.search(query, null, MAX_HITS).scoreDocs;
            for (ScoreDoc hit : hits) {
                Document hitDoc = isearcher.doc(hit.doc);
                System.out.println(hitDoc.get("fileName"));
                System.out.println(hitDoc.get("content"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            if (ireader != null) {
                try {
                    ireader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Indexes the regular files of {@code sourceDir} into an on-disk index at
     * {@code indexDir}, replacing any index already stored there.
     *
     * <p>I/O failures are printed to standard error and not propagated.
     *
     * @param sourceDir directory whose files are indexed
     * @param indexDir  directory in which the index is stored
     * @return the opened index directory, which the caller must close, or
     *         {@code null} if the index directory could not be opened
     */
    public static Directory indexOnDisk(String sourceDir, String indexDir) {
        Directory directory = null;
        try {
            directory = FSDirectory.open(new File(indexDir));
            buildIndex(directory, sourceDir);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return directory;
    }

    /**
     * Indexes the regular files of {@code sourceDir} into a new in-memory index.
     *
     * <p>I/O failures are printed to standard error and not propagated; in
     * that case the returned directory may contain no index.
     *
     * @param sourceDir directory whose files are indexed
     * @return the in-memory index directory, which the caller must close
     */
    public static Directory indexOnMemory(String sourceDir) {
        Directory directory = new RAMDirectory();
        try {
            buildIndex(directory, sourceDir);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return directory;
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Lines are joined with a single {@code '\n'} so that the last word of
     * one line and the first word of the next stay separate tokens when the
     * text is analyzed. The file is closed even if reading fails.
     *
     * @param FileName path of the file to read
     * @param charset  name of the charset used to decode the file
     * @return the file's text, without a trailing line separator
     * @throws IOException if the file cannot be opened or read, or the charset
     *                     name is not supported
     */
    public static String readFileContent(String FileName, String charset)
            throws IOException {
        FileInputStream in = new FileInputStream(FileName);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder content = new StringBuilder();
            String line = reader.readLine();
            while (line != null) {
                content.append(line);
                line = reader.readLine();
                if (line != null) {
                    content.append('\n');
                }
            }
            return content.toString();
        } finally {
            // Closing the underlying stream also covers the case where the
            // reader could not be constructed (unsupported charset).
            in.close();
        }
    }

    /**
     * Writes one document per regular file of {@code sourceDir} into
     * {@code directory}, replacing any index already present there.
     *
     * <p>The writer is committed and closed on success. On failure it is rolled
     * back, so no partial index is committed and the write lock is released.
     *
     * @param directory the index to write to
     * @param sourceDir directory whose files are indexed
     * @throws IOException if {@code sourceDir} cannot be listed, a file cannot
     *                     be read, or writing the index fails
     */
    private static void buildIndex(Directory directory, String sourceDir) throws IOException {
        File[] textFiles = new File(sourceDir).listFiles();
        if (textFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list source directory: " + sourceDir);
        }
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        config.setOpenMode(OpenMode.CREATE);
        IndexWriter iwriter = new IndexWriter(directory, config);
        boolean success = false;
        try {
            for (File currentFile : textFiles) {
                if (!currentFile.isFile()) {
                    // Sub-directories and other non-regular entries cannot be read as text.
                    continue;
                }
                String path = currentFile.getAbsolutePath();
                System.out.println(String.format("开始在文件 %s 上创建索引", path));
                Document doc = new Document();
                doc.add(new Field("content", readFileContent(path, SOURCE_CHARSET),
                        TextField.TYPE_STORED));
                doc.add(new Field("fileName", path, TextField.TYPE_STORED));
                iwriter.addDocument(doc);
            }
            success = true;
        } finally {
            if (success) {
                iwriter.close();
            } else {
                iwriter.rollback();
            }
        }
    }
}