分词说明:原文称使用中科院的分词,但下面的代码实际实例化的是 IKAnalyzer 中文分词器。
1. 对指定目录中的文件建立索引:
/**
 * Path constants shared by the indexing and search examples.
 *
 * <p>Both values are relative path names.
 */
public final class Constants {

    /** Path of the directory holding the files to be indexed. */
    public static final String INDEX_FILE_PATH = "test";

    /** Path of the directory where the index is stored. */
    public static final String INDEX_STORE_PATH = "index";

    /** Not instantiable: this class only holds constants. */
    private Constants() {
    }
}
public class LuceneIndex { // 索引器对象 private IndexWriter writer = null ; // 在构造函数中建立索引器 public LuceneIndex() { try { Analyzer analyzer = new IKAnalyzer(); writer = new IndexWriter(FSDirectory.open( new File(Constants.INDEX_STORE_PATH)), analyzer, true , IndexWriter.MaxFieldLength.LIMITED); // 有变化的地方 } catch (Exception e) { e.printStackTrace(); } } public Document getDocument(File f) throws Exception { // 生成文档对象 Document doc = new Document(); String text = ""; String line = ""; // 获取文件输入流 FileInputStream input = new FileInputStream(f); BufferedReader bufferedReader = new BufferedReader( new InputStreamReader(input)); line = bufferedReader.readLine(); while(line!= null){ text +=line; line = bufferedReader.readLine(); } bufferedReader.close(); // 添加索引内容 // doc.add(new Field( "content" , bufferedReader)); // Lucene3.0有变化的地方 doc.add(new Field( "content" , text, Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field( "path" , f.getAbsolutePath(), Field.Store.YES, Field.Index.ANALYZED)); // Lucene3.0有变化的地方 return doc; } public void writeToIndex() throws Exception { File folder = new File(Constants.INDEX_FILE_PATH); if (folder.isDirectory()) { String[] files = folder.list(); for ( int i = 0 ; i < files.length; i++) { File file = new File(folder, files[i]); System.out.println("name:"+file); Document doc = getDocument(file); System.out.println("正在建立索引:" + file + " " ); // 添加索引文件 writer.addDocument(doc); } }else { System.out.println("-----folder.isDirectory():false." ); } } public void close() throws Exception { writer.close(); } public static void main(String[] args) throws Exception { // 声明一个对象 LuceneIndex indexer = new LuceneIndex(); // 建立索引 Date start = new Date(); indexer.writeToIndex(); Date end = new Date(); System.out.println("建立索引用时:" + (end.getTime() - start.getTime()) + "毫秒" ); // 关闭索引器 indexer.close(); } }
2,该类用来查询结果public class LuceneSearch { // 声明一个IndexSearcher对象 private IndexSearcher searcher = null; // 声明一个Query对象 private Query query = null; public LuceneSearch() { try { // 创建索引器 searcher = new IndexSearcher(IndexReader.open(FSDirectory .open(new File(Constants.INDEX_STORE_PATH)))); } catch (Exception e) { e.printStackTrace(); } } public final TopDocs search(String keyword) throws IOException { System.out.println("正在搜素关键字:" + keyword); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); Analyzer analyzer = new IKAnalyzer(); showToken(analyzer, keyword); try { QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer); // 将待检索关键字打包成Query对象 query = queryParser.parse(keyword); Date start = new Date(); // 使用索引器检索Query,得到检索结果Hits对象 TopDocs hits = searcher.search(query, 30); // 有变化的地方 Date end = new Date(); System.out.println("搜索完毕用时:" + (end.getTime() - start.getTime()) + "毫秒"); return hits; } catch (Exception ex) { return null; } } // 该方法用于查看分词结果 public static void showToken(Analyzer analyzer, String text) throws IOException { Reader reader = new StringReader(text); TokenStream stream = (TokenStream) analyzer.tokenStream("", reader); // 添加工具类 注意:以下这些与之前lucene2.x版本不同的地方 TermAttribute termAtt = (TermAttribute) stream .addAttribute(TermAttribute.class); OffsetAttribute offAtt = (OffsetAttribute) stream .addAttribute(OffsetAttribute.class); // 循环打印出分词的结果,及分词出现的位置 while (stream.incrementToken()) { System.out.print(termAtt.term() + "|(" + offAtt.startOffset() + " " + offAtt.endOffset() + ")"); } System.out.println(); } public void printResult(TopDocs hits) throws CorruptIndexException, IOException, ParseException { Analyzer analyzer = new IKAnalyzer(); if (hits.totalHits == 0) { System.out.println("没有找到您需要的结果"); return; } else { for (int i = 0; i < hits.scoreDocs.length; i++) { try { ScoreDoc scoreDoc = hits.scoreDocs[i];// 有变化的地方 Document doc = searcher.doc(scoreDoc.doc);// 有变化的地方 String text = doc.get("content"); System.out.print("这是第" 
+ (i + 1) + "个检索结果,文件路径为:"); System.out.println(doc.get("path")); System.out.println(text); } catch (Exception ex) { ex.printStackTrace(); } } } System.out.println("--------------------------------"); } public static void main(String[] args) throws Exception { LuceneSearch test = new LuceneSearch(); TopDocs hits = null; try { hits = test.search("楚"); test.printResult(hits); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } } }