运行weblucene.war需要先运行
java org.apache.lucene.demo.IndexHTML -create -index "D:\eclipse" D:\llunece\index"
(D:\eclipse这个文件夹是数据文件所在目录) 后面那个是索引所在目录,然后启动tomcat ,查询英文单词可以显示结果,但是输入中文就没有结果
http://localhost:8080/lucene2.4/results.jsp?query=%D3%A2%CE%C4&maxresults=100(输入中文就成乱码了)
queryString=new String(queryString.getBytes("iso8859-1"),"gb2312");
设置maxresults的大小可以实现类似分页的效果
http://www.iteye.com/topic/326422
希望简单和SOA风格的话就用solr
希望和hibernate或者JPA集成就用compass
1.命令:java org.apache.lucene.demo.IndexHTML -create -index "D:\eclipse" D:\llunece\index" ...
命令不对。用法是这样的:IndexHTML [-create] [-index <index>] <root_directory>
比如 IndexHTML -create -index E:\EclipseSDK\workspace\CoreLabView\WEB-INF\LuceneIndex E:\EclipseSDK\workspace\CoreLabView\help\output
注意<index>是将要被创建的索引的保存目录,而<root_directory>则是你的数据文件所在的目录
To use Lucene, an application should:
- Create Documents by adding Fields;
- Create an IndexWriter and add documents to it with addDocument();
- Call QueryParser.parse() to build a query from a string; and
- Create an IndexSearcher and pass the query to its search() method.
中文分词组件下载:http://d.download.csdn.net/down/160753/linliangyi2006
MIK_CAnalyzer mkAnalyzer = new MIK_CAnalyzer();
IndexWriter writer = new IndexWriter(directory,mkAnalyzer,true);
http://www.iteye.com/topic/418685
http://phantom.iteye.com/blog/66068
//采用正向最大匹配的中文分词算法
Analyzer analyzer = new MMAnalyzer();
QueryParser parser = new QueryParser(fieldName, analyzer);
Query query = parser.parse("印度尼西亚 6.2级地震");//检索词(检索内容并不连在一起,中间有空格)
http://www.chedong.com/tech/lucene.html
http://pan-java.iteye.com/blog/364513
http://www.jdon.com/jivejdon/query/taggedThreadList.shtml?tagID=577
一.创建索引:
索引可以保存在磁盘,也可以保存在内存。
http://www.iteye.com/topic/415288
Exception in thread "main" java.io.IOException: 设备未就绪。??
Directory index = FSDirectory.getDirectory(new File(indexDir));这一行??
我的d盘不存在
2. doc.add(new Field("content",curArt.getContent(),Field.Store.NO,Field.Index.TOKENIZED));
//content:不存储但分词。
Query query2 = new TermQuery(new Term("content", "南海"));//查询能使用它,但是
doc2.get("content") 返回 null(因为该字段使用了 Field.Store.NO,内容没有被存储)
二.高亮显示:
/**
 * Small demo that builds a one-document Lucene index on disk, searches it, and
 * prints each hit with the matched terms wrapped in red {@code <font>} tags.
 */
public class MyHighLighter {
    /** Directory in which the index is created and from which it is searched. */
    private String indexPath = "F:\\index";
    /** Analyzer shared by indexing, query parsing and highlighting. */
    private Analyzer analyzer;
    /** Searcher opened by {@link #search(String, String)} and closed before it returns. */
    private IndexSearcher searcher;

    /** Creates the demo with a {@code ThesaurusAnalyzer}. */
    public MyHighLighter() {
        analyzer = new ThesaurusAnalyzer(); // highlighting works with this analyzer
    }

    /**
     * Builds the index at {@code indexPath} containing a single document whose
     * {@code contents} field is both stored and tokenized.
     *
     * @throws IOException if the index cannot be written
     */
    public void createIndex() throws IOException {
        IndexWriter writer = new IndexWriter(indexPath, analyzer, true);
        try {
            Document docA = new Document();
            String fileTextA = "因为火烧云总是燃烧着消失在太阳冲下地平线的时刻,然后便是宁静的自然的天籁,没有谁会在这样的时光的镜片里伤感自语,因为灿烂给人以安静的舒适感。";
            // The field must be stored (Store.YES) so search() can read the text back for highlighting.
            Field fieldA = new Field("contents", fileTextA, Field.Store.YES, Field.Index.TOKENIZED);
            docA.add(fieldA);
            writer.addDocument(docA);
            writer.optimize();
        } finally {
            // Always close the writer, even when indexing fails, so it is not left open.
            writer.close();
        }
    }

    /**
     * Searches the index and prints every hit with the matched terms highlighted.
     * Hits whose {@code fieldName} value is not stored are skipped.
     *
     * @param fieldName name of the field to search in and to highlight
     * @param keyword   user-supplied query string, parsed with {@code QueryParser}
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException           if the index cannot be read
     * @throws ParseException        if {@code keyword} cannot be parsed as a query
     */
    public void search(String fieldName, String keyword) throws CorruptIndexException, IOException, ParseException {
        searcher = new IndexSearcher(indexPath);
        try {
            // Build the QueryParser that turns the user's keyword into a Query.
            QueryParser queryParse = new QueryParser(fieldName, analyzer);
            Query query = queryParse.parse(keyword);
            Hits hits = searcher.search(query);
            // The formatter only holds the two tag strings, so one instance serves all hits.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                String text = doc.get(fieldName);
                if (text == null) {
                    // Field is not stored for this hit: nothing to highlight.
                    // This check must come before text.length() is used below.
                    continue;
                }
                Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
                // Fragment size equals the text length so the whole text comes back as one fragment.
                highlighter.setTextFragmenter(new SimpleFragmenter(text.length()));
                TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
                String highLightText = highlighter.getBestFragment(tokenStream, text);
                System.out.println("★高亮显示第 "+(i+1) +" 条检索结果如下所示:");
                System.out.println(highLightText);
            }
        } finally {
            // Release the searcher even when parsing, searching or highlighting throws.
            searcher.close();
        }
    }

    /**
     * Test entry point: builds the index, then searches the {@code contents} field.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MyHighLighter mhl = new MyHighLighter();
        try {
            mhl.createIndex();
            mhl.search("contents", "因为");
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}
http://idealab.iteye.com/blog/365869
http://tech.ddvip.com/2008-10/122343242974974.html deleteIndex