开发自己的搜索引擎完成了一段时间了,现在准备开始梳理一下思路,把以前的总结一下,为以后做真正的“谷歌”埋下伏笔,呵呵。。。。。。
一.Lucene的下载
牛逼的Apache旗下的Lucene,呵呵,无人不知啊,http://lucene.apache.org/,去这个地址自己下载,别说不会Download
二.使用Lucene建立索引
将下载下来的包解压,把里面的Core,memory,analyzer啥的都拿出来,配置到自己的Eclipse上面,下面的事情就是写代码了。
- package com.dreamers.creatindex;
- import java.io.File;
- import java.util.ArrayList;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.FSDirectory;
- import org.dom4j.DocumentException;
- import org.wltea.analyzer.lucene.IKAnalyzer;
- import com.dreamers.xml.*;
- import com.dreamers.read.*;
- /**
- * @category 创建所有XML索引
- * @author bird
- *
- */
- public class CreatIndex {
- private String INDEX_STORE_PATH ;
- //创建索引
- @SuppressWarnings("deprecation")
- public void creatIndex(){
- try{
- GetPath path = new GetPath();
- INDEX_STORE_PATH = path.getIndexPath();
- File file = new File(INDEX_STORE_PATH);
- Analyzer analyzer = new IKAnalyzer();
- XmlReader xml = new XmlReader();
- FSDirectory directory = FSDirectory.open(file);
- IndexWriter writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED);
- ArrayList<String> lisId = xml.getId();
- ArrayList<String> lisTitle = xml.getTitle();
- ArrayList<String> lisKeyWords = xml.getKeyWords();
- ArrayList<String> lisKind = xml.getKind();
- ArrayList<String> lisDescribe = xml.getDescribe();
- ArrayList<String> lisDate = xml.getDate();
- ArrayList<String> lisUrl = xml.getUrl();
- ArrayList<String> lisAuthor = xml.getAuthor();
- ArrayList<String> lisPublisher = xml.getPublisher();
- //System.out.println(lisUrl.get(5));
- for (int i = 0; i < xml.getCount();i++){
- Document doc = new Document();
- //为ID创建Field
- Field field = new Field("id",lisId.get(i),Field.Store.YES,Field.Index.NOT_ANALYZED );
- doc.add(field);
- //为title创建索引
- field = new Field("title",lisTitle.get(i),Field.Store.YES,Field.Index.ANALYZED);
- doc.add(field);
- //为keywords创建索引
- field = new Field("keywords",lisKeyWords.get(i),Field.Store.YES,Field.Index.ANALYZED);
- doc.add(field);
- //为kind创建索引
- field = new Field("kind",lisKind.get(i),Field.Store.YES,Field.Index.NOT_ANALYZED);
- doc.add(field);
- //为describe创建索引
- field = new Field("describe",lisDescribe.get(i),Field.Store.YES,Field.Index.ANALYZED);
- doc.add(field);
- //为data创建索引
- field = new Field("date",lisDate.get(i),Field.Store.YES,Field.Index.NOT_ANALYZED);
- doc.add(field);
- //为URL创建索引
- field = new Field("url",lisUrl.get(i),Field.Store.YES,Field.Index.NOT_ANALYZED);
- doc.add(field);
- //为author创建索引
- field = new Field("author",lisAuthor.get(i),Field.Store.YES,Field.Index.NOT_ANALYZED);
- doc.add(field);
- //为publisher创建索引
- field = new Field("publisher",lisPublisher.get(i),Field.Store.YES,Field.Index.NOT_ANALYZED);
- doc.add(field);
- }
- writer.addDocument(doc);
- }
- writer.close();
- //directory.close();
- System.out.println("索引创建完毕");
- } catch (Exception e){
- e.printStackTrace();
- }
- }
- public static void main(String [] args) throws DocumentException{
- CreatIndex index = new CreatIndex();
- index.creatIndex();
- }
- }