搜索技术--建立索引(lucene3.0)

 
搜索技术--建立索引(lucene3.0) (2011-01-11 14:52:53) 转载 标签: indexwriter, analyzer, document, field, 搜索引擎, 索引, 文档, 域, java, it 分类: JAVA高级软件工程师
导言:从数据库中取出数据,预先进行处理;按照 IndexWriter ----> Analyzer ----> Document ----> Field 的顺序对数据建立索引。

利用索引完成搜索,请查看:搜索技术--搜索、排序并高亮显示(lucene)

package com.jrj.datamart.service;

import java.io.File;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.jrj.datamart.model.ApiIndexEntity;
import com.jrj.datamart.model.ApiIndicator;
import com.jrj.datamart.model.ApiInfo;
import com.jrj.datamart.model.ApiInfoQuery;

// 对datamart的元数据的建立索引;并给每个表添加,表的说明和字段的中文名;

//方便查询 索引 Lucene 3.0.2
public class IndexerServiceImp implements IndexerService {

 // 保存索引文件的地方
 private String INDEX_DIR = "F:\\MyLuceneMD4\\LuceneFileIndexDir";
 private String index_dir;
 // private File file=new File(INDEX_DIR);
 private String data_dir;
 private ApiInfo apiInfo;
 private ApiInfoQuery apiInfoQuery = new ApiInfoQuery();
 private ApiInfoService apiInfoService;
 private ApiPrepareService apiPrepareService;
 private List<ApiIndicator> apiIndicators = new ArrayList<ApiIndicator>();
 private ApiIndicator apiIndicator;
 private StringBuilder newsb = new StringBuilder();
 private ResultSet tempRs = null;
 private Document doc;
 private Map<Integer, String> idFieldMap = new HashMap<Integer, String>();
 // 表的字段集合
 private List<String> outputFileds = new ArrayList<String>();
 // lucene的field集合
 private List<Field> fields = new ArrayList<Field>();

//struts2下的主方法

 @Override
 public String execute() throws Exception {
  // File file = getFile(getAbsoultePath());
  File file = getFile(null);
  long start = new Date().getTime();
  // 2
  int numIndexed = getAllDataAndIndexing(file);
  long end = new Date().getTime();
  System.out.println("Indexing " + numIndexed + " files took "
    + (end - start) + " milliseconds");
  return "success";
 }

 @Override

//从数据库中取得所有需要的数据,并建所有
 public int getAllDataAndIndexing(File file) throws Exception {
  SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
    Version.LUCENE_30, true);
  IndexWriter writer = new IndexWriter(FSDirectory.open(file), analyzer,
    true, IndexWriter.MaxFieldLength.LIMITED);

  // 获取表中的数据; 对数据进行处理,使得其变为索引需要的数据的结构。
  List<ApiIndexEntity> apiIndexEntityLists = gainApiInfoAndComposite();
  // 建立索引
  indexData(writer, apiIndexEntityLists);
  int numIndexed = writer.numDocs();
  writer.optimize();
  writer.close();
  // JDBCUtil.close();
  return numIndexed;
 }

// 对特定表的记录,采用特定writer,索引该方法是lucene的indexer的关键方法
// writer---->document---->field
 private void indexData(IndexWriter writer,
   List<ApiIndexEntity> apiIndexEntityLists) throws Exception {
  if (apiIndexEntityLists == null) {
   return;
  }
  ApiIndexEntity apiIndexEntity;
  // 遍历结果集,建立三个域:内容(contents),ApiId,API中文名
  for (int k = 0; k < apiIndexEntityLists.size(); k++) {
   apiIndexEntity = apiIndexEntityLists.get(k);
   if (apiIndexEntity == null) {
    continue;
   }
   doc = new Document();
   Field fieldIcnname=new Field("icnname",
     (apiIndexEntity.getIcnname() == null ? "" : apiIndexEntity
       .getIcnname()), Field.Store.YES,
     Field.Index.ANALYZED);
   fieldIcnname.setBoost(10);
   doc.add(fieldIcnname);
   doc.add(new Field("contents",
     apiIndexEntity.getContents() == null ? "" : apiIndexEntity
       .getContents(), Field.Store.YES,
     Field.Index.ANALYZED));
   doc.add(new Field("apiid", apiIndexEntity.getApiid() == null ? ""
     : apiIndexEntity.getApiid().toString(), Field.Store.YES,
     Field.Index.ANALYZED));
   writer.addDocument(doc);
  }
 }

//处理数据库中的数据,使得满足建索引和浏览结果的要求

//将ID、多字段值组合为map

 @Override
 public Map<Integer, String> getIdFieldsMap() {
  ApiIndicator myapiIndicator = null;
  Integer myapiid;
  // System.out.println(" apiPrepareService:"+ apiPrepareService);
  // 获取apiid数组或list
  apiIndicators = apiPrepareService.gainApiIndicatorIDCnnameList();
  List<Integer> apiids = apiPrepareService.gainApiids();
  for (int i = 0; i < apiids.size(); i++) {
   // System.out.println("apiids.get(i): " + apiids.get(i));
  }
  for (int h = 0; h < apiids.size(); h++) {
   StringBuffer mysb = new StringBuffer();
   myapiid = apiids.get(h);
   // System.out.println("apiid: " + myapiid);
   for (int k = 0; k < apiIndicators.size(); k++) {
    myapiIndicator = apiIndicators.get(k);
    if (myapiid.equals(myapiIndicator.getApiid())) {
     // System.out.println("myapiIndicator.getCnname(): "+myapiIndicator.getCnname());
     mysb.append(myapiIndicator.getCnname() + " ");
    }
   }
   // System.out.println("mysb.toString(): "+mysb.toString());
   idFieldMap.put(myapiid, mysb.toString());
  }
  return idFieldMap;
 }

//将api名、api描述和api的多个属性结合起来

 @Override
 public List<ApiIndexEntity> gainApiInfoAndComposite() {
  List<ApiIndexEntity> apiIndexEntityLists = new ArrayList<ApiIndexEntity>();
  ApiIndexEntity apiIndexEntity;
  Integer apiid;
  String fields;
  idFieldMap = getIdFieldsMap();
  List<ApiInfo> apiInfoLists = apiInfoService.gainApiInfoList();
  for (int i = 0; i < apiInfoLists.size(); i++) {
   StringBuilder contents = new StringBuilder();
   apiIndexEntity = new ApiIndexEntity();
   apiInfo = apiInfoLists.get(i);
   apiid = apiInfo.getId();
   fields = idFieldMap.get(apiid); // fix it. problematic....
   // contents.append(apiInfo.getCnname() + " ").
   contents.append(
     apiInfo.getDescription() + " \n" + apiInfo.getCnname()
       + "包含以下信息: ").append(fields);
   apiIndexEntity.setApiid(apiid);
   apiIndexEntity.setIcnname(apiInfo.getCnname());
   apiIndexEntity.setContents(contents.toString());
   apiIndexEntityLists.add(apiIndexEntity);
  }
  return apiIndexEntityLists;
 }

 @Override
 public String getIndex_dir() {
  return index_dir;
 }

 @Override
 public void setIndex_dir(String index_dir) {
  this.index_dir = index_dir;
 }

 @Override
 public String getData_dir() {
  return data_dir;
 }

 @Override
 public void setData_dir(String data_dir) {
  this.data_dir = data_dir;
 }

 @Override
 public List<String> getOutputFileds() {
  return outputFileds;
 }

 @Override
 public void setOutputFileds(List<String> outputFileds) {
  this.outputFileds = outputFileds;
 }

 

 @Override
 public File getFile(String path) {
  // if (path == null) {
  // return null;
  // }
  // if (path.indexOf("\") != -1) {
  // return new File(path, "f:\\lucene\\luceneIndexDir");
  return new File("f:\\lucene\\luceneIndexDir");
  // } else {
  // return new File(path, "/mylucene/luceneIndexDir");
  // }
 }

 @Override
 public String getAbsoultePath() {
  // ActionContext ac = ActionContext.getContext();
  // ServletContext sc = (ServletContext) ac
  // .get(ServletActionContext.SERVLET_CONTEXT);
  // String path = sc.getRealPath("/");
  // System.out.println("path: " + path);
  return null;
 }

 @Override
 public String getINDEX_DIR() {
  return INDEX_DIR;
 }

 @Override
 public String getINDEX_DIRLinux() {
  return INDEX_DIRLinux;
 }

 @Override
 public ApiInfo getApiInfo() {
  return apiInfo;
 }

 @Override
 public ApiInfoQuery getApiInfoQuery() {
  return apiInfoQuery;
 }

 @Override
 public ApiInfoService getApiInfoService() {
  return apiInfoService;
 }

 @Override
 public ApiPrepareService getApiIndicatorService() {
  return apiPrepareService;
 }

 @Override
 public List<ApiIndicator> getApiIndicators() {
  return apiIndicators;
 }

 @Override
 public ApiIndicator getApiIndicator() {
  return apiIndicator;
 }

 @Override
 public StringBuilder getNewsb() {
  return newsb;
 }

 @Override
 public ResultSet getTempRs() {
  return tempRs;
 }

 @Override
 public Document getDoc() {
  return doc;
 }

 @Override
 public Map<Integer, String> getIdFieldMap() {
  return idFieldMap;
 }

 @Override
 public List<Field> getFields() {
  return fields;
 }

 @Override
 public void setINDEX_DIR(String iNDEX_DIR) {
  INDEX_DIR = iNDEX_DIR;
 }

 @Override
 public void setINDEX_DIRLinux(String iNDEX_DIRLinux) {
  INDEX_DIRLinux = iNDEX_DIRLinux;
 }

 @Override
 public void setApiInfo(ApiInfo apiInfo) {
  this.apiInfo = apiInfo;
 }

 @Override
 public void setApiInfoQuery(ApiInfoQuery apiInfoQuery) {
  this.apiInfoQuery = apiInfoQuery;
 }

 @Override
 public void setApiInfoService(ApiInfoService apiInfoService) {
  this.apiInfoService = apiInfoService;
 }

 @Override
 public void setApiIndicators(List<ApiIndicator> apiIndicators) {
  this.apiIndicators = apiIndicators;
 }

 @Override
 public void setApiIndicator(ApiIndicator apiIndicator) {
  this.apiIndicator = apiIndicator;
 }

 @Override
 public void setNewsb(StringBuilder newsb) {
  this.newsb = newsb;
 }

 @Override
 public void setTempRs(ResultSet tempRs) {
  this.tempRs = tempRs;
 }

 @Override
 public void setDoc(Document doc) {
  this.doc = doc;
 }

 @Override
 public void setIdFieldMap(Map<Integer, String> idFieldMap) {
  this.idFieldMap = idFieldMap;
 }

 @Override
 public void setFields(List<Field> fields) {
  this.fields = fields;
 }

 @Override
 public ApiPrepareService getApiPrepareService() {
  return apiPrepareService;
 }

 @Override
 public void setApiPrepareService(ApiPrepareService apiPrepareService) {
  this.apiPrepareService = apiPrepareService;
 }

 

}



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值