搜索技术--建立索引(lucene3.0)(2011-01-11 14:52:53) 转载 标签: indexwriter analyzer document field 搜索引擎 索引 文档 域 java it 分类: JAVA高级软件工程师
导言:从数据库中取出数据,预先进行处理;按照 IndexWriter---->Analyzer---->Document---->Field 的顺序对数据建立索引。
利用索引完成搜索,请查看:搜索技术--搜索、排序并高亮显示(lucene)
package com.jrj.datamart.service;
import java.io.File;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.jrj.datamart.model.ApiIndexEntity;
import com.jrj.datamart.model.ApiIndicator;
import com.jrj.datamart.model.ApiInfo;
import com.jrj.datamart.model.ApiInfoQuery;
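// Builds a Lucene 3.0.2 index over the datamart metadata. For every table, the table
// description and the Chinese names of its fields are added so they can be searched.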
public class IndexerServiceImp implements IndexerService {
// Location where the index files are stored.
// NOTE(review): getFile() returns its own hard-coded path and does not read this field.
private String INDEX_DIR = "F:\\MyLuceneMD4\\LuceneFileIndexDir";
// Index directory; only exposed through getIndex_dir()/setIndex_dir() in this class.
private String index_dir;
// private File file=new File(INDEX_DIR);
// Data directory; only exposed through getData_dir()/setData_dir() in this class.
private String data_dir;
// ApiInfo most recently visited by gainApiInfoAndComposite().
private ApiInfo apiInfo;
// Only exposed through its getter/setter in this class.
private ApiInfoQuery apiInfoQuery = new ApiInfoQuery();
// Supplies the ApiInfo list (gainApiInfoList()).
private ApiInfoService apiInfoService;
// Supplies the api ids and the indicator list consumed by getIdFieldsMap().
private ApiPrepareService apiPrepareService;
// Indicator list loaded by getIdFieldsMap().
private List<ApiIndicator> apiIndicators = new ArrayList<ApiIndicator>();
// Only exposed through its getter/setter in this class.
private ApiIndicator apiIndicator;
// Only exposed through its getter/setter in this class.
private StringBuilder newsb = new StringBuilder();
// Only exposed through its getter/setter in this class.
private ResultSet tempRs = null;
// Document most recently built by indexData().
private Document doc;
// Maps an api id to the space-separated Chinese names of its indicators (filled by getIdFieldsMap()).
private Map<Integer, String> idFieldMap = new HashMap<Integer, String>();
// Collection of table field names.
private List<String> outputFileds = new ArrayList<String>();
// Collection of Lucene fields.
private List<Field> fields = new ArrayList<Field>();
/**
 * Main method when run under Struts2: rebuilds the index and prints how long it took.
 *
 * @return the literal result name {@code "success"}
 * @throws Exception if loading the data or writing the index fails
 */
@Override
public String execute() throws Exception {
    final File indexDirectory = getFile(null);
    final long startedAt = System.currentTimeMillis();
    final int numIndexed = getAllDataAndIndexing(indexDirectory);
    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    System.out.println("Indexing " + numIndexed + " files took "
            + elapsedMillis + " milliseconds");
    return "success";
}
/**
 * Loads all required data from the database and builds the index from it.
 *
 * <p>The data is fetched before the writer is opened: the writer is created with
 * {@code create == true}, which replaces any index already in the directory, so a
 * failing database read must not get as far as opening it.
 *
 * @param file directory that holds the index (passed to {@code FSDirectory.open})
 * @return the value of {@code writer.numDocs()} after all documents were added
 * @throws Exception if loading the data or writing the index fails
 */
@Override
public int getAllDataAndIndexing(File file) throws Exception {
    // Fetch the rows and reshape them into the structure the index needs.
    List<ApiIndexEntity> apiIndexEntityLists = gainApiInfoAndComposite();

    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
            Version.LUCENE_30, true);
    IndexWriter writer = new IndexWriter(FSDirectory.open(file), analyzer,
            true, IndexWriter.MaxFieldLength.LIMITED);
    boolean closed = false;
    try {
        indexData(writer, apiIndexEntityLists);
        int numIndexed = writer.numDocs();
        writer.optimize();
        writer.close();
        closed = true;
        return numIndexed;
    } finally {
        if (!closed) {
            // Something failed: discard the uncommitted changes and release the
            // write lock so that a later run can open the directory again.
            writer.rollback();
        }
    }
}
/**
 * Core indexing step (writer -> document -> field): adds one document per entity.
 *
 * <p>Each document carries three stored, analyzed fields: {@code icnname} (boost 10),
 * {@code contents} and {@code apiid}. A null value is indexed as the empty string.
 * Null entities are skipped and a null list is a no-op.
 *
 * @param writer              writer that receives the documents
 * @param apiIndexEntityLists entities to index; may be null
 * @throws Exception if the writer rejects a document
 */
private void indexData(IndexWriter writer,
        List<ApiIndexEntity> apiIndexEntityLists) throws Exception {
    if (apiIndexEntityLists == null) {
        return;
    }
    for (ApiIndexEntity entity : apiIndexEntityLists) {
        if (entity == null) {
            continue;
        }
        doc = new Document();

        String icnnameText = "";
        if (entity.getIcnname() != null) {
            icnnameText = entity.getIcnname();
        }
        Field icnnameField = new Field("icnname", icnnameText,
                Field.Store.YES, Field.Index.ANALYZED);
        icnnameField.setBoost(10);
        doc.add(icnnameField);

        String contentsText = "";
        if (entity.getContents() != null) {
            contentsText = entity.getContents();
        }
        doc.add(new Field("contents", contentsText,
                Field.Store.YES, Field.Index.ANALYZED));

        String apiidText = "";
        if (entity.getApiid() != null) {
            apiidText = entity.getApiid().toString();
        }
        doc.add(new Field("apiid", apiidText,
                Field.Store.YES, Field.Index.ANALYZED));

        writer.addDocument(doc);
    }
}
/**
 * Prepares the database data for indexing and result display by combining, per api id,
 * the Chinese names of all its indicators into one string.
 *
 * <p>For every id returned by {@code apiPrepareService.gainApiids()} the map receives
 * the names of the matching indicators, each followed by a single space, in the order
 * of the indicator list. Ids without indicators map to the empty string. Null ids,
 * null indicators and null names are skipped so that the literal text "null" never
 * ends up in the index.
 *
 * @return the {@code idFieldMap} member, updated in place
 */
@Override
public Map<Integer, String> getIdFieldsMap() {
    apiIndicators = apiPrepareService.gainApiIndicatorIDCnnameList();
    List<Integer> apiids = apiPrepareService.gainApiids();
    if (apiids == null) {
        return idFieldMap;
    }
    for (Integer apiid : apiids) {
        if (apiid == null) {
            continue;
        }
        StringBuilder names = new StringBuilder();
        if (apiIndicators != null) {
            for (ApiIndicator indicator : apiIndicators) {
                if (indicator == null || !apiid.equals(indicator.getApiid())) {
                    continue;
                }
                String cnname = indicator.getCnname();
                if (cnname != null) {
                    names.append(cnname).append(" ");
                }
            }
        }
        idFieldMap.put(apiid, names.toString());
    }
    return idFieldMap;
}
/**
 * Combines each API's name, description and indicator names into one index entity.
 *
 * <p>The {@code contents} text is
 * {@code description + " \n" + cnname + "包含以下信息: " + indicatorNames}. A missing
 * description, name or indicator string contributes the empty string, so the literal
 * text "null" is never indexed. Null entries in the API list are skipped, and a null
 * list yields an empty result.
 *
 * @return one entity per API record, in the order returned by the service; never null
 */
@Override
public List<ApiIndexEntity> gainApiInfoAndComposite() {
    List<ApiIndexEntity> apiIndexEntityLists = new ArrayList<ApiIndexEntity>();
    idFieldMap = getIdFieldsMap();
    List<ApiInfo> apiInfoLists = apiInfoService.gainApiInfoList();
    if (apiInfoLists == null) {
        return apiIndexEntityLists;
    }
    for (ApiInfo info : apiInfoLists) {
        if (info == null) {
            continue;
        }
        // Keep the member in sync; it is exposed through getApiInfo().
        apiInfo = info;
        Integer apiid = info.getId();
        String description = info.getDescription();
        String cnname = info.getCnname();
        // Absent when the id has no entry in the id -> indicator-names map.
        String indicatorNames = idFieldMap.get(apiid);

        StringBuilder contents = new StringBuilder();
        contents.append(description == null ? "" : description)
                .append(" \n")
                .append(cnname == null ? "" : cnname)
                .append("包含以下信息: ")
                .append(indicatorNames == null ? "" : indicatorNames);

        ApiIndexEntity apiIndexEntity = new ApiIndexEntity();
        apiIndexEntity.setApiid(apiid);
        apiIndexEntity.setIcnname(cnname);
        apiIndexEntity.setContents(contents.toString());
        apiIndexEntityLists.add(apiIndexEntity);
    }
    return apiIndexEntityLists;
}
/** Returns the {@code index_dir} member. */
@Override
public String getIndex_dir() {
    return this.index_dir;
}
/** Stores the given value in the {@code index_dir} member. */
@Override
public void setIndex_dir(final String index_dir) {
    this.index_dir = index_dir;
}
/** Returns the {@code data_dir} member. */
@Override
public String getData_dir() {
    return this.data_dir;
}
/** Stores the given value in the {@code data_dir} member. */
@Override
public void setData_dir(final String data_dir) {
    this.data_dir = data_dir;
}
/** Returns the {@code outputFileds} list itself (not a copy). */
@Override
public List<String> getOutputFileds() {
    return this.outputFileds;
}
/** Replaces the {@code outputFileds} list with the given one. */
@Override
public void setOutputFileds(final List<String> outputFileds) {
    this.outputFileds = outputFileds;
}
/**
 * Returns the directory used for the Lucene index.
 *
 * <p>The {@code path} argument is currently ignored: a fixed location is always
 * returned. Earlier variants that derived the directory from {@code path} were
 * disabled.
 *
 * @param path unused
 * @return a {@code File} for {@code f:\lucene\luceneIndexDir}
 */
@Override
public File getFile(String path) {
    final String indexLocation = "f:\\lucene\\luceneIndexDir";
    return new File(indexLocation);
}
/**
 * Placeholder for resolving the web application's real path.
 *
 * <p>The servlet-context lookup ({@code getRealPath("/")}) that used to live here is
 * disabled, so this method always returns {@code null}.
 *
 * @return always {@code null}
 */
@Override
public String getAbsoultePath() {
    final String absolutePath = null;
    return absolutePath;
}
/** Returns the {@code INDEX_DIR} member. */
@Override
public String getINDEX_DIR() {
    return this.INDEX_DIR;
}
// Index directory for Linux. getINDEX_DIRLinux() and setINDEX_DIRLinux() referenced
// this member but it was never declared, so the class did not compile.
// No default is assigned because the intended path is not visible in this file;
// TODO confirm the intended value.
private String INDEX_DIRLinux;

/** Returns the {@code INDEX_DIRLinux} member; {@code null} until it has been set. */
@Override
public String getINDEX_DIRLinux() {
    return INDEX_DIRLinux;
}
/** Returns the {@code apiInfo} member. */
@Override
public ApiInfo getApiInfo() {
    return this.apiInfo;
}
/** Returns the {@code apiInfoQuery} member. */
@Override
public ApiInfoQuery getApiInfoQuery() {
    return this.apiInfoQuery;
}
/** Returns the {@code apiInfoService} member. */
@Override
public ApiInfoService getApiInfoService() {
    return this.apiInfoService;
}
/**
 * Returns the {@code apiPrepareService} member.
 * Despite its name, this returns the same object as {@code getApiPrepareService()}.
 */
@Override
public ApiPrepareService getApiIndicatorService() {
    return this.apiPrepareService;
}
/** Returns the {@code apiIndicators} list itself (not a copy). */
@Override
public List<ApiIndicator> getApiIndicators() {
    return this.apiIndicators;
}
/** Returns the {@code apiIndicator} member. */
@Override
public ApiIndicator getApiIndicator() {
    return this.apiIndicator;
}
/** Returns the {@code newsb} builder itself (not a copy). */
@Override
public StringBuilder getNewsb() {
    return this.newsb;
}
/** Returns the {@code tempRs} member. */
@Override
public ResultSet getTempRs() {
    return this.tempRs;
}
/** Returns the {@code doc} member. */
@Override
public Document getDoc() {
    return this.doc;
}
/**
 * Returns the {@code idFieldMap} member as it currently stands.
 * Unlike {@code getIdFieldsMap()}, this does not reload anything.
 */
@Override
public Map<Integer, String> getIdFieldMap() {
    return this.idFieldMap;
}
/** Returns the {@code fields} list itself (not a copy). */
@Override
public List<Field> getFields() {
    return this.fields;
}
/** Stores the given value in the {@code INDEX_DIR} member. */
@Override
public void setINDEX_DIR(final String iNDEX_DIR) {
    this.INDEX_DIR = iNDEX_DIR;
}
/** Stores the given value in the {@code INDEX_DIRLinux} member. */
@Override
public void setINDEX_DIRLinux(final String iNDEX_DIRLinux) {
    this.INDEX_DIRLinux = iNDEX_DIRLinux;
}
/** Stores the given value in the {@code apiInfo} member. */
@Override
public void setApiInfo(final ApiInfo apiInfo) {
    this.apiInfo = apiInfo;
}
/** Stores the given value in the {@code apiInfoQuery} member. */
@Override
public void setApiInfoQuery(final ApiInfoQuery apiInfoQuery) {
    this.apiInfoQuery = apiInfoQuery;
}
/** Stores the service that {@code gainApiInfoAndComposite()} reads the API list from. */
@Override
public void setApiInfoService(final ApiInfoService apiInfoService) {
    this.apiInfoService = apiInfoService;
}
/** Replaces the {@code apiIndicators} list with the given one. */
@Override
public void setApiIndicators(final List<ApiIndicator> apiIndicators) {
    this.apiIndicators = apiIndicators;
}
/** Stores the given value in the {@code apiIndicator} member. */
@Override
public void setApiIndicator(final ApiIndicator apiIndicator) {
    this.apiIndicator = apiIndicator;
}
/** Replaces the {@code newsb} builder with the given one. */
@Override
public void setNewsb(final StringBuilder newsb) {
    this.newsb = newsb;
}
/** Stores the given value in the {@code tempRs} member. */
@Override
public void setTempRs(final ResultSet tempRs) {
    this.tempRs = tempRs;
}
/** Stores the given value in the {@code doc} member. */
@Override
public void setDoc(final Document doc) {
    this.doc = doc;
}
/** Replaces the {@code idFieldMap} member with the given map. */
@Override
public void setIdFieldMap(final Map<Integer, String> idFieldMap) {
    this.idFieldMap = idFieldMap;
}
/** Replaces the {@code fields} list with the given one. */
@Override
public void setFields(final List<Field> fields) {
    this.fields = fields;
}
/** Returns the {@code apiPrepareService} member. */
@Override
public ApiPrepareService getApiPrepareService() {
    return this.apiPrepareService;
}
/** Stores the service that {@code getIdFieldsMap()} reads api ids and indicators from. */
@Override
public void setApiPrepareService(final ApiPrepareService apiPrepareService) {
    this.apiPrepareService = apiPrepareService;
}
}
搜索技术--建立索引(lucene3.0)
最新推荐文章于 2024-09-23 10:19:58 发布