实现从SVN下载文件、对文件建立索引并搜索索引的功能（类似于百度）

最新推荐文章于 2024-09-30 11:27:43 发布

把每天当成生命中的最后一天

最新推荐文章于 2024-09-30 11:27:43 发布

阅读量666

点赞数

分类专栏： svn lucene 文章标签： eclipse jar java-ee

本文链接：https://blog.csdn.net/L905128009/article/details/119530509

版权

本文介绍了如何利用Java EE和Eclipse开发一个功能，能够从SVN仓库下载文件并创建索引，实现类似百度的搜索功能。通过对SVN上的文件进行索引，用户可以快速查找并访问所需文件。

摘要由CSDN通过智能技术生成

package com.jtv.oaquery.module.developdocument.web;


import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.URLDecoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javassist.bytecode.analysis.Analyzer;

import javax.management.Query;
import javax.swing.text.Highlighter;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.commons.io.FilenameUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.springframework.web.servlet.ModelAndView;

import antlr.TokenStream;
import bsh.ParseException;
import cn.com.jtv.mf.core.web.WebContextHolder;
import cn.com.jtv.mf.core.web.mvc.BaseEntityJsonAction;

import com.drew.metadata.Directory;
import com.jtv.oaquery.module.developdocument.entity.Page;
import com.jtv.oaquery.module.developdocument.util.SVNUtil;
import com.sun.org.apache.xerces.internal.impl.xs.identity.Field;
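/**
 * Controller for development documents: downloads files from SVN, builds a
 * Lucene index over them and serves the search page.
 * <p>
 * 
 * @version 2016-11-16 
 * @author liuyy 
 */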
public class DevelopDocumentAction extends BaseEntityJsonAction {  
	
	/**
	 * Overrides {@code query} so that the request ends up on the search page.
	 *
	 * @return the literal {@code "search"}
	 * @throws Exception declared by the signature; this implementation does not throw
	 * @author liuyy
	 * @date 2016-11-16
	 */
	@Override
	public Object query() throws Exception {
		final String searchTarget = "search";
		return searchTarget;
	}
	
	/**
	 * Connects to SVN and downloads its resources by delegating to
	 * {@code SVNUtil.download()}.
	 *
	 * @return the list produced by {@code SVNUtil.download()}
	 * @throws RuntimeException if the download throws; the failure is logged and
	 *         attached as the cause
	 * @author liuyy
	 * @date 2016-11-16
	 */
	public List<Map<String,Object>> downloadFile() {
		try {
			return SVNUtil.download();
		} catch (Exception downloadFailure) {
			final String message = "下载文档失败";
			logger.error(message, downloadFailure);
			throw new RuntimeException(message, downloadFailure);
		}
	}
	
	/**
	 * Rebuilds the search index from the entries of the web application's
	 * {@code doc} folder.
	 * <p>
	 * The index is stored in the sibling {@code dic} folder and is created from
	 * scratch on every call ({@code create = true}). Top-level entries named
	 * {@code .svn} are skipped, and a document is only added when
	 * {@link #generatorDoc(File)} produced at least one field.
	 *
	 * @throws RuntimeException if the {@code doc} folder cannot be listed, or if
	 *         indexing or closing the index fails; the underlying exception is
	 *         attached as the cause
	 * @author liuyy
	 * @date 2016-11-16
	 */
	public void createIndex() {
		final String info = "创建索引失败";
		// Real path of the deployed web application (e.g. the project folder under Tomcat).
		String path = WebContextHolder.getServletContext().getRealPath("/");
		// Parent/child constructor: works whether or not the real path ends with a separator.
		File docRoot = new File(path, "doc");
		Directory directory = null;
		IndexWriter writer = null;
		boolean indexed = false;

		try {
			// List the source folder BEFORE opening the writer: create=true wipes the
			// existing index, so a missing folder must not cost us the old index.
			File[] entries = docRoot.listFiles();
			if (entries == null) {
				throw new IOException("Document folder is missing or unreadable: " + docRoot.getAbsolutePath());
			}

			// The index is stored in the "dic" folder.
			directory = FSDirectory.getDirectory(new File(path, "dic"));

			// One analyzer instance serves the writer and every document.
			PaodingAnalyzer analyzer = new PaodingAnalyzer();
			writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
			// Writer-level setting: configure once instead of once per document.
			writer.setMergeFactor(8192);

			for (File file : entries) {
				if (".svn".equals(file.getName())) {
					continue;
				}
				Document doc = generatorDoc(file);
				if (doc != null && doc.getFields().size() > 0) {
					writer.addDocument(doc, analyzer);
				}
			}

			// Merge segments once, after all documents are in; doing this per document
			// repeats a full merge each time and leaves the last document unmerged.
			writer.optimize();
			indexed = true;
		} catch (Exception e) {
			logger.error(info, e);
			throw new RuntimeException(info, e);
		} finally {
			// Always attempt to close both resources, even if the first close fails.
			Exception closeFailure = null;
			if (writer != null) {
				try {
					writer.close();
				} catch (Exception e) {
					logger.error(info, e);
					closeFailure = e;
				}
			}
			if (directory != null) {
				try {
					directory.close();
				} catch (IOException e) {
					logger.error(info, e);
					if (closeFailure == null) {
						closeFailure = e;
					}
				}
			}
			// Surface a close failure only when indexing itself succeeded, so it can
			// never replace the primary exception thrown from the catch block above.
			if (indexed && closeFailure != null) {
				throw new RuntimeException(info, closeFailure);
			}
		}
	}
	
	/**
	 * Returns the last-modified date of a file, formatted as {@code yyyy-MM-dd}.
	 * <p>
	 * The date is taken from the SVN listing returned by {@link #downloadFile()}:
	 * entries are matched by file name (case-insensitive) and, if several entries
	 * match, the last one with a date wins. When the listing is {@code null}, has
	 * no matching entry, or the matching entry carries no date, the file's own
	 * {@link File#lastModified()} timestamp is used instead of failing with a
	 * {@code NullPointerException}.
	 * <p>
	 * NOTE(review): every call invokes {@code downloadFile()} again; if
	 * {@code SVNUtil.download()} is expensive, callers that process many files
	 * may want to fetch the listing once — confirm.
	 *
	 * @param file the local file whose date is wanted; must not be {@code null}
	 * @return the formatted date, never {@code null}
	 * @author liuyy
	 * @date 2016-11-16
	 */
	public String getDate(File file) {
		List<Map<String, Object>> list = downloadFile();
		String name = file.getName();
		Date date = null;

		if (list != null) {
			for (Map<String, Object> entry : list) {
				Object svnName = entry.get("name");
				// Entries without a name cannot be matched; skip them instead of throwing.
				if (svnName == null || !name.equalsIgnoreCase(svnName.toString())) {
					continue;
				}
				Date svnDate = (Date) entry.get("date");
				if (svnDate != null) {
					date = svnDate;
				}
			}
		}

		if (date == null) {
			// No usable SVN date: fall back to the local file's modification time.
			date = new Date(file.lastModified());
		}

		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
		return sdf.format(date);
	}
	
	/**
	 * Looks up the SVN path of a file.
	 * <p>
	 * The listing returned by {@link #downloadFile()} is scanned for entries whose
	 * {@code "name"} equals the file's name (case-insensitive). The {@code "url"}
	 * of a matching entry is URL-decoded as UTF-8; when several entries match,
	 * the last one wins.
	 *
	 * @param file the local file to look up
	 * @return the decoded SVN URL, or {@code null} when no entry matches
	 * @throws Exception if decoding the URL fails
	 * @author liuyy
	 * @date 2016-11-16
	 */
	public String getSvnUrl(File file) throws Exception {
		List<Map<String, Object>> entries = downloadFile();
		final String localName = file.getName();
		String decodedUrl = null;

		for (Map<String, Object> entry : entries) {
			String remoteName = entry.get("name").toString();
			if (!localName.equalsIgnoreCase(remoteName)) {
				continue;
			}
			// Decode the URL-encoded path before handing it back.
			String rawUrl = entry.get("url").toString();
			decodedUrl = URLDecoder.decode(rawUrl, "UTF-8");
		}

		return decodedUrl;
	}

	/**
	 * Builds one index document for a file or for a whole folder.
	 * <p>
	 * For a plain file the document gets four fields: {@code content} (text
	 * extracted by Tika), {@code date}, {@code path} and {@code svnUrl}. For a
	 * folder, the fields of every file beneath it are added to the same single
	 * document; sub-folders are traversed recursively and folders named
	 * {@code .svn} are skipped, so nested folders no longer fail when opened as a
	 * file stream.
	 * <p>
	 * Files with the extensions {@code bpmn}, {@code pptx}, {@code rar} or
	 * {@code jpg} (any letter case) are not indexed, whether they are passed in
	 * directly or found inside a folder.
	 *
	 * @param f the file or folder to index
	 * @return the document; it has no fields when nothing indexable was found
	 * @throws Exception if a file cannot be opened or its SVN data cannot be resolved
	 * @author liuyy
	 * @date 2016-11-16
	 */
	public Document generatorDoc(File f) throws Exception {
		Document doc = new Document();
		appendIndexFields(doc, f, new Tika());
		return doc;
	}

	/**
	 * Adds the index fields of {@code f} to {@code doc}, descending into
	 * {@code f} when it is a folder.
	 */
	private void appendIndexFields(Document doc, File f, Tika tika) throws Exception {
		if (f.isDirectory()) {
			if (".svn".equals(f.getName())) {
				return;
			}
			File[] children = f.listFiles();
			if (children == null) {
				// Folder could not be listed (I/O error or permissions); nothing to add.
				return;
			}
			for (File child : children) {
				appendIndexFields(doc, child, tika);
			}
			return;
		}

		if (isExcludedFromIndex(f)) {
			return;
		}

		// Resolve the SVN data first: if either lookup throws, no stream has been opened yet.
		String date = getDate(f);
		String svnUrl = getSvnUrl(f);

		// A fresh Metadata per file, so data extracted from one file never reaches another parse.
		doc.add(new Field("content", tika.parse(new FileInputStream(f), new Metadata()), TermVector.WITH_POSITIONS_OFFSETS));
		doc.add(new Field("date", date, Field.Store.YES, Field.Index.NOT_ANALYZED));
		doc.add(new Field("path", f.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
		// getSvnUrl returns null when the file has no SVN entry; store an empty value
		// rather than passing null to the Field constructor.
		doc.add(new Field("svnUrl", svnUrl == null ? "" : svnUrl, Field.Store.YES, Field.Index.NOT_ANALYZED));
	}

	/**
	 * Tells whether the file's extension is one of those that must not be indexed.
	 */
	private boolean isExcludedFromIndex(File file) {
		String extension = FilenameUtils.getExtension(file.getName());
		return "bpmn".equalsIgnoreCase(extension)
				|| "pptx".equalsIgnoreCase(extension)
				|| "rar".equalsIgnoreCase(extension)
				|| "jpg".equalsIgnoreCase(extension);
	}
	
	/**
	 * Wraps one page of search results in a {@code Page} object.
	 *
	 * @param pageNum the current page number
	 * @param length the total number of records
	 * @param subList the records of the current page
	 * @return a new {@code Page} constructed from {@code pageNum}, a page size of
	 *         10, {@code length} and {@code subList}, in that order
	 * @date 2016-12-05
	 * @author liuyy
	 */
	public Page getPageResult(int pageNum,int length,List<Map<String, String>> subList) {
		// The page size is fixed at 10 records.
		return new Page(pageNum, 10, length, subList);
	}
	
	/**
	 * 获取检索的结果
	 * @param searcher
	 * @param query
	 * @return
	 * @date 2016-12-05
	 * @author liuyy
	 */
	public ScoreDoc[] getScoreDoc(IndexSearcher searcher,Query query) {
		TopDocs tds = null;
		try {
			tds = searcher.search(query, 200);
		} catch (IOException e) {