package com.jtv.oaquery.module.developdocument.web;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.URLDecoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javassist.bytecode.analysis.Analyzer;
import javax.management.Query;
import javax.swing.text.Highlighter;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.commons.io.FilenameUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.springframework.web.servlet.ModelAndView;
import antlr.TokenStream;
import bsh.ParseException;
import cn.com.jtv.mf.core.web.WebContextHolder;
import cn.com.jtv.mf.core.web.mvc.BaseEntityJsonAction;
import com.drew.metadata.Directory;
import com.jtv.oaquery.module.developdocument.entity.Page;
import com.jtv.oaquery.module.developdocument.util.SVNUtil;
import com.sun.org.apache.xerces.internal.impl.xs.identity.Field;
/**
* 发布主题管理控制器.
* <p>
*
* @version 2016-11-16
* @author liuyy
*/
public class DevelopDocumentAction extends BaseEntityJsonAction {
/**
 * Overrides {@code query} so that the request ends up on the search page.
 *
 * @return the string {@code "search"}
 * @throws Exception declared by the signature; nothing in this body throws
 * @author liuyy
 * @date 2016-11-16
 */
@Override
public Object query() throws Exception {
    final String searchView = "search";
    return searchView;
}
/**
 * Connects to SVN and downloads the resources stored there by delegating to
 * {@code SVNUtil.download()}.
 *
 * @return whatever {@code SVNUtil.download()} returns; other methods of this class
 *         read the keys {@code "name"}, {@code "date"} and {@code "url"} from each map
 * @throws RuntimeException if the download fails for any reason; the original
 *         exception is logged and kept as the cause
 * @author liuyy
 * @date 2016-11-16
 */
public List<Map<String,Object>> downloadFile() {
    try {
        return SVNUtil.download();
    } catch (Exception e) {
        final String info = "下载文档失败";
        logger.error(info, e);
        throw new RuntimeException(info, e);
    }
}
/**
 * Builds a fresh Lucene index over the downloaded documents.
 * <p>
 * Every entry of the {@code doc} folder under the web application's real path
 * (except {@code .svn}) is converted by {@link #generatorDoc(File)} and written to
 * an index stored in the {@code dic} folder next to it. The index is opened with
 * {@code create = true}, so any previous index content is replaced.
 *
 * @throws RuntimeException if the document folder cannot be listed, or if indexing
 *         or closing the index fails; the underlying exception is kept as the cause
 * @author liuyy
 * @date 2016-11-16
 */
public void createIndex() {
    final String info = "创建索引失败";
    // Real path of the deployed web application (e.g. under Tomcat).
    String path = WebContextHolder.getServletContext().getRealPath("/");
    File docRoot = new File(path + "doc");
    // Typed as FSDirectory (already imported) because the simple name `Directory`
    // is bound by this file's imports to com.drew.metadata.Directory, which is not
    // a Lucene directory.
    FSDirectory directory = null;
    IndexWriter writer = null;
    RuntimeException failure = null;
    try {
        // List the source folder BEFORE opening the writer: opening with create=true
        // wipes the existing index, which must not happen when there is nothing to index.
        File[] entries = docRoot.listFiles();
        if (entries == null) {
            throw new IllegalStateException("Document folder is missing or unreadable: " + docRoot);
        }
        // The index is stored in the "dic" folder.
        directory = FSDirectory.getDirectory(new File(path + "dic"));
        writer = new IndexWriter(directory, new PaodingAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        // Writer-wide setting: configure it once instead of once per document.
        writer.setMergeFactor(8192);
        for (File file : entries) {
            if (".svn".equals(file.getName())) {
                continue;
            }
            Document doc = generatorDoc(file);
            if (doc != null && doc.getFields().size() > 0) {
                writer.addDocument(doc, new PaodingAnalyzer());
            }
        }
        // Optimizing is expensive; do it a single time after all documents are added.
        writer.optimize();
    } catch (Exception e) {
        logger.error(info, e);
        failure = new RuntimeException(info, e);
    } finally {
        // Close both resources independently so that a failure while closing the
        // writer cannot prevent the directory from being closed, and so that a
        // close failure never hides the primary indexing failure.
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (Exception e) {
            logger.error(info, e);
            if (failure == null) {
                failure = new RuntimeException(info, e);
            }
        }
        try {
            if (directory != null) {
                directory.close();
            }
        } catch (Exception e) {
            logger.error(info, e);
            if (failure == null) {
                failure = new RuntimeException(info, e);
            }
        }
    }
    if (failure != null) {
        throw failure;
    }
}
/**
 * Returns the last-modified date of a file, formatted as {@code yyyy-MM-dd}.
 * <p>
 * The date is taken from the SVN listing returned by {@link #downloadFile()}: the
 * entry whose {@code "name"} equals the file name (ignoring case) supplies the
 * {@code "date"} value. If several entries match, the last one wins. When no entry
 * matches, or the matching entry carries no date, the file's own last-modified
 * timestamp on disk is used instead of failing with a NullPointerException.
 *
 * @param file the file whose date is wanted; must not be {@code null}
 * @return the formatted date, never {@code null}
 * @throws RuntimeException if the SVN listing cannot be downloaded
 * @author liuyy
 * @date 2016-11-16
 */
public String getDate(File file) {
    List<Map<String, Object>> entries = downloadFile();
    String fileName = file.getName();
    Date date = null;
    if (entries != null) {
        for (Map<String, Object> entry : entries) {
            if (entry == null) {
                continue;
            }
            Object svnName = entry.get("name");
            if (svnName == null || !fileName.equalsIgnoreCase(svnName.toString())) {
                continue;
            }
            Object svnDate = entry.get("date");
            if (svnDate instanceof Date) {
                date = (Date) svnDate;
            }
        }
    }
    if (date == null) {
        // No usable SVN entry: fall back to the timestamp of the local copy.
        date = new Date(file.lastModified());
    }
    // SimpleDateFormat is not thread-safe, so a new instance is created per call.
    return new SimpleDateFormat("yyyy-MM-dd").format(date);
}
/**
 * Looks up the SVN location of a file.
 * <p>
 * Scans the listing returned by {@link #downloadFile()} for entries whose
 * {@code "name"} equals the file name (ignoring case) and URL-decodes the
 * {@code "url"} value of the match as UTF-8. If several entries match, the last
 * one wins.
 *
 * @param file the file to look up
 * @return the decoded SVN URL, or {@code null} when no entry matches
 * @throws Exception if URL decoding fails
 * @author liuyy
 * @date 2016-11-16
 */
public String getSvnUrl(File file) throws Exception {
    final List<Map<String, Object>> entries = downloadFile();
    final String fileName = file.getName();
    String decodedUrl = null;
    for (Map<String, Object> entry : entries) {
        final String svnName = entry.get("name").toString();
        if (!fileName.equalsIgnoreCase(svnName)) {
            continue;
        }
        // The listing stores the URL in encoded form; decode it before returning.
        final String rawUrl = entry.get("url").toString();
        decodedUrl = URLDecoder.decode(rawUrl, "UTF-8");
    }
    return decodedUrl;
}
/**
 * Builds the Lucene document for one entry of the document folder.
 * <p>
 * For a plain file, the fields {@code content}, {@code date}, {@code path} and
 * {@code svnUrl} are added for that file. For a directory, the same four fields are
 * added once per regular file directly inside it, all on the same document; files
 * with the extensions bpmn, pptx, rar and jpg (any letter case) are left out, and
 * nested sub-directories are skipped because they cannot be opened as a stream.
 * The extension filter is only applied to files inside a directory, not to a plain
 * file passed in directly.
 *
 * @param f the file or directory to index
 * @return the populated document; it has no fields when the directory is empty,
 *         unreadable, or contains only skipped entries
 * @throws Exception if a file cannot be opened or parsed, or if the SVN lookup fails
 * @author liuyy
 * @date 2016-11-16
 */
public Document generatorDoc(File f) throws Exception {
    Document doc = new Document();
    if (!f.isDirectory()) {
        // A plain file is indexed directly.
        addFileFields(doc, f);
        return doc;
    }
    File[] children = f.listFiles();
    if (children == null) {
        // listFiles() returns null when the directory cannot be read.
        return doc;
    }
    for (File child : children) {
        if (!child.isFile() || isSkippedExtension(FilenameUtils.getExtension(child.getName()))) {
            continue;
        }
        addFileFields(doc, child);
    }
    return doc;
}

/**
 * Adds the content, date, path and svnUrl fields of a single file to the document.
 */
private void addFileFields(Document doc, File file) throws Exception {
    // NOTE(review): `Field` has to resolve to org.apache.lucene.document.Field; the
    // import at the top of this file names an unrelated xerces class - confirm and fix.
    // NOTE(review): getDate and getSvnUrl each call downloadFile(), so every indexed
    // file triggers two SVN downloads; consider fetching the listing once.
    // A fresh Metadata per file keeps one file's parse from seeing another file's values.
    Metadata metadata = new Metadata();
    // NOTE(review): the stream is handed to the reader returned by Tika and is not
    // closed here; presumably it is closed once the reader has been consumed - confirm.
    doc.add(new Field("content", new Tika().parse(new FileInputStream(file), metadata), TermVector.WITH_POSITIONS_OFFSETS));
    doc.add(new Field("date", getDate(file), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("svnUrl", getSvnUrl(file), Field.Store.YES, Field.Index.NOT_ANALYZED));
}

/**
 * Tells whether files with the given extension are excluded from indexing.
 */
private static boolean isSkippedExtension(String extension) {
    return "bpmn".equalsIgnoreCase(extension)
            || "pptx".equalsIgnoreCase(extension)
            || "rar".equalsIgnoreCase(extension)
            || "jpg".equalsIgnoreCase(extension);
}
/**
 * Wraps one page of search results in a {@code Page} object, using a fixed page
 * size of 10.
 *
 * @param pageNum the current page number
 * @param length the total number of records
 * @param subList the records belonging to the current page
 * @return the {@code Page} built from these values
 * @date 2016-12-05
 * @author liuyy
 */
public Page getPageResult(int pageNum,int length,List<Map<String, String>> subList) {
    final int recordsPerPage = 10;
    Page page = new Page(pageNum, recordsPerPage, length, subList);
    return page;
}
/**
* 获取检索的结果
* @param searcher
* @param query
* @return
* @date 2016-12-05
* @author liuyy
*/
public ScoreDoc[] getScoreDoc(IndexSearcher searcher,Query query) {
TopDocs tds = null;
try {
tds = searcher.search(query, 200);
} catch (IOException e) {
实现从SVN上下载文件、对文件建立索引并搜索索引的功能,类似于百度
最新推荐文章于 2024-09-30 11:27:43 发布
本文介绍了如何利用Java EE和Eclipse开发一个功能,能够从SVN仓库下载文件并创建索引,实现类似百度的搜索功能。通过对SVN上的文件进行索引,用户可以快速查找并访问所需文件。
摘要由CSDN通过智能技术生成