package IndexService;
/**
*
* @method Lucene索引操作对象工具类
* @author Mr yi
* @time 2019年5月23日
*/
public class LuceneUtils {
private static Directory directory = null;
// private static IndexWriterConfig indexWriterConfig = null;
private static Analyzer analyzer = null;
private static Version matchVersion = null;
//static 代码块随着类的加载,只加载一次。作用是初始化类。
static{
try {
//在 6.6 以上版本中 version 不再是必要的,并且,存在无参构造方法,可以直接使用默认的 StandardAnalyzer 分词器。
matchVersion = Version.LUCENE_8_5_0;
//索引存放的位置,设置在当前目录中(项目根路径下)
// final String INDEXURL = "E://users//Administrator//eclipse-workspace//searchEngine//indexDir";
// directory = FSDirectory.open(new File(INDEXURL).toPath());/* Paths.get(INDEXURL)*/
//analyzer = new StandardAnalyzer(); // 标准分词器,适用于英文[支持中文采用的方法为单字切分。他会将词汇单元转换成小写形式,并去除停用词和标点符号]
//analyzer = new SmartChineseAnalyzer();//中文分词
//analyzer = new ComplexAnalyzer();//中文分词
//analyzer = new IKAnalyzer();//中文分词
analyzer = new IKAnalyzer();//中文分词
} catch (Exception e) {
e.printStackTrace();
}
}
public static Directory getDirectory() {
return directory;
}
/**
*
* @method 返回用于操作索引的对象
* @author Mr yi
* @time 2019年5月23日
* @return
* @throws Exception
*/
public static IndexWriter getIndexWriter() throws Exception{
//创建索引写入配置
// indexWriterConfig = new IndexWriterConfig(analyzer);
//创建索引写入对象
// IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
IndexWriter indexWriter = new IndexWriter("E://users//Administrator//eclipse-workspace//searchEngine//indexDir",analyzer, true);
return indexWriter;
}
/**
*
* @method 返回用于读取索引的对象
* @author Mr yi
* @time 2019年5月23日
* @return
* @throws Exception
*/
public static IndexSearcher getIndexSearcher() throws Exception{
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
return indexSearcher;
}
/**
*
* @method 返回当前版本
* @author Mr yi
* @time 2019年5月23日
* @return
*/
public static Version getMatchVersion() {
return matchVersion;
}
/**
*
* @method 返回当前使用的分词器
* @author Mr yi
* @time 2019年5月23日
* @return
*/
public static Analyzer getAnalyzer() {
return analyzer;
}
/**
*
* @method 将Article数据转换为Document
* @author Mr yi
* @time 2019年5月23日
* @param article 对象
* @return
* @throws IOException
*/
public static Document articleToDocument(News news) throws IOException{
if(news==null)
return null;
Document document = new Document();
StringField identifier = new StringField("newsId",news.getId(),Store.YES);
StringField newsurl = new StringField("newsUrl",news.getUrl(),Store.YES);
TextField newstitle = new TextField("newsTitle",news.getTitle(),Store.YES);
TextField newsdate = new TextField("newsDate",news.getDate(),Store.YES);
TextField newsbody = new TextField("newsBody",news.getBody(),Store.YES);
long mills = System.currentTimeMillis();
StringField indextime = new StringField("indexTime",mills+"",Store.YES);
StringField newsdate2 = new StringField("newDate2",news.getDate().substring(0, 4)+news.getDate().substring(5, 7)+news.getDate().substring(8, 10),Store.YES);
document.add(identifier);
document.add(newsurl);
document.add(newstitle);
document.add(newsdate);
document.add(newsbody);
document.add(indextime);
document.add(newsdate2);
return document;
}
/**
*
* @method 添加索引
* @author Mr yi
* @time 2019年5月24日
* @param document
* @throws Exception
*/
public static void addIndex(String path) throws Exception{
//获取indexWrite对象
IndexWriter indexWriter = LuceneUtils.getIndexWriter();
File folder = new File(path);
if (folder.isDirectory()) {
File[] list = folder.listFiles();
for (File f : list) {
File file = new File("E://users//Administrator//eclipse-workspace//searchEngine//dataDir//"+f.getName());
Newsdao nd=new Newsdao();
ArrayList<String> content = nd.readFileContent(file);
String id = nd.readFileId(file);
News news = new News();
news.setId(id);
// System.out.println(news.getId());
news.setUrl(content.get(0).replace("url:", ""));
// System.out.println(news.getUrl());
news.setTitle(content.get(1).replace("title:", ""));
// System.out.println(news.getTitle());
news.setDate(content.get(2).replace("time:", ""));
// System.out.println(news.getDate());
news.setBody(content.get(3));
// System.out.println(news.getBody());
Document document = LuceneUtils.articleToDocument(news);
try {
//将document写入磁盘中
indexWriter.addDocument(document);
}finally {//定要注意关闭indexWrite. 包括异常下,用finally关闭.否则会导致下一次写索引失败.,修改程序后,直接删除write.lock文件后就可以
indexWriter.close();
}
}}
}
/**
* 添加索引
* @param article
* @throws Exception
*/
public static void main(String[] args) throws Exception{
// IndexWriter indexWriter = LuceneUtils.getIndexWriter();
// News news = new News(); //这里需要给news 对象赋值
String path = "E://users//Administrator//eclipse-workspace//searchEngine//dataDir";
LuceneUtils.addIndex(path);
}
}