lucene小练习

最新推荐文章于 2024-09-27 00:45:01 发布

weixin_33708432

最新推荐文章于 2024-09-27 00:45:01 发布

阅读量56

点赞数

文章标签： java python

原文链接：https://my.oschina.net/chenliyong/blog/689043

版权

2019独角兽企业重金招聘Python工程师标准>>>

工具类======================================》

/**
 * Static helper that holds the shared Lucene configuration (index directory,
 * version, analyzer, max field length) and converts between plain Java beans
 * and Lucene {@link Document} objects via reflection.
 *
 * <p>The class cannot be instantiated, so every member is static.
 */
public class LuceneUtil {
    /** Private so that no instance of this utility class can be created from outside. */
    private LuceneUtil() {}

    /** Index location; stays {@code null} if opening it failed during class initialization. */
    private static Directory directory;
    /** Lucene compatibility version used for the analyzer. */
    private static Version version;
    private static Analyzer analyZer;
    private static MaxFieldLength maxFieldLength;

    static {
        // Set the values that need no I/O first, so that a failure to open the
        // index directory does not leave them uninitialized as well.
        version = Version.LUCENE_30;
        analyZer = new StandardAnalyzer(version);
        maxFieldLength = MaxFieldLength.LIMITED;
        try {
            directory = FSDirectory.open(new File("E:/testLucene"));
        } catch (IOException e) {
            // Kept non-fatal: the bean/document converters below do not need the directory.
            e.printStackTrace();
        }
    }

    /**
     * Builds a Lucene document from a bean: every non-static declared field of the
     * bean's class is read through its public {@code getXxx()} method and added as
     * a stored, analyzed field whose name is the Java field name.
     *
     * <p>Properties whose getter returns {@code null} are left out of the document.
     *
     * @param t the bean to convert; must not be {@code null}
     * @return a document with one field per non-null bean property
     * @throws NoSuchMethodException if a field has no matching public no-arg getter
     * @throws IllegalAccessException if a getter cannot be invoked
     * @throws InvocationTargetException if a getter itself throws
     */
    public static <T> Document java2document(T t) throws NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
        Class<?> clazz = t.getClass();
        Document document = new Document();
        for (java.lang.reflect.Field field : clazz.getDeclaredFields()) {
            // Constants such as serialVersionUID and compiler-generated fields are not bean properties.
            if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            String fieldName = field.getName();
            // Fixed locale: the default-locale toUpperCase() maps "i" to a dotted capital under Turkish locales.
            String getterName = "get"
                    + fieldName.substring(0, 1).toUpperCase(java.util.Locale.ENGLISH)
                    + fieldName.substring(1);
            Method getter = clazz.getMethod(getterName);
            Object value = getter.invoke(t);
            if (value == null) {
                // Nothing to index for an unset property.
                continue;
            }
            document.add(new Field(fieldName, value.toString(), Store.YES, Index.ANALYZED));
        }
        return document;
    }

    /**
     * Creates a bean of the given class and fills it from a Lucene document: for
     * every non-static declared field, the stored document value with the same name
     * is handed to {@code BeanUtils.setProperty}.
     *
     * <p>Fields that have no stored value in the document are left at the value the
     * no-arg constructor gave them.
     *
     * @param document the document to read stored values from
     * @param clazz the bean class; needs an accessible no-arg constructor
     * @return the populated bean
     * @throws InstantiationException if the class cannot be instantiated
     * @throws NoSuchMethodException if the class has no no-arg constructor
     * @throws InvocationTargetException if the constructor or a property setter throws
     */
    public static <T> T document2java(Document document, Class<T> clazz) throws InstantiationException, IllegalAccessException, NoSuchMethodException, SecurityException, IllegalArgumentException, InvocationTargetException {
        T bean = clazz.getDeclaredConstructor().newInstance();
        for (java.lang.reflect.Field field : clazz.getDeclaredFields()) {
            if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            String fieldName = field.getName();
            String value = document.get(fieldName);
            if (value != null) {
                // NOTE(review): presumably Apache Commons BeanUtils, which converts the
                // string to the property's type - confirm against the file's imports.
                BeanUtils.setProperty(bean, fieldName, value);
            }
        }
        return bean;
    }

    // The accessors below are static: with a private constructor, instance
    // accessors could never be called.

    /** @return the shared index directory, or {@code null} if it could not be opened */
    public static Directory getDirectory() {
        return directory;
    }

    public static void setDirectory(Directory directory) {
        LuceneUtil.directory = directory;
    }

    /** @return the shared Lucene version */
    public static Version getVersion() {
        return version;
    }

    public static void setVersion(Version version) {
        LuceneUtil.version = version;
    }

    /** @return the shared analyzer */
    public static Analyzer getAnalyZer() {
        return analyZer;
    }

    public static void setAnalyZer(Analyzer analyZer) {
        LuceneUtil.analyZer = analyZer;
    }

    /** @return the shared maximum field length setting */
    public static MaxFieldLength getMaxFieldLength() {
        return maxFieldLength;
    }

    public static void setMaxFieldLength(MaxFieldLength maxFieldLength) {
        LuceneUtil.maxFieldLength = maxFieldLength;
    }
}

未使用工具类的操作================================================

/**
 * Minimal Lucene example that works directly against the Lucene API (without
 * a shared utility class): one method indexes a single article, the other
 * searches the index by keyword and prints the hits.
 */
public class FirstLucene {
    /**
     * Indexes one hard-coded {@code Article} into the index at {@code E:/testLucene}.
     *
     * @throws CorruptIndexException if the existing index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on any other low-level I/O failure
     */
//   @Test
    public void createLucene() throws CorruptIndexException, LockObtainFailedException, IOException {
        Article article = new Article(1, "高考新闻", "高考第一天");
        Document document = new Document();
        /*
         * Bind the three Article properties to the document. Field arguments:
         *   1. field name inside the document ("xid" here, while the Article
         *      property is "id"; using identical names is recommended in real projects)
         *   2. field value, taken from the article
         *   3. Store.YES keeps the original value in the index so it can be read
         *      back from a search hit; Store.NO does not
         *   4. Index.ANALYZED runs the value through the analyzer (tokenizes it);
         *      Index.NOT_ANALYZED indexes the value as a single term
         */
        document.add(new Field("xid", article.getId() + "", Store.YES, Index.ANALYZED));
        document.add(new Field("xtitle", article.getTitle(), Store.YES, Index.ANALYZED));
        document.add(new Field("xcontent", article.getContent(), Store.YES, Index.ANALYZED));
        /*
         * IndexWriter arguments:
         *   1. the directory on disk where the index lives
         *   2. the analyzer, i.e. the strategy used to split text into terms
         *   3. the maximum number of terms indexed per field (LIMITED means 10000)
         */
        Directory directory = FSDirectory.open(new File("E:/testLucene"));
        Version version = Version.LUCENE_30;
        Analyzer analyZer = new StandardAnalyzer(version);
        IndexWriter indexWriter = new IndexWriter(directory, analyZer, MaxFieldLength.LIMITED);
        try {
            indexWriter.addDocument(document);
        } finally {
            // Always close the writer, even if addDocument fails, so the index
            // write lock is released.
            indexWriter.close();
        }
    }

    /**
     * Searches the {@code xcontent} field of the index at {@code E:/testLucene}
     * for a hard-coded keyword, rebuilds an {@code Article} from every hit
     * (at most 100) and prints its id, title and content.
     *
     * @throws IOException on a low-level I/O failure while reading the index
     * @throws ParseException if the keyword cannot be parsed into a query
     */
    @Test
    public void queryLucene() throws IOException, ParseException {
        String keyWord = "高考";
        List<Article> articleList = new ArrayList<Article>();

        Directory directory = FSDirectory.open(new File("E:/testLucene"));
        Version version = Version.LUCENE_30;
        Analyzer analyZer = new StandardAnalyzer(version);
        IndexSearcher indexSearcher = new IndexSearcher(directory);
        try {
            /*
             * QueryParser arguments:
             *   1. Lucene version the parser should be compatible with
             *   2. the document field that is searched by default
             *   3. the analyzer applied to the query text
             */
            QueryParser queryParser = new QueryParser(version, "xcontent", analyZer);
            Query query = queryParser.parse(keyWord);
            // Upper bound on the number of hits returned; fewer are returned
            // when fewer documents match.
            int maxRecord = 100;
            TopDocs topDocs = indexSearcher.search(query, maxRecord);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // scoreDoc.doc is the internal document number used to load the stored fields.
                Document document = indexSearcher.doc(scoreDoc.doc);
                int xid = Integer.parseInt(document.get("xid"));
                String xtitle = document.get("xtitle");
                String xcontent = document.get("xcontent");
                articleList.add(new Article(xid, xtitle, xcontent));
            }
        } finally {
            // Release the index reader held by the searcher.
            indexSearcher.close();
        }

        for (Article article : articleList) {
            System.out.println(article.getId());
            System.out.println(article.getTitle());
            System.out.println(article.getContent());
        }
    }
}