lucene 索引过程

最新推荐文章于 2024-07-27 17:56:15 发布

weixin_33907511

最新推荐文章于 2024-07-27 17:56:15 发布

阅读量54

点赞数

文章标签： java

原文链接：http://blog.51cto.com/yaomy/1721546

版权

IndexSearcher searcher = null;
        try {
            QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
            parser.setAllowLeadingWildcard(true);
            Query query = parser.parse("*");
            
        //    TermQuery query = new TermQuery(new Term("content","like"));
            
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------");
            }

document.setBoost(float) 对通配符查询（例如上面的 `*`）无效：通配符查询默认会被重写为常量评分查询，所有命中文档得分相同，索引时设置的文档 boost 不参与打分。

package com.lucene.study;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class Index0Util {
    private Directory directory;
    private IndexReader reader;

    public Index0Util() {
        directory =  new RAMDirectory();
        index();
    }
    private String[] ids = {"1","2","3","4","5","6"};
    private String[] emails = {"test0@itat.org","b@itat.orx","c@163.org","yao@sina.org","e@126.edu","f@139.org"};
    private String[] contents = {
            "welcome to visited the space,I like book",
            "hello boy, I like pingpeng ball",
            "my name is cc I like game",
            "I like football",
            "I like football and I like basketball too",
            "I like movie and swim"
    };
    private String[] names = {"yaomy","leo","cherry","low","uppder","tiger"};
    
    private int[] attachs = {2,3,1,4,5,5};
    
    public void index() {
        IndexWriter writer = null;
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardAnalyzer(Version.LUCENE_35));
            writer = new IndexWriter(directory, config);
        //    writer.deleteAll();
            
            for (int i=0;i<ids.length;i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                document.add(new Field("email", emails[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("name", names[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new NumericField("attach",Field.Store.YES,true).setIntValue(attachs[i]));
                document.add(new Field("content",contents[i] ,Field.Store.NO,Field.Index.ANALYZED));
                
                if(emails[i].endsWith("163.org")) {
                    document.setBoost(1.5f);
                } else {
                    document.setBoost(0.1f);
                }
                
                writer.addDocument(document);
            }

        } catch (CorruptIndexException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }

    }

    public void search() {
        IndexSearcher searcher = null;
        try {
            QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
            //parser.setAllowLeadingWildcard(true);
            Query query = parser.parse("hello like");
            
        //    TermQuery query = new TermQuery(new Term("content","like"));
            
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------"+doc.getBoost());
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    public void search01() {
        IndexSearcher searcher = null;
        try {
        //    QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
            //parser.setAllowLeadingWildcard(true);
            //Query query = parser.parse("like");
            
            TermQuery query = new TermQuery(new Term("name","test0"));
            
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------");
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    
    public void searchByTermRange() {
        IndexSearcher searcher = null;
        try {
            TermRangeQuery query = new TermRangeQuery("email","a","d",true,true);
            
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------");
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    public void searchByNumericRange() {
        IndexSearcher searcher = null;
        try {
            NumericRangeQuery query = NumericRangeQuery.newIntRange("attach", 0, 4, true, true);
            
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------"+doc.get("attach"));
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    
    public void searchByPrefix() {
        IndexSearcher searcher = null;
        try {

            PrefixQuery query = new PrefixQuery(new Term("content","like"));
            System.out.println(query.toString());
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------"+doc.get("attach"));
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    
    public void searchByPhrase() {
        IndexSearcher searcher = null;
        try {

            PhraseQuery query = new PhraseQuery();
            query.setSlop(1);
            query.add(new Term("content","like"));
            query.add(new Term("content","ball"));
            System.out.println(query.toString());
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------"+doc.get("attach"));
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    
    public void searchByParser() {
        IndexSearcher searcher = null;
        try {

        
            QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
            parser.setAllowLeadingWildcard(true);
            parser.setPhraseSlop(3);
            //parser.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query query = parser.parse("\"I swim\"");
            
            System.out.println(query.toString());
            searcher = getIndexSearcher();
            TopDocs docs = searcher.search(query, 10);
            System.out.println("一共查詢了 "+docs.totalHits+" 条");
            
            System.out.println("一共有文档 "+reader.maxDoc() +" 条");
            System.out.println("一共有未删除的文档 "+reader.numDocs() +" 条");
            System.out.println("一共删除的文档 "+reader.numDeletedDocs() +" 条");
            for(ScoreDoc sd:docs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"-------------"+doc.get("name")+"---------"+doc.get("email")+"-----------"+doc.get("attach"));
            }

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }  finally {
            if(searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }
    
    
    
    private IndexSearcher getIndexSearcher() {

        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                IndexReader ir = IndexReader.openIfChanged(reader);
                if (ir != null) {
                    reader.close();
                    reader = ir;
                }
                
            }
            return new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        return null;

    }
    
    
    
    /*public void delete() {
        IndexWriter writer = null;
            try {
                directory = FSDirectory.open(new File(
                        "C:\\Users\\hc-yaomy\\Desktop\\lucene\\index100"));
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                        new StandardAnalyzer(Version.LUCENE_35));
                writer = new IndexWriter(directory, config);
                
                writer.deleteDocuments(new Term("name","Linux Basics.txt"));
                writer.commit();
            } catch (CorruptIndexException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (LockObtainFailedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } finally {
                if(writer!= null) {
                    try {
                        writer.close();
                    } catch (CorruptIndexException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
            }
    }
    
    
    public void undelete() {
        try {
            directory = FSDirectory.open(new File(
                    "C:\\Users\\hc-yaomy\\Desktop\\lucene\\index100"));
            
            IndexReader reader = IndexReader.open(directory,false);
            reader.undeleteAll();
            System.out.println("文档总数 "+reader.maxDoc());
            System.out.println("未删除的文档总数 "+reader.numDocs());
            reader.close();
        } catch (CorruptIndexException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    
    public void update() {

        IndexWriter writer = null;
            try {
                directory = FSDirectory.open(new File(
                        "C:\\Users\\hc-yaomy\\Desktop\\lucene\\index100"));
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                        new StandardAnalyzer(Version.LUCENE_35));
                writer = new IndexWriter(directory, config);
                
                Document doc = new Document();
                doc.add(new Field("name","test",Field.Store.YES,Field.Index.ANALYZED));
            //    doc.add(new Field("content","test111111111111111111111111111111111111111",Field.Store.NO,Field.Index.ANALYZED));
                doc.add(new Field("path","test/1/2/3",Field.Store.YES,Field.Index.ANALYZED));
                writer.updateDocument(new Term("name","Linux Basics.txt"), doc);
                writer.commit();
            } catch (CorruptIndexException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (LockObtainFailedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } finally {
                if(writer!= null) {
                    try {
                        writer.close();
                    } catch (CorruptIndexException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
            }
    
    }

    */
}

转载于:https://blog.51cto.com/yaomy/1721546

weixin_33907511

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene 索引过程

IndexSearchersearcher=null;try{QueryParserparser=newQueryParser(Version.LUCENE_35,"content",newStandardAnalyzer(Version.LUCENE_35));parser.setAll...
复制链接

扫一扫