lucene3.5 example

最新推荐文章于 2024-08-24 17:09:16 发布

a13393665983

最新推荐文章于 2024-08-24 17:09:16 发布

阅读量86

点赞数

文章标签： java

原文链接：http://www.cnblogs.com/lexus/archive/2011/12/18/2291732.html

版权

lucene3.5 example

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.jd.lucene;

import java.io.File;
import java.io.IOException;
import java.sql.Date;

import java.text.Format;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line demo that queries a Lucene 3.5 index of blog posts.
 *
 * <p>Searches the analyzed {@code title} field, sorts the hits by the numeric
 * {@code read} field and then by the numeric {@code date} field (both
 * descending), and prints one line per hit followed by the total hit count.
 */
public class Search {
    /** Directory in which the index is stored. */
    private static String indexPath = "/home/mlzboy/my/crawler/index";

    /** Maximum number of hits fetched from the index. */
    private static final int MAX_HITS = 100;

    /**
     * Runs the fixed query {@code lucene} against the index and prints the results.
     *
     * @param args unused
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if the index directory cannot be read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws CorruptIndexException, IOException, ParseException {
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(indexPath)));
        try {
            System.out.println("total blogs:" + searcher.getIndexReader().numDocs());

            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
            // Since Lucene 3.x the parser takes the Version as its first argument.
            QueryParser parser = new QueryParser(Version.LUCENE_34, "title", analyzer);
            Query query = parser.parse("lucene");

            // Most-read first; ties are broken by the newest date.
            Sort sort = new Sort(
                    new SortField("read", SortField.INT, true),
                    new SortField("date", SortField.LONG, true));

            TopFieldDocs tfd = searcher.search(query, MAX_HITS, sort);
            ScoreDoc[] hits = tfd.scoreDocs;

            // "HH" is the 24-hour clock. The previous "hh" (12-hour clock) without an
            // AM/PM marker rendered e.g. 20:29 as 08:29.
            Format formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm");

            System.out.println(hits.length);
            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.doc(hits[i].doc);
                System.out.print(doc.getFieldable("id") + " " + doc.getFieldable("title") + "   " + hits[i].toString() + " ");
                System.out.print("==" + hits[i].doc + "====");
                System.out.print(doc.getFieldable("link"));

                // The stored "date" value is epoch milliseconds; the formatter accepts a Number.
                String s = formatter.format(Long.valueOf(doc.get("date")));
                System.out.print(s + " ");
                System.out.println(Integer.parseInt(doc.get("read")));
            }

            System.out.println("Found " + tfd.totalHits);
        } finally {
            // Release the searcher even when the query or a document load fails.
            searcher.close();
        }
    }

}

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.jd.lucene;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Scanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Index {
    private static String indexPath = "/home/mlzboy/my/crawler/index";//索引存放目录
    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
        try {

            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);

//            if (create) {
//              // Create a new index in the directory, removing any
//              // previously indexed documents:
//              iwc.setOpenMode(OpenMode.CREATE);
//            } else {
//              // Add new documents to an existing index:
              iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
//            }

            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            // iwc.setRAMBufferSizeMB(256.0);

            IndexWriter writer = new IndexWriter(dir, iwc);
            writer.deleteAll();
            Scanner scanner = new Scanner(new FileInputStream("/home/mlzboy/my/crawler/d.txt"), "UTF-8");
    try {
      int ii=0;
      while (scanner.hasNextLine()){
        String line=scanner.nextLine();
        String[] elems=line.split(",");
        System.out.println(elems[0]);
         if (elems.length>2){
             ii+=1;

             Document doc = new Document();
        Field f0=new Field("id",Integer.toString(ii),Field.Store.YES,Field.Index.NOT_ANALYZED);
        Field f1=new Field("title",elems[0],Field.Store.YES,Field.Index.ANALYZED);
        Field f2=new Field("link",elems[1],Field.Store.YES,Field.Index.NO);

            System.out.println(elems[2]);
                    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd hh:mm");
             Date date=new Date();
                String dateString = "2007-07-07 20:29";
                dateString = elems[2];
             try {
               date = df.parse(dateString);
               System.out.println(date.toLocaleString());
             }
             catch (Exception ex)
             { System.out.println(ex.getMessage());}

                NumericField f3=new NumericField("date",Field.Store.YES,true);
                f3.setLongValue(date.getTime());
                doc.add(f3);
                System.out.println(elems[2]);

                doc.add(new NumericField("read",Field.Store.YES,true).setIntValue(Integer.parseInt(elems[3])));
                doc.add(new NumericField("comment",Field.Store.YES,true).setIntValue(Integer.parseInt(elems[4])));

        doc.add(f0);
        doc.add(f1);
        doc.add(f2);

writer.addDocument(doc);
}

      }
    }
    finally{
      scanner.close();
    }
//            Document doc = new Document();
//              Field f=new Field("title","诺基亚返乡贴补n95",Field.Store.YES,Field.Index.ANALYZED);

//              doc.add(f);
//            writer.addDocument(doc);
            // NOTE: if you want to maximize search performance,
            // you can optionally call optimize here. This can be
            // a costly operation, so generally it's only worth
            // it when your index is relatively static (ie you're
            // done adding documents to it):
            //
            writer.forceMerge(1);
//            Term term=new Term("link","http://www.cnblogs.com/lexus/archive/2011/09/30/2196819.html");
//            writer.deleteDocuments(term);
            Term term=new Term("id","2162");
            writer.deleteDocuments(term);
            writer.close();

            System.out.println(" caught b " );
            System.out.println(new Date());
            System.out.println(new Date().getTime());

          } catch (IOException e) {
            System.out.println(" caught a " );
          }

    }

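<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- NOTE: the project groupId was lost from the original page; com.jd is inferred from the package com.jd.lucene. -->
  <groupId>com.jd</groupId>
  <artifactId>lucene</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>lucene</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <!-- NOTE: version and scope were lost from the original page; these are the Maven quickstart defaults. Confirm before use. -->
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <!-- NOTE: version was lost from the original page; 3.5.0 matches the article title "lucene3.5 example". -->
      <version>3.5.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-demo</artifactId>
      <!-- NOTE: version was lost from the original page; assumed to match lucene-core. -->
      <version>3.5.0</version>
    </dependency>
  </dependencies>

</project>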

posted on 2011-12-18 13:11 lexus 阅读( ...) 评论( ...) 编辑收藏

转载于:https://www.cnblogs.com/lexus/archive/2011/12/18/2291732.html

a13393665983

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene3.5 example

lucene3.5 example /** To change this template, choose Tools | Templates* and open the template in the editor.*/package com.jd.luce...
复制链接

扫一扫