Lucene 3.5 example

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.jd.lucene;

import java.io.File;
import java.io.IOException;
import java.sql.Date;

import java.text.Format;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Search {
    private static String indexPath = "/home/mlzboy/my/crawler/index";//索引存放目录  
    /**
     * @param args
     * @throws IOException
     * @throws CorruptIndexException
     * @throws ParseException
     */
    public static void main(String[] args) throws CorruptIndexException, IOException, ParseException {
        // TODO Auto-generated method stub
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(indexPath)));
        System.out.println("total blogs:"+searcher.getIndexReader().numDocs());
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
        QueryParser parser = new QueryParser(Version.LUCENE_34, "title", analyzer);//有变化的地方  
        Query query = parser.parse("诺基亚95");  
        query = parser.parse("lucene");
        
        
//        SortField s1=new SortField("read",SortField.INT,true);
List<SortField> sortFields = new ArrayList<SortField>();      
      sortFields.add(new SortField("read", SortField.INT, true));
      sortFields.add(new SortField("date", SortField.LONG, true));
      SortField[] aa=new SortField[2];
      sortFields.toArray(aa);
      Sort sort=new Sort(aa);

        //        Sort sort=new Sort();
//        sort.setSort(s1);

        TopFieldDocs tfd=searcher.search(query,100,sort);
        ScoreDoc[] hits = tfd.scoreDocs;
//        TopScoreDocCollector collector = TopScoreDocCollector.create(100,false);//有变化的地方  
//        searcher.search(query, collector);  
//        ScoreDoc[] hits = collector.topDocs().scoreDocs;            
        
        System.out.println(hits.length);  
        for (int i = 0; i < hits.length; i++) {  
            Document doc = searcher.doc(hits[i].doc);//new method is.doc()
            System.out.print(doc.getFieldable("id")+" "+doc.getFieldable("title")+"   "+hits[i].toString()+" ");  
            System.out.print("=="+hits[i].doc+"====");
            System.out.print(doc.getFieldable("link"));
            Format formatter;
  formatter = new SimpleDateFormat("yyyy-MM-dd hh:mm");
  String s = formatter.format(Long.parseLong(doc.get("date")));
 
  System.out.print(s+" ");
  System.out.println(Integer.parseInt(doc.get("read")));
        }  
      
//        System.out.println("Found " + collector.getTotalHits());          
System.out.println("Found "+tfd.totalHits);
       }

 

}

 

 

 

 

 

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.jd.lucene;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Scanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Index {
    private static String indexPath = "/home/mlzboy/my/crawler/index";//索引存放目录 
    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
        try {

            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);

//            if (create) {
//              // Create a new index in the directory, removing any
//              // previously indexed documents:
//              iwc.setOpenMode(OpenMode.CREATE);
//            } else {
//              // Add new documents to an existing index:
              iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
//            }

            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer.  But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            // iwc.setRAMBufferSizeMB(256.0);

            IndexWriter writer = new IndexWriter(dir, iwc);
            writer.deleteAll();
            Scanner scanner = new Scanner(new FileInputStream("/home/mlzboy/my/crawler/d.txt"), "UTF-8");
    try {
      int ii=0;
      while (scanner.hasNextLine()){
        String line=scanner.nextLine();
        String[] elems=line.split(",");
        System.out.println(elems[0]);
         if (elems.length>2){
             ii+=1;
            
             Document doc = new Document();
        Field f0=new Field("id",Integer.toString(ii),Field.Store.YES,Field.Index.NOT_ANALYZED);
        Field f1=new Field("title",elems[0],Field.Store.YES,Field.Index.ANALYZED);
        Field f2=new Field("link",elems[1],Field.Store.YES,Field.Index.NO);

            System.out.println(elems[2]);
                    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd hh:mm");
             Date date=new Date();
                String dateString = "2007-07-07 20:29";
                dateString = elems[2];
             try {
               date = df.parse(dateString);
               System.out.println(date.toLocaleString());
             }
             catch (Exception ex)
             { System.out.println(ex.getMessage());}

                NumericField f3=new NumericField("date",Field.Store.YES,true);
                f3.setLongValue(date.getTime());
                doc.add(f3);
                System.out.println(elems[2]);
               
                doc.add(new NumericField("read",Field.Store.YES,true).setIntValue(Integer.parseInt(elems[3])));
                doc.add(new NumericField("comment",Field.Store.YES,true).setIntValue(Integer.parseInt(elems[4])));

        doc.add(f0);
        doc.add(f1);
        doc.add(f2);

        writer.addDocument(doc);
                }

      }
    }
    finally{
      scanner.close();
    }
//            Document doc = new Document();
//              Field f=new Field("title","诺基亚返乡贴补n95",Field.Store.YES,Field.Index.ANALYZED);
           
//              doc.add(f);
//            writer.addDocument(doc);
            // NOTE: if you want to maximize search performance,
            // you can optionally call optimize here.  This can be
            // a costly operation, so generally it's only worth
            // it when your index is relatively static (ie you're
            // done adding documents to it):
            //
            writer.forceMerge(1);
//            Term term=new Term("link","http://www.cnblogs.com/lexus/archive/2011/09/30/2196819.html");
//            writer.deleteDocuments(term);
            Term term=new Term("id","2162");
            writer.deleteDocuments(term);
            writer.close();

            System.out.println(" caught b " );
            System.out.println(new Date());
            System.out.println(new Date().getTime());

          } catch (IOException e) {
            System.out.println(" caught a " );
          }
       
       
       

       
       
       
       
    }

}

 

 

 

 

 

 

 

 

 

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"

  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <!-- Maven build descriptor for the com.jd:lucene artifact, packaged as a jar. -->

  <modelVersion>4.0.0</modelVersion>

 

  <groupId>com.jd</groupId>

  <artifactId>lucene</artifactId>

  <version>1.0-SNAPSHOT</version>

  <packaging>jar</packaging>

 

  <name>lucene</name>

  <url>http://maven.apache.org</url>

 

  <properties>

    <!-- Source files are compiled as UTF-8. -->

    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

  </properties>

 

  <dependencies>

    <!-- JUnit 3.8.1, available on the test classpath only. -->

    <dependency>

      <groupId>junit</groupId>

      <artifactId>junit</artifactId>

      <version>3.8.1</version>

      <scope>test</scope>

    </dependency>

    <!-- Lucene core library, version 3.5.0. -->

    <dependency>

      <groupId>org.apache.lucene</groupId>

      <artifactId>lucene-core</artifactId>

      <version>3.5.0</version>

      <type>jar</type>

    </dependency>

    <!-- Lucene demo artifact, version 3.5.0.
         NOTE(review): confirm whether any project code actually needs this dependency. -->

    <dependency>

      <groupId>org.apache.lucene</groupId>

      <artifactId>lucene-demo</artifactId>

      <version>3.5.0</version>

    </dependency>

  </dependencies>

</project>

posted on 2011-12-18 13:11  lexus 阅读( ...) 评论( ...) 编辑 收藏

转载于:https://www.cnblogs.com/lexus/archive/2011/12/18/2291732.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值