自己做的一个操作索引的例子

建立的字段包括:ID(=digitalID)、title、contents几个字段,可以根据需要增加新的字段。其中digitalID做了唯一性处理:在添加之前先删除已有的相同编码的记录,再重新添加。ID是作为path存储的,不能索引,只能取出使用,在这里这样做没有什么实际的价值。其他字段都是字符串,由于所有的数据都是取自数据库的,所以这样做比较好。例子的最后有一段是建立文件索引的,直接引用了别人的代码。
上述方法在查询的时候不能直接使用一些特殊的符号:+ - [ ] ( ) { } && || " 等等,如果要使用,需要先进行转义。查询的给定条件需要自己定义,比如查询title和contents同时满足的,需要这样构造:
field:contents
text:title:text AND text
或的关系
field:contents
text:title:text OR text 或 text:title:text text

 /**
     * 建立指定的编码的图片的索引
     * 1.索引的内容包含图的标题,描述关键字
     * 2.上述内容用字符串的形式存储
     * 3.给定的建立索引的目录必须是空目录或者存在索引文件的目录
     * @param astrID String 给定的数字对象的编码
     * @param astrTitle String 给定的关键
     * @param astrContent String 给定的索引内容(包括标题,描述)
     * @throws java.lang.Exception 数据处理错误
     */
    private void addIndex(String astrID, String astrTitle, String astrContent) throws
            Exception {
        boolean build = false;
        //是否要建立索引文件
        String tmpPath = mstrIndexPath + "/segments";
        File exist = new File(tmpPath);
        if (!exist.exists() || exist.isDirectory()) { //给定的文件不存在,需要建立新的索引文件
            build = true;
        }
        //用指定的语言分析器构造一个索引
        IndexWriter writer = new IndexWriter(mstrIndexPath,
                                             new StandardAnalyzer(), build);
        //writer.mergeFactor = 10;
        //mergeFactor是一个针对批量索引的优化参数,控制多少条处理完多少条记录(Document)后,写入一次索引,写入频率越高,内存使用越少,但索引速度越慢,所以在大批量数据导入时需要增大文件写入的间隔,多让索引在内存中操作。
        org.apache.lucene.document.Document doc = new org.apache.lucene.
                                                  document.Document();
        doc.add(Field.UnIndexed("ID", astrID));
        doc.add(Field.Keyword("digitalID", astrID));
        doc.add(Field.Text("title", astrTitle));
        doc.add(Field.Text("contents", astrContent));

        writer.addDocument(doc);
        writer.optimize();
        writer.close();
    }

    /**
     * 修改指定索引编码的索引记录:
     *    修改的方法是在原有索引的基础上删除相同的索引号,添加新的索引号
     * @param astrID String 指定索引的索引编码
     * @param astrTitle 指定的关键字
     * @param astrContent String 指定的内容
     * @throws Exception 可能的异常
     */
    public synchronized void modIndex(String astrID, String astrTitle,
                                      String astrContent) throws
            Exception {
        File file = new File(mstrIndexPath + "/segments");
        if (file.exists() && !file.isDirectory()) {
            //删除存在的ID
            Directory directory = FSDirectory.getDirectory(mstrIndexPath, false);
            IndexReader reader = IndexReader.open(directory);
            reader.unlock(directory);

            //删除指定的索引
            Term term = new Term(INDEX_ID, astrID);
            int delete = reader.delete(term);
            /*for (int i = 0; i < reader.maxDoc(); i++) {
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i); //取索引的一行记录
                    String fValue = doc.get("digitalID"); //取指定的字段值
                    if (fValue.equals(astrID)) {
                        reader.delete(i);
                    }
                }
                         }*/
            reader.close();
            directory.close();
        }
        //添加一条记录
        addIndex(astrID, astrTitle, astrContent);
    }

    /**
     * 删除指定字段的索引
     * @param field String 指定的字段
     * @param txt String 指定字段的值
     * @throws Exception 可能的异常
     */
    public synchronized void delIndex(String field, String txt) throws
            Exception {
        //删除存在的ID
        Directory directory = FSDirectory.getDirectory(mstrIndexPath, false);
        IndexReader reader = IndexReader.open(directory);
        reader.unlock(directory);

        //删除指定的索引
        Term term = new Term(field, txt);
        int delete = reader.delete(term);
        reader.close();
        directory.close();
    }

    /**
     * 查找给定串的数字对象,返回所有的数字对象编码集合
     * @param astrField String 给定的域
     * @param astrKey String 给定的查询字符串
     * @return String[] 编码ID数组
     * @throws Exception
     */
    public String[] search(String astrField, String astrKey) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        String[] did = new String[0];
        //指向索引目录的搜索器
        try {
            Searcher searcher = new IndexSearcher(mstrIndexPath);
            Term term = new Term(astrField, astrKey);
            Query query = QueryParser.parse(astrKey, astrField, analyzer);
            System.out.println("Searching for: " + query.toString("contents"));
            Hits hits = searcher.search(query);
            int size = hits.length();
            did = new String[size];
            for (int i = 0; i < size; i++) {
                did[i] = hits.doc(i).get("digitalID");
                System.out.println(":" + hits.doc(i).get("contents"));
            }
        } catch (ParseException e) {
            //e.printStackTrace();
            System.out.println(e.getMessage());
        }
        return did;
    }

----------------------------------------------------------------------------------------------------------------------------------------------

class IndexFiles {
  public static void main(String[] args) throws IOException {
    String usage = "java " + IndexFiles.class + " <root_directory>";
    if (args.length == 0) {
      System.err.println("Usage: " + usage);
      System.exit(1);
    }

    Date start = new Date();
    try {
      IndexWriter writer = new IndexWriter("D://index//tt", new StandardAnalyzer(), true);
      indexDocs(writer, new File("D://temp"));

      writer.optimize();
      writer.close();

      Date end = new Date();

      System.out.print(end.getTime() - start.getTime());
      System.out.println(" total milliseconds");

    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() +
       "/n with message: " + e.getMessage());
    }
  }

  public static void indexDocs(IndexWriter writer, File file)
    throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
      if (file.isDirectory()) {
        String[] files = file.list();
        // an IO error could occur
        if (files != null) {
          for (int i = 0; i < files.length; i++) {
            indexDocs(writer, new File(file, files[i]));
          }
        }
      } else {
        System.out.println("adding " + file);
        try {
          writer.addDocument(FileDocument.Document(file));
        }
        // at least on windows, some temporary files raise this exception with an "access denied" message
        // checking if the file can be read doesn't help
        catch (FileNotFoundException fnfe) {
          ;
        }
      }
    }
  }
}

public class FileDocument {
  /** Makes a document for a File.
    <p>
    The document has three fields:
    <ul>
    <li><code>path</code>--containing the pathname of the file, as a stored,
    tokenized field;
    <li><code>modified</code>--containing the last modified date of the file as
    a keyword field as encoded by <a
    href="lucene.document.DateField.html">DateField</a>; and
    <li><code>contents</code>--containing the full contents of the file, as a
    Reader field;
    */
  public static Document Document(File f)
       throws java.io.FileNotFoundException {

    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path".  Use a Text field, so
    // that the index stores the path, and so that the path is searchable
    doc.add(Field.Text("path", f.getPath()));

    // Add the last modified date of the file a field named "modified".  Use a
    // Keyword field, so that it's searchable, but so that no attempt is made
    // to tokenize the field into words.
    doc.add(Field.Keyword("modified",
     DateField.timeToString(f.lastModified())));
    System.out.println(f.lastModified()+" "+System.currentTimeMillis());

    // Add the contents of the file a field named "contents".  Use a Text
    // field, specifying a Reader, so that the text of the file is tokenized.
    // ?? why doesn't FileReader work here ??
    FileInputStream is = new FileInputStream(f);
    Reader reader = new BufferedReader(new InputStreamReader(is));
    doc.add(Field.Text("contents", reader));

    // return the document
    return doc;
  }

  private FileDocument() {}
}

class SearchFiles {
  public static void main(String[] args) {
    try {
      Searcher searcher = new IndexSearcher("D://index//tt");
      Analyzer analyzer = new StandardAnalyzer();

      BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
      while (true) {
 System.out.print("Query: ");
 String line = in.readLine();

 if (line.length() == -1)
   break;

 //Query query = QueryParser.parse(line, "contents", analyzer);
        Query query = QueryParser.parse("产党", "contents", analyzer);
 System.out.println("Searching for: " + query.toString("contents"));

 Hits hits = searcher.search(query);
 System.out.println(hits.length() + " total matching documents");

 final int HITS_PER_PAGE = 10;
 for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
   int end = Math.min(hits.length(), start + HITS_PER_PAGE);
   for (int i = start; i < end; i++) {
     Document doc = hits.doc(i);
     String path = doc.get("path");
     if (path != null) {
              //System.out.println(i + ". " + path+" "+doc.get("modified"));
     } else {
              String url = doc.get("url");
       if (url != null) {
  //System.out.println(i + ". " + url);
  //System.out.println("   - " + doc.get("title"));
       } else {
  System.out.println(i + ". " + "No path nor URL for this document");
       }
     }
   }

   if (hits.length() > end) {
     System.out.print("more (y/n) ? ");
     line = in.readLine();
     if (line.length() == 0 || line.charAt(0) == 'n')
       break;
   }
 }
      }
      searcher.close();

    } catch (Exception e) {
      System.out.println(" caught a " + e.getClass() +
    "/n with message: " + e.getMessage());
    }
  }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值