二、lucene3.5的查询语法

1、工程结构


2、查询语法代码

package org.itat.index;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class SearcherUtil {
	private Directory directory;
	private Analyzer analyzer = new IKAnalyzer();
	private IndexReader reader;
	private String[] ids = {"1","2","3","4","5","6"};
	private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
	private String[] contents = {
			"welcome to visited the space,I like book",
			"hello boy, I like pingpeng ball",
			"my name is cc I like game",
			"I like football",
			"I like football and I like basketball too",
			"I like movie and swim"
	};
	private Date[] dates = null;
	private int[] attachs = {2,3,1,4,5,5};
	private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
	private Map<String,Float> scores = new HashMap<String,Float>();
	
	/**
	 * Opens the on-disk index directory and prepares the sample data
	 * (dates and per-domain boost factors). The sample index itself is NOT
	 * rebuilt here; call {@link #index()} explicitly when that is needed.
	 * If the directory cannot be opened the stack trace is printed and the
	 * remaining setup is skipped.
	 */
	public SearcherUtil() {
		try {
			File indexLocation = new File("F:\\Workspaces\\lucenes\\02_lucene_searcher\\index");
			directory = FSDirectory.open(indexLocation);
		} catch (IOException e) {
			e.printStackTrace();
			return;
		}
		setDates();
		// Boost factors keyed by e-mail domain; used by index().
		scores.put("itat.org",2.0f);
		scores.put("zttc.edu", 1.5f);
	}
	
	/**
	 * Fills {@code dates} with one sample date per entry of {@code ids}.
	 * A parse failure is reported on stderr and leaves the remaining slots null.
	 */
	private void setDates() {
		String[] rawDates = {
				"2010-02-19",
				"2012-01-11",
				"2011-09-19",
				"2010-12-22",
				"2012-01-01",
				"2011-05-19"
		};
		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
		dates = new Date[ids.length];
		try {
			for (int i = 0; i < rawDates.length; i++) {
				dates[i] = format.parse(rawDates[i]);
			}
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	
	/**
	 * Rebuilds the sample index from the in-memory arrays: all existing
	 * documents are removed, then one document per id is added.
	 * Each document is boosted by its e-mail domain (see {@code scores});
	 * domains without an entry get a boost of 0.5.
	 */
	public void index() {
		IndexWriter writer = null;
		try {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, config);
			writer.deleteAll();
			for (int row = 0; row < ids.length; row++) {
				Document doc = new Document();
				doc.add(new Field("id",ids[row],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
				doc.add(new Field("email",emails[row],Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new Field("content",contents[row],Field.Store.NO,Field.Index.ANALYZED));
				doc.add(new Field("name",names[row],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
				// Numeric value (attachment count).
				doc.add(new NumericField("attach",Field.Store.YES,true).setIntValue(attachs[row]));
				// Date stored as epoch milliseconds.
				doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[row].getTime()));
				// Everything after the last '@' is the domain used for the boost lookup.
				String domain = emails[row].substring(emails[row].lastIndexOf("@")+1);
				doc.setBoost(scores.containsKey(domain) ? scores.get(domain) : 0.5f);
				writer.addDocument(doc);
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are IOExceptions too.
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Returns a searcher over this instance's own index directory.
	 * The shared reader is opened on first use and refreshed when the index
	 * has changed; see {@link #getSearcher(Directory)} for the details.
	 *
	 * @return a new searcher, or {@code null} if the index could not be opened
	 */
	public IndexSearcher getSearcher() {
		return getSearcher(directory);
	}
	
	/**
	 * Returns a searcher backed by the shared {@code reader}.
	 * On first use the reader is opened on {@code directory}; afterwards the
	 * cached reader is reused and swapped for a fresh one when
	 * {@code IndexReader.openIfChanged} reports a newer index.
	 * NOTE(review): once a reader is cached, the {@code directory} argument is
	 * not consulted again, so passing a different directory later has no effect.
	 *
	 * @param directory index location used when no reader has been opened yet
	 * @return a new searcher, or {@code null} if an I/O error occurred
	 */
	public IndexSearcher getSearcher(Directory directory) {
		try {
			if (reader != null) {
				IndexReader refreshed = IndexReader.openIfChanged(reader);
				if (refreshed != null) {
					// The index changed: release the stale reader and keep the new one.
					reader.close();
					reader = refreshed;
				}
			} else {
				reader = IndexReader.open(directory);
			}
			return new IndexSearcher(reader);
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		}
	}
	
	/**
	 * Exact-term search: prints up to {@code num} documents whose
	 * {@code field} contains the term {@code name} verbatim (no analysis).
	 */
	public void searchByTerm(String field,String name,int num) {
		IndexSearcher searcher = getSearcher();
		try {
			TermQuery exact = new TermQuery(new Term(field,name));
			printDocument(searcher, searcher.search(exact, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	/**
	 * Analyzed keyword search. The user input is first tokenized by
	 * {@code analyzerKey}, and the resulting expression is handed to a
	 * {@link QueryParser} on {@code field}.
	 * <p>
	 * QueryParser treats whitespace-separated keywords as OR by default;
	 * the expression returned by {@code analyzerKey} is what gets parsed, so
	 * the way that helper joins the tokens decides how they are combined.
	 * Previously the analyzed expression was computed but never used.
	 * If analysis yields no tokens, the raw input is parsed instead.
	 *
	 * @param field default field to search
	 * @param name  raw user input
	 * @param num   maximum number of hits to print
	 */
	public void searchByTermToken(String field,String name,int num) {
		try {
			IndexSearcher searcher = getSearcher();
			QueryParser parser = new QueryParser(Version.LUCENE_35, field, analyzer);
			String expression = analyzerKey(name);
			if (expression == null || expression.trim().length() == 0) {
				// Nothing survived analysis; fall back to the text as typed.
				expression = name;
			}
			Query query = parser.parse(expression);
			TopDocs tds = searcher.search(query, num);
			printDocument(searcher, tds);
			searcher.close();
		} catch (Exception e) {
			// Covers I/O errors as well as query-syntax errors from the parser.
			e.printStackTrace();
		}
	}
	/**
	 * Tokenizes {@code key} with the class analyzer and joins the tokens into
	 * a query-parser expression of the form {@code t1 AND t2 AND ...}.
	 * Each token is escaped so that query-syntax characters inside a token are
	 * treated literally.
	 * <p>
	 * The previous version built the AND-joined string but then returned the
	 * plain space-separated token list, and never closed the token stream.
	 *
	 * @param key raw user input
	 * @return the AND-joined expression; empty if analysis produced no tokens
	 */
	private String analyzerKey(String key){
		StringBuilder joined = new StringBuilder();
		TokenStream tokenStream = null;
		try {
			tokenStream = analyzer.tokenStream("", new StringReader(key));
			CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
			tokenStream.reset();
			while (tokenStream.incrementToken()) {
				if (joined.length() > 0) {
					joined.append(" AND ");
				}
				joined.append(QueryParser.escape(term.toString()));
			}
			tokenStream.end();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (tokenStream != null) {
				try {
					tokenStream.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return joined.toString();
	}
	/**
	 * Prints the total hit count followed by the stored
	 * filename/path/date/size/content fields of every returned hit.
	 * A document that cannot be loaded is reported and skipped.
	 */
	public void printDocument(IndexSearcher searcher,TopDocs tds){
		System.out.println("共查询了【"+tds.totalHits+"】条");
		String[] shownFields = {"filename", "path", "date", "size", "content"};
		ScoreDoc[] hits = tds.scoreDocs;
		for (int i = 0; i < hits.length; i++) {
			try {
				Document doc = searcher.doc(hits[i].doc);
				for (int f = 0; f < shownFields.length; f++) {
					System.out.println(shownFields[f]+":"+doc.get(shownFields[f]));
				}
				System.out.println("-------------------------------------------");
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	/**
	 * Term-range search on {@code field} between {@code start} and {@code end},
	 * both bounds inclusive; prints up to {@code num} hits.
	 */
	public void searchByTermRange(String field,String start,String end,int num) {
		IndexSearcher searcher = getSearcher();
		try {
			TermRangeQuery range = new TermRangeQuery(field,start,end,true, true);
			printDocument(searcher, searcher.search(range, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	/**
	 * Integer range search, both bounds inclusive.
	 * The field must have been indexed as a NumericField (not a plain Field)
	 * for a NumericRangeQuery to be usable on it.
	 *
	 * @param field numeric field name
	 * @param start lower bound (inclusive)
	 * @param end   upper bound (inclusive)
	 * @param num   maximum number of hits to print
	 */
	public void searchByNumricRange(String field,int start,int end,int num) {
		IndexSearcher searcher = getSearcher();
		try {
			Query range = NumericRangeQuery.newIntRange(field,start, end,true,true);
			printDocument(searcher, searcher.search(range, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Prefix search: prints up to {@code num} documents having a term in
	 * {@code field} that starts with {@code value}.
	 */
	public void searchByPrefix(String field,String value,int num) {
		IndexSearcher searcher = getSearcher();
		try {
			PrefixQuery byPrefix = new PrefixQuery(new Term(field,value));
			printDocument(searcher, searcher.search(byPrefix, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Wildcard search. {@code value} may contain {@code ?} (exactly one
	 * character) and {@code *} (any number of characters).
	 */
	public void searchByWildcard(String field,String value,int num) {
		IndexSearcher searcher = getSearcher();
		try {
			WildcardQuery pattern = new WildcardQuery(new Term(field,value));
			printDocument(searcher, searcher.search(pattern, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Demonstrates BooleanQuery, which combines several sub-queries:
	 * Occur.MUST = must match, Occur.SHOULD = may match,
	 * Occur.MUST_NOT = must not match.
	 */
	public void searchByBoolean(int num) {
		IndexSearcher searcher = getSearcher();
		try {
			TermQuery excludedName = new TermQuery(new Term("name","3"));
			TermQuery optionalContent = new TermQuery(new Term("content","健壮"));
			BooleanQuery combined = new BooleanQuery();
			combined.add(excludedName, Occur.MUST_NOT);
			combined.add(optionalContent, Occur.SHOULD);
			printDocument(searcher, searcher.search(combined, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Demonstrates PhraseQuery: the two terms are added in order and a slop
	 * of 10 is set on the query.
	 */
	public void searchByPhrase(int num) {
		IndexSearcher searcher = getSearcher();
		try {
			PhraseQuery phrase = new PhraseQuery();
			phrase.setSlop(10);
			// First term of the phrase.
			phrase.add(new Term("content","java"));
			// Second term, allowed to sit up to the slop away from the first.
			phrase.add(new Term("content","程序"));
			printDocument(searcher, searcher.search(phrase, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	/**
	 * Fuzzy search: matches terms similar to the given one (edit-distance
	 * based), e.g. "like" and "mike" differ by a single character.
	 * Use sparingly; fuzzy queries are comparatively expensive.
	 */
	public void searchByFuzzy(int num) {
		IndexSearcher searcher = getSearcher();
		try {
			// Arguments after the term: minimum similarity, then prefix length.
			FuzzyQuery fuzzy = new FuzzyQuery(new Term("content","总统"),0.4f,0);
			System.out.println(fuzzy.getPrefixLength());
			System.out.println(fuzzy.getMinSimilarity());
			printDocument(searcher, searcher.search(fuzzy, num));
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Runs an already-built query and prints, for each hit, the stored
	 * id, name, email, attach and date fields plus the hit's score.
	 */
	public void searchByQueryParse(Query query,int num) {
		IndexSearcher searcher = getSearcher();
		try {
			TopDocs tds = searcher.search(query, num);
			System.out.println("一共查询了:"+tds.totalHits);
			ScoreDoc[] hits = tds.scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				Document doc = searcher.doc(hits[i].doc);
				StringBuilder line = new StringBuilder();
				line.append(doc.get("id")).append("---->");
				line.append(doc.get("name")).append("[").append(doc.get("email")).append("]-->");
				line.append(doc.get("id")).append(",");
				line.append(doc.get("attach")).append(",").append(doc.get("date"));
				line.append("==").append(hits[i].score);
				System.out.println(line.toString());
			}
			searcher.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	/**
	 * Paged search using the "re-query" approach: the top hits up to the end
	 * of the requested page are collected, then only the documents of that
	 * page are loaded and printed.
	 * <p>
	 * Only as many hits as the page needs are requested (previously a fixed
	 * 500, which made deeper pages unreachable), and the page is clipped to
	 * the hits actually returned, so a short or empty last page no longer
	 * raises ArrayIndexOutOfBoundsException.
	 *
	 * @param query     query-parser expression, default field "content"
	 * @param pageIndex 1-based page number
	 * @param pageSize  number of hits per page; values below 1 print nothing
	 * @throws IllegalArgumentException if {@code pageIndex} is less than 1
	 */
	public void searchPage(String query,int pageIndex,int pageSize) {
		if (pageIndex < 1) {
			throw new IllegalArgumentException("pageIndex must be >= 1, got " + pageIndex);
		}
		if (pageSize < 1) {
			return;
		}
		try {
			Directory dir = FileIndexUtils.getDirectory();
			IndexSearcher searcher = getSearcher(dir);
			QueryParser parser = new QueryParser(Version.LUCENE_35,"content",analyzer);
			Query q = parser.parse(query);
			int start = (pageIndex-1)*pageSize;
			int end = pageIndex*pageSize;
			TopDocs tds = searcher.search(q, end);
			ScoreDoc[] sds = tds.scoreDocs;
			// Never read past the hits that were actually returned.
			int last = Math.min(end, sds.length);
			for(int i=start;i<last;i++) {
				Document doc = searcher.doc(sds[i].doc);
				System.out.println("filename:"+doc.get("filename"));
				System.out.println("path:"+doc.get("path"));
				System.out.println("date:"+doc.get("date"));
				System.out.println("size:"+doc.get("size"));
				System.out.println("content:"+doc.get("content"));
				System.out.println("-------------------------------------------");
			}

			searcher.close();
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Finds the last ScoreDoc of the page preceding {@code pageIndex}, to be
	 * used as the anchor for {@code searchAfter}.
	 * <p>
	 * Only the hits up to the end of the previous page are requested
	 * (e.g. page size 10: page 2 fetches 10, page 3 fetches 20, ...).
	 * If the index holds fewer hits than that, the last available hit is
	 * returned so that the follow-up {@code searchAfter} yields an empty page
	 * instead of this method failing with ArrayIndexOutOfBoundsException.
	 *
	 * @return the anchor hit, or {@code null} for the first page, for a
	 *         non-positive page size, or when the query has no hits at all
	 */
	private ScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearcher searcher) throws IOException {
		if (pageIndex <= 1 || pageSize <= 0) {
			return null;
		}
		int num = pageSize*(pageIndex-1); // number of hits on all previous pages
		ScoreDoc[] previous = searcher.search(query, num).scoreDocs;
		if (previous.length == 0) {
			return null;
		}
		// previous.length never exceeds num, so this is index num-1 on full pages.
		return previous[previous.length - 1];
	}
	/**
	 * Paged search built on {@code searchAfter}: the last hit of the previous
	 * page is looked up first, then {@code pageSize} hits following it are
	 * fetched and printed.
	 *
	 * @param query     query-parser expression, default field "content"
	 * @param pageIndex 1-based page number
	 * @param pageSize  number of hits per page
	 */
	public void searchPageByAfter(String query,int pageIndex,int pageSize) {
		try {
			IndexSearcher searcher = getSearcher(FileIndexUtils.getDirectory());
			Query parsed = new QueryParser(Version.LUCENE_35,"content",analyzer).parse(query);
			// Anchor: last element of the previous page (null for page 1).
			ScoreDoc anchor = getLastScoreDoc(pageIndex, pageSize, parsed, searcher);
			// Fetch the pageSize hits that come after the anchor.
			TopDocs page = searcher.searchAfter(anchor,parsed, pageSize);
			printDocument(searcher, page);
			searcher.close();
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Unpaged reference search: prints doc id, path and filename of the top
	 * 20 hits for {@code query} (default field "content", StandardAnalyzer).
	 */
	public void searchNoPage(String query) {
		try {
			IndexSearcher searcher = getSearcher(FileIndexUtils.getDirectory());
			QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
			ScoreDoc[] hits = searcher.search(parser.parse(query), 20).scoreDocs;
			for (ScoreDoc hit : hits) {
				Document doc = searcher.doc(hit.doc);
				System.out.println(hit.doc+":"+doc.get("path")+"-->"+doc.get("filename"));
			}

			searcher.close();
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

3、查询语法的测试单元类

package org.itat.test;

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.itat.index.FileIndexUtils;
import org.itat.index.SearcherUtil;
import org.junit.Before;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * JUnit driver exercising the query examples in {@link SearcherUtil} and the
 * file indexer in {@link FileIndexUtils}. The tests print their results;
 * they contain no assertions.
 */
public class TestSearch {
	private SearcherUtil su;
	private Analyzer analyzer = new IKAnalyzer();
	@Before
	public void init() {
		su = new SearcherUtil();
	}
	
	/**
	 * Duplicates every file of the resource directory under the same base
	 * name with a ".she" extension, to enlarge the sample corpus.
	 * Sub-directories and files that already end in ".she" are skipped;
	 * copying the latter would target the source file itself.
	 */
	@Test
	public void testCopyFiles() {
		try {
			File file = new File("F:\\Workspaces\\lucenes\\02_lucene_searcher\\resource");
			File[] children = file.listFiles();
			if(children==null) {
				// listFiles() returns null when the path is missing or not a directory.
				System.err.println("Resource directory not found: "+file.getAbsolutePath());
				return;
			}
			for(File f:children) {
				if(!f.isFile() || "she".equals(FilenameUtils.getExtension(f.getName()))) {
					continue;
				}
				String destFileName = FilenameUtils.getFullPath(f.getAbsolutePath())+
						FilenameUtils.getBaseName(f.getName())+".she";
				FileUtils.copyFile(f, new File(destFileName));
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	@Test
	public void searchByTerm() {
//		su.searchByTerm("content","",10);
		su.searchByTermToken("content","头脑风暴",10);
	}
	
	@Test
	public void searchByTermRange() {
		// Names between "a" and "s":
//		su.searchByTermRange("name","a","s",10);
		// Numeric fields cannot be queried through a TermRangeQuery:
//		su.searchByTermRange("size",new NumericField("200").stringValue(),new NumericField("500").stringValue(), 10);
		QueryParser parser = new QueryParser(Version.LUCENE_35, "size", analyzer);
		Query query;
		try {
			query = parser.parse("size:[100 TO 500]");
			su.searchByQueryParse(query, 10);
		} catch (ParseException e) {
			e.printStackTrace();
		} 
	}
	
	@Test
	public void searchByNumRange() {
//		su.searchByNumricRange("attach",2,10, 5);
		su.searchByNumricRange("size",100,300, 10);
	}
	
	@Test
	public void searchByPrefix() {
		su.searchByPrefix("content", "人", 10);
	}
	
	@Test
	public void searchByWildcard() {
		// Everything ending in @itat.org:
//		su.searchByWildcard("email", "*@itat.org", 10);
		// Names starting with j followed by exactly three characters:
//		su.searchByWildcard("name", "j???", 10);
		su.searchByWildcard("content", "类?", 10);
	}
	
	@Test
	public void searchByBoolean() {
		su.searchByBoolean(10);
	}
	
	@Test
	public void searchByPhrase() {
		su.searchByPhrase(10);
	}
	
	@Test
	public void searchByFuzzy() {
		su.searchByFuzzy(10);
	}
	
	/**
	 * Walk-through of the QueryParser syntax. Only the last assignment to
	 * {@code query} is actually executed; the commented-out lines are
	 * alternative expressions to try.
	 */
	@Test
	public void searchByQueryParse() throws ParseException {
		// 1. Create the QueryParser; the default search field is "content".
		QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
		// Change the default operator for whitespace (could be switched to AND):
		//parser.setDefaultOperator(Operator.AND);
		// Allow a wildcard as the first character; off by default because it is slow.
		parser.setAllowLeadingWildcard(true);
		// Documents whose content contains "like".
		Query query = parser.parse("like");
		
		// "basketball" or "football"; whitespace means OR by default.
		query = parser.parse("basketball football");
		
		// Name the search field explicitly with the field:term form.
		//query = parser.parse("content:like");
		
		// * and ? wildcards are supported as well.
//		query = parser.parse("name:j*");
		
		// A leading wildcard is rejected unless setAllowLeadingWildcard(true) was called.
//		query = parser.parse("email:*@itat.org");
		
		// name must not be "mike" and content must contain "like";
		// + and - go in front of the field specification.
		query = parser.parse("- name:mike + like");
		
		// Inclusive range; note that TO must be upper case.
		//query = parser.parse("id:[1 TO 6]");
		
		// Exclusive range; only 2 matches.
		//query = parser.parse("id:{1 TO 3}");
		
		// Exact phrase "I like football".
		//query = parser.parse("\"I like football\"");
		
		// "I" and "football" with one word between them.
		//query = parser.parse("\"I football\"~1");
		
		// Fuzzy query.
		//query = parser.parse("name:make~");
		
		// Numeric ranges cannot be matched this way (needs a custom parser).
		//query = parser.parse("attach:[2 TO 10]");
		su.searchByQueryParse(query, 10);
	}
	
	@Test
	public void indexFile() {
		FileIndexUtils.index(true);
	}
	
	@Test
	public void testSearchPage01() {
		su.searchPage("java", 2,5);
		System.out.println("-------------------------------");
//		su.searchNoPage("java");
		su.searchPageByAfter("java", 2,2);
	}
	
	@Test
	public void testSearchPage02() {
		su.searchPageByAfter("java", 3,20);
	}
	
}


4、创建索引的类

package org.itat.index;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Builds a Lucene index from the files of a fixed resource directory and
 * exposes the index {@link Directory} to the search code.
 */
public class FileIndexUtils {
	/** Directory whose files are indexed by {@link #index(boolean)}. */
	private static final String RESOURCE_DIR = "F:\\Workspaces\\lucenes\\02_lucene_searcher\\resource";
	private static Directory directory = null;
	private static Analyzer analyzer = new IKAnalyzer();
	static{
		try {
			directory = FSDirectory.open(new File("F:\\Workspaces\\lucenes\\02_lucene_searcher\\index"));
		} catch (IOException e) {
			// directory stays null; later use of it will fail.
			e.printStackTrace();
		}
	}
	
	/**
	 * @return the shared index directory, or {@code null} if it could not be opened
	 */
	public static Directory getDirectory() {
		return directory;
	}
	
	/**
	 * Indexes every regular file of the resource directory, storing its
	 * content, file name, absolute path, last-modified time and size.
	 * Sub-directories are skipped, and a missing or unreadable resource
	 * directory is reported instead of causing a NullPointerException.
	 *
	 * @param hasNew when {@code true}, all existing documents are deleted first
	 */
	public static void index(boolean hasNew) {
		IndexWriter writer = null;
		try {
			writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, analyzer));
			if(hasNew) {
				writer.deleteAll();
			}
			File[] files = new File(RESOURCE_DIR).listFiles();
			if(files==null) {
				// listFiles() returns null when the path is missing or not a directory.
				System.err.println("Resource directory not found: "+RESOURCE_DIR);
				return;
			}
			for(File f:files) {
				if(!f.isFile()) {
					// Reading a directory as text would throw and abort the whole run.
					continue;
				}
				Document doc = new Document();
				doc.add(new Field("content",FileUtils.readFileToString(f),Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(f.lastModified()));
				// The size field is an int; clamp rather than let the cast wrap around.
				int size = (int)Math.min(f.length(), (long)Integer.MAX_VALUE);
				doc.add(new NumericField("size",Field.Store.YES,true).setIntValue(size));
				writer.addDocument(doc);
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				if(writer!=null) writer.close();
			} catch (CorruptIndexException e) {
				e.printStackTrace();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
}

5、对索引进行操作的类

package org.itat.index;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;


public class IndexUtil {
	private String[] ids = {"1","2","3","4","5","6"};
	private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
	private String[] contents = {
			"welcome to visited the space,I like book",
			"hello boy, I like pingpeng ball",
			"my name is cc I like game",
			"I like football",
			"I like football and I like basketball too",
			"I like movie and swim"
	};
	private Date[] dates = null;
	private int[] attachs = {2,3,1,4,5,5};
	private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
	private Directory directory = null;
	private Map<String,Float> scores = new HashMap<String,Float>();
	
	/**
	 * Creates an in-memory index directory, prepares the sample dates and
	 * per-domain boost factors, and builds the sample index immediately.
	 */
	public IndexUtil() {
		directory = new RAMDirectory();
		scores.put("itat.org",2.0f);
		scores.put("zttc.edu", 1.5f);
		setDates();
		// Must run last: indexing reads directory, dates and scores.
		index();
	}
	
	
	
	/**
	 * Fills {@code dates} with one sample date per entry of {@code ids}.
	 * A parse failure is reported on stderr and leaves the remaining slots null.
	 */
	private void setDates() {
		String[] rawDates = {
				"2010-02-19",
				"2012-01-11",
				"2011-09-19",
				"2010-12-22",
				"2012-01-01",
				"2011-05-19"
		};
		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
		dates = new Date[ids.length];
		try {
			for (int i = 0; i < rawDates.length; i++) {
				dates[i] = format.parse(rawDates[i]);
			}
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Restores all documents that were marked as deleted, using an
	 * IndexReader. The reader has to be opened with readOnly=false for the
	 * restore to be allowed.
	 * The reader is closed in a finally block so it is released even when
	 * the restore itself fails.
	 */
	public void undelete() {
		IndexReader reader = null;
		try {
			reader = IndexReader.open(directory,false);
			reader.undeleteAll();
		} catch (IOException e) {
			// Also covers CorruptIndexException, StaleReaderException and
			// LockObtainFailedException, which are all IOExceptions.
			e.printStackTrace();
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Forces the index to be merged down to two segments; documents marked
	 * as deleted inside the merged segments are purged.
	 * Note: since Lucene 3.5 this is discouraged because it is very
	 * expensive; Lucene merges segments on its own when appropriate.
	 */
	public void merge() {
		IndexWriter writer = null;
		try {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, config);
			writer.forceMerge(2);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Opens a writer on the index and calls {@code forceMergeDeletes()} to
	 * merge away segments that contain deleted documents.
	 */
	public void forceDelete() {
		IndexWriter writer = null;
		try {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, config);
			writer.forceMergeDeletes();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Deletes the document whose id is "1" and commits.
	 * deleteDocuments accepts either a Query or a Term (an exact value).
	 * The document is only marked as deleted at this point, comparable to a
	 * recycle bin, and can still be restored.
	 */
	public void delete() {
		IndexWriter writer = null;
		try {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, config);
			Term target = new Term("id","1");
			writer.deleteDocuments(target);
			writer.commit();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	
	/**
	 * Replaces the document with id "1" by a new document with id "11".
	 * Lucene has no in-place update; updateDocument is a delete followed by
	 * an add.
	 */
	public void update() {
		IndexWriter writer = null;
		try {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, config);
			Document replacement = new Document();
			replacement.add(new Field("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
			replacement.add(new Field("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));
			replacement.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED));
			replacement.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
			Term target = new Term("id","1");
			writer.updateDocument(target, replacement);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Prints the document statistics available from an IndexReader:
	 * numDocs(), maxDoc() and numDeletedDocs().
	 */
	public void query() {
		try {
			IndexReader stats = IndexReader.open(directory);
			int live = stats.numDocs();
			System.out.println("numDocs:"+live);
			int max = stats.maxDoc();
			System.out.println("maxDocs:"+max);
			int deleted = stats.numDeletedDocs();
			System.out.println("deleteDocs:"+deleted);
			stats.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Rebuilds the sample index from the in-memory arrays: all existing
	 * documents are removed, then one document per id is added.
	 * Each document is boosted by its e-mail domain (see {@code scores});
	 * domains without an entry get a boost of 0.5. The domain of every
	 * document is echoed to stdout.
	 */
	public void index() {
		IndexWriter writer = null;
		try {
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, config);
			writer.deleteAll();
			for (int row = 0; row < ids.length; row++) {
				Document doc = new Document();
				doc.add(new Field("id",ids[row],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
				doc.add(new Field("email",emails[row],Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new Field("content",contents[row],Field.Store.NO,Field.Index.ANALYZED));
				doc.add(new Field("name",names[row],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
				// Numeric value (attachment count).
				doc.add(new NumericField("attach",Field.Store.YES,true).setIntValue(attachs[row]));
				// Date stored as epoch milliseconds.
				doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[row].getTime()));
				// Everything after the last '@' is the domain used for the boost lookup.
				String domain = emails[row].substring(emails[row].lastIndexOf("@")+1);
				System.out.println(domain);
				doc.setBoost(scores.containsKey(domain) ? scores.get(domain) : 0.5f);
				writer.addDocument(doc);
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are IOExceptions too.
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

工程下载地址:http://download.csdn.net/detail/wxwzy738/5256553



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值