1 package lucene5; 2 3 import java.io.IOException; 4 import java.nio.file.Paths; 5 import java.text.SimpleDateFormat; 6 import java.util.Date; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.document.Document; 9 import org.apache.lucene.document.Field; 10 import org.apache.lucene.document.IntField; 11 import org.apache.lucene.document.LongField; 12 import org.apache.lucene.document.TextField; 13 import org.apache.lucene.index.DirectoryReader; 14 import org.apache.lucene.index.IndexReader; 15 import org.apache.lucene.index.IndexWriter; 16 import org.apache.lucene.index.IndexWriterConfig; 17 import org.apache.lucene.index.Term; 18 import org.apache.lucene.queryparser.classic.ParseException; 19 import org.apache.lucene.queryparser.classic.QueryParser; 20 import org.apache.lucene.search.IndexSearcher; 21 import org.apache.lucene.search.Query; 22 import org.apache.lucene.search.ScoreDoc; 23 import org.apache.lucene.search.TopDocs; 24 import org.apache.lucene.store.Directory; 25 import org.apache.lucene.store.FSDirectory; 26 public class Lucene5Utils { 27 28 private static String[] ids={"1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31"}; 29 private static String[] emails={"aa@aa.org","cc@cc.org","dd@dd.org","bb@bb.org","ee@ee.org","ff@ff.org","cc@cc.org","dd@dd.org","bb@bb.org","ee@ee.org","ff@ff.org","cc@cc.org","dd@dd.org","bb@bb.org","ee@ee.org","ff@ff.org","cc@cc.org","dd@dd.org","bb@bb.org","ee@ee.org","ff@ff.org","cc@cc.org","dd@dd.org","bb@bb.org","ee@ee.org","ff@ff.org","cc@cc.org","dd@dd.org","bb@bb.org","ee@ee.org","ff@ff.org"}; 30 private static String[] contents={"welcome to you","hello a boy","hello a girl","how are you","goog luck","oh shit","hello a boy","hello a girl","how are you","goog luck","oh shit","hello a boy","hello a girl","how are you","goog luck","oh shit","hello a boy","hello a girl","how are you","goog luck","oh shit","hello a boy","hello a girl","how are you","goog luck","oh shit","hello a boy","hello a girl","how are you","goog luck","oh shit"}; 31 private static String[] names={"liwu","zhangsan","xiaoqinag","laona","dabao","lisi","zhangsan","xiaoqinag","laona","dabao","lisi","zhangsan","xiaoqinag","laona","dabao","lisi","zhangsan","xiaoqinag","laona","dabao","lisi","zhangsan","xiaoqinag","laona","dabao","lisi","zhangsan","xiaoqinag","laona","dabao","lisi"}; 32 private static int[] attachs={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; 33 private static Date[] dates=null ; 34 35 //创建索引 36 private static Directory directory = null ; 37 //创建IndexWriter 38 private static IndexWriter indexWriter = null ; 39 //创建IndexReader 40 private static IndexReader indexReader = null ; 41 //创建searcher 42 private static IndexSearcher search = null ; 43 static { 44 try { 45 setDate() ; 46 directory = FSDirectory.open(Paths.get("D:\\lucene\\index")) ; 47 indexWriter = new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer())) ; 48 indexReader = DirectoryReader.open(directory) ; 49 search = new IndexSearcher(indexReader); 50 } catch (IOException e) { 51 e.printStackTrace(); 52 } 53 } 54 55 public static void setDate() { 56 SimpleDateFormat sdf=new SimpleDateFormat("yyyy-mm-kk"); 57 try { 58 dates=new Date[ids.length]; 59 dates[0]=sdf.parse("2010-08-17"); 60 dates[1]=sdf.parse("2011-02-17"); 61 dates[2]=sdf.parse("2012-03-17"); 62 dates[3]=sdf.parse("2011-04-17"); 63 dates[4]=sdf.parse("2012-05-17"); 64 dates[5]=sdf.parse("2011-07-17"); 65 dates[6]=sdf.parse("2011-02-17"); 66 dates[7]=sdf.parse("2012-03-17"); 67 dates[8]=sdf.parse("2011-04-17"); 68 dates[9]=sdf.parse("2012-05-17"); 69 dates[10]=sdf.parse("2011-07-17"); 70 dates[11]=sdf.parse("2011-02-17"); 71 dates[12]=sdf.parse("2012-03-17"); 72 dates[13]=sdf.parse("2011-04-17"); 73 dates[14]=sdf.parse("2012-05-17"); 74 dates[15]=sdf.parse("2011-07-17"); 75 dates[16]=sdf.parse("2011-02-17"); 76 dates[17]=sdf.parse("2012-03-17"); 77 dates[18]=sdf.parse("2011-04-17"); 78 dates[19]=sdf.parse("2012-05-17"); 79 dates[20]=sdf.parse("2011-07-17"); 80 dates[21]=sdf.parse("2011-02-17"); 81 dates[22]=sdf.parse("2012-03-17"); 82 dates[23]=sdf.parse("2011-04-17"); 83 dates[24]=sdf.parse("2012-05-17"); 84 dates[25]=sdf.parse("2011-07-17"); 85 dates[26]=sdf.parse("2011-02-17"); 86 dates[27]=sdf.parse("2012-03-17"); 87 dates[28]=sdf.parse("2011-04-17"); 88 dates[29]=sdf.parse("2012-05-17"); 89 dates[30]=sdf.parse("2011-07-17"); 90 } catch (Exception e) { 91 e.printStackTrace(); 92 } 93 } 94 public static void main(String[] args) { 95 // createIndex () ; 96 // delete() ; 97 // search () ; 98 // paserQuery() ; 99 searchBypage02(1,2,"welcome to you"); 100 } 101 102 103 /** 104 * 创建/更新索引 105 */ 106 public static void createIndex () { 107 try { 108 //清空当前所有索引 109 indexWriter.deleteAll() ; 110 //创建写入流 111 // reader = new java.io.BufferedReader(new InputStreamReader(new FileInputStream("D:\\lucene\\files\\产品名称-拼音.txt"),"GBK")); 112 for (int i = 0 ; i<=30; i++) { 113 Document d = new Document(); 114 System.out.println(ids[i]+"-"+emails[i]+"-"+contents[i]+"-"+names[i]+"-"+attachs[i]+"-"+dates[i]); 115 d.add(new Field("id",ids[i],TextField.TYPE_STORED)) ; 116 d.add(new Field("email",emails[i],TextField.TYPE_STORED)) ; 117 d.add(new Field("content",contents[i],TextField.TYPE_NOT_STORED)) ; 118 d.add(new Field("name",names[i],TextField.TYPE_STORED)) ; 119 d.add(new IntField("attach",1,IntField.TYPE_STORED)) ; 120 d.add(new LongField("date",dates[i].getTime(),LongField.TYPE_STORED)) ; 121 indexWriter.addDocument(d) ; 122 } 123 } catch (IOException e) { 124 e.printStackTrace(); 125 } finally { 126 try { 127 //在这里writer必须要关掉或者commit。不然,会写入不进去数据 128 indexWriter.commit() ; 129 // indexWriter.close() ; 130 } catch (IOException e) { 131 e.printStackTrace(); 132 } 133 } 134 } 135 136 //简单的查询 137 public static void search (){ 138 System.out.println(indexReader.maxDoc()); //输出所有的DOC数 139 System.out.println(indexReader.numDocs()); //输出可用的DOC数 140 System.out.println(indexReader.numDeletedDocs()); //输出删除的DOC数 141 try { 142 //获取搜索域--shouzimu 143 QueryParser parser = new QueryParser("email",new StandardAnalyzer()) ; 144 //设置查询条件 145 Query query = parser.parse("@dd.org~"); 146 //设置查找结果 -最多为10条 147 TopDocs tocs = search.search(query,10) ; 148 //遍历查询结果 149 ScoreDoc[] docs = tocs.scoreDocs ; 150 for (ScoreDoc sd : docs) { 151 Document doc = search.doc(sd.doc) ; 152 //这里的content输出为null。因为我们没有保存哦~ 153 System.out.println(doc.get("id")+":" + doc.get("email") + "-" + doc.get("content")+"-" + doc.get("name")+"-" + doc.get("attach")+"-" + doc.get("date")); 154 } 155 } catch (org.apache.lucene.queryparser.classic.ParseException e) { 156 e.printStackTrace(); 157 } catch (IOException e) { 158 e.printStackTrace(); 159 } 160 } 161 162 //删除索引 163 164 public static void delete () { 165 try { 166 //删除,放置在回收站中,可恢复 167 indexWriter.deleteDocuments(new Term("shouzimu","xxxxlxw")) ; 168 //删除,不可恢复 169 indexWriter.forceMergeDeletes() ; 170 indexWriter.commit() ; 171 } catch (IOException e) { 172 e.printStackTrace(); 173 } 174 } 175 176 //5中的方法 暂时不会啊= =、 177 public static void undelete () { 178 } 179 180 //详解ParserQuery 181 public static void paserQuery () { 182 //这里我们定义一个基于标准分词器的Query;搜索域默认为content 183 QueryParser parser = new QueryParser("content",new StandardAnalyzer()) ; 184 // parser.setAllowLeadingWildcard(true) ; 185 try { 186 //在默认域中查找 有you的 187 Query query = parser.parse("you"); 188 //在默认域中查找有 boy 或者girl 的 189 query = parser.parse("boy girl"); 190 query = parser.parse("boy OR girl"); 191 //在name域中查号 name 为xiaoqinag的 192 query = parser.parse("name:xiaoqinag"); 193 query = parser.parse("name : xiaoqinag"); 194 //查找邮箱结尾为@dd.org的 通配符查询不能用?!!! 待解决 195 query = parser.parse("email:a?@aa.org"); //会报错 提示通配符不能放在首位 196 //在默认域中查找 有how 且有 you的 197 query = parser.parse("how AND you"); 198 //在默认域中查找 有hello 但是没有 girl的 千万要注意空格! 199 query = parser.parse("-girl +hello"); 200 //完全匹配hello a boy 201 query = parser.parse("\"hello a girl\""); 202 //显示id 2-4 的闭区间 TO大写 203 query = parser.parse("id:[2 TO 4]"); 204 //显示id 2-4 的开区间 TO大写 205 query = parser.parse("id:{2 TO 4}"); 206 //显示id hello 和 boy 之间有小于1的 207 query = parser.parse("\"hello boy\"~1"); 208 //模糊查询 209 query = parser.parse("name:xiaoqiang~"); 210 query = parser.parse("attach:{2 TO 4}"); 211 TopDocs tdocs = search.search(query, 10) ; 212 ScoreDoc[] sdoc = tdocs.scoreDocs; 213 for (ScoreDoc sd : sdoc) { 214 Document doc = search.doc(sd.doc) ; 215 System.out.println(doc.get("id")+":" + doc.get("email") + "-" + doc.get("content")+"-" + doc.get("name")+"-" + doc.get("attach")+"-" + doc.get("date")); 216 } 217 } catch (ParseException e) { 218 e.printStackTrace(); 219 } catch (IOException e) { 220 e.printStackTrace(); 221 } 222 } 223 224 //分页查询一 225 public static void searchBypage01 (int page,int pageSize,String str) { 226 int start = (page-1) * pageSize ; 227 int end = start + pageSize ; 228 QueryParser parser = new QueryParser("content",new StandardAnalyzer()) ; 229 try { 230 Query query = parser.parse(str) ; 231 TopDocs tdos = search.search(query, 100) ; 232 ScoreDoc[] sds = tdos.scoreDocs ; 233 for (int i = start ;i <end;i++ ) { 234 Document doc = search.doc(sds[i].doc) ; 235 System.out.println(doc.get("id")+":" + doc.get("email") + "-" + doc.get("content")+"-" + doc.get("name")+"-" + doc.get("attach")+"-" + doc.get("date")); 236 } 237 } catch (ParseException e) { 238 e.printStackTrace(); 239 } catch (IOException e) { 240 e.printStackTrace(); 241 } 242 } 243 244 //分页查询二 245 public static void searchBypage02 (int page,int pageSize,String str) { 246 int start = (page-1) * pageSize ; 247 // int end = start + pageSize ; 248 QueryParser parser = new QueryParser("content",new StandardAnalyzer()) ; 249 try { 250 Query query = parser.parse(str) ; 251 TopDocs tdos = search.search(query, 100) ; 252 ScoreDoc[] sds = tdos.scoreDocs ; 253 tdos = search.searchAfter(sds[start], query, pageSize) ; 254 sds = tdos.scoreDocs ; 255 for (ScoreDoc sd : sds) { 256 Document doc = search.doc(sd.doc) ; 257 System.out.println(doc.get("id")+":" + doc.get("email") + "-" + doc.get("content")+"-" + doc.get("name")+"-" + doc.get("attach")+"-" + doc.get("date")); 258 } 259 } catch (ParseException e) { 260 e.printStackTrace(); 261 } catch (IOException e) { 262 e.printStackTrace(); 263 } 264 } 265 266 267 }