Lucene 搜索引擎(文件索引、数据库索引)
第一步:搜集数据并创建索引文件,供搜索时查询使用
//1
// Shared index writer: assigned by createWriter and used by createsqlDocument, optimizeWriter and colseWriter.
private static IndexWriter writer; // new index being built
/**
 * Creates the shared {@code IndexWriter} and applies its tuning settings.
 *
 * @param IdxScrDir index directory path handed to the IndexWriter constructor
 * @param bool      passed through as the constructor's third argument
 *                  (presumably the "create new index" flag — confirm against the Lucene version in use)
 * @return {@code true} when the writer was created and configured,
 *         {@code false} when an I/O-level failure was caught and printed
 */
private boolean createWriter(String IdxScrDir, boolean bool) {
    boolean ready = false;
    try {
        writer = new IndexWriter(IdxScrDir, new StandardAnalyzer(), bool);
        writer.setUseCompoundFile(false);
        writer.setMergeFactor(100);
        writer.setMaxMergeDocs(1000);
        writer.setMaxFieldLength(Integer.MAX_VALUE);
        writer.setMaxBufferedDocs(Integer.MAX_VALUE);
        ready = true;
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOException
        // subtypes, so this single handler covers all three original cases.
        e.printStackTrace();
    }
    return ready;
}
//2
/**
 * Extracts the placeholder names written as {@code [name]} inside a path template.
 *
 * The template is split on {@code ]}; for each piece the text after its first
 * {@code [} is kept, and a piece without {@code [} becomes {@code null}.
 *
 * @param path path template, e.g. {@code /news/[id]/[date].html}
 * @return one entry per split piece (placeholder name or {@code null}),
 *         or {@code null} when the template contains no {@code ]} at all
 */
private String[] getFeildByPath(String path){
    // No closing bracket means there is nothing to extract.
    if (!path.contains("]")) {
        return null;
    }
    String[] segments = path.split("]");
    for (int idx = 0; idx < segments.length; idx++) {
        String segment = segments[idx];
        int open = segment.indexOf("[");
        segments[idx] = (open == -1) ? null : segment.substring(open + 1);
    }
    return segments;
}
/**
 * Queries the database for every configured table and adds one document per
 * returned row to the shared index writer.
 *
 * For each row the table's path template has its {@code [column]} placeholders
 * replaced with that row's cell values, and the content cell is stripped of
 * HTML markup before being indexed.
 *
 * @param st table descriptors naming the data source, query text, and the
 *           id / title / content / type columns plus the path template
 * @return {@code true} when every table was indexed; {@code false} as soon as
 *         querying or indexing one table throws (the stack trace is printed)
 */
private boolean createsqlDocument(searchtables[] st) {
    for (int i = 0; i < st.length; i++) {
        String pathTemplate = st[i].getpath();
        System.out.println(pathTemplate);
        String[] s = getFeildByPath(pathTemplate);
        // NOTE(review): the value of gettablename() is executed as the query text — confirm it holds SQL.
        String sql = st[i].gettablename();
        try {
            ResultSet rs = Global.getInstance().getDataSource(st[i].getdbname()).ExecuteSQL(sql);
            System.out.println(rs.getCount());
            System.out.println(rs.getSize());
            for (int j = 0; j < rs.getCount(); j++) {
                // Start from the untouched template for every row.
                String urlpath = pathTemplate;
                if (s != null) {
                    for (int k = 0; k < s.length; k++) {
                        if (s[k] == null) {
                            continue;
                        }
                        ResultSetMetaData rsmd = rs.getMetaData(s[k]);
                        String value = rs.GetCellValue(j, s[k]);
                        // 93 equals java.sql.Types.TIMESTAMP — presumably the column type code; TODO confirm.
                        if (rsmd.getType() == 93) {
                            // Re-join the yyyy, MM and dd parts of the value with '/' separators.
                            value = value.substring(0, 4) + "/" + value.substring(5, 7) + "/" + value.substring(8, 10);
                        }
                        urlpath = urlpath.replace("[" + s[k] + "]", value);
                    }
                }

                String content = rs.GetCellValue(j, st[i].getcontents());
                if (content == null) {
                    // A null field value would make the Field constructor throw and
                    // abort the whole run; index the row with empty content instead.
                    content = "";
                } else if (!content.equals("")) {
                    // Strings are immutable: the stripped text must be assigned back,
                    // otherwise the raw HTML is what gets indexed.
                    // (?is) lets head/script blocks span lines and match any letter case.
                    content = content
                            .replaceAll("(?is)<head>.*?</head>|<script>.*?</script>", "")
                            .replaceAll("<.*?>|&.{2,5};", "");
                }

                Document doc = new Document();
                doc.add(new Field("id", rs.GetCellValue(j, st[i].gettitleid()), Field.Store.YES, Field.Index.TOKENIZED));
                doc.add(new Field("title", rs.GetCellValue(j, st[i].gettitle()), Field.Store.YES, Field.Index.TOKENIZED));
                doc.add(new Field("content", content, Field.Store.YES, Field.Index.TOKENIZED));
                doc.add(new Field("path", urlpath, Field.Store.YES, Field.Index.TOKENIZED));
                doc.add(new Field("type", rs.GetCellValue(j, st[i].gettypename()), Field.Store.YES, Field.Index.TOKENIZED));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }
    return true;
}
//3
/**
 * Optimizes the index by calling {@code optimize()} on the shared writer.
 *
 * @throws CorruptIndexException propagated from the writer
 * @throws IOException propagated from the writer
 */
private void optimizeWriter() throws CorruptIndexException, IOException {
writer.optimize();
}
/**
 * Closes the index by calling {@code close()} on the shared writer.
 *
 * @throws CorruptIndexException propagated from the writer
 * @throws IOException propagated from the writer
 */
private void colseWriter() throws CorruptIndexException, IOException {
writer.close();
}
//4
/**
 * Builds the index end to end: opens the writer, indexes the database rows,
 * optimizes the index and closes the writer.
 *
 * @param dirpath index directory path forwarded to createWriter
 * @param bool    flag forwarded to createWriter
 * @param st      table descriptors forwarded to createsqlDocument
 * @return {@code true} only when the writer was opened, all rows were indexed,
 *         and both optimize and close completed without an exception
 */
public boolean createIndex(String dirpath,boolean bool,searchtables[] st){
    boolean flag = false;
    boolean opened = false;
    try {
        opened = createWriter(dirpath, bool);
        if (opened) {
            flag = createsqlDocument(st);
            optimizeWriter();
        }
    } catch (Exception e) {
        e.printStackTrace();
        // A failure while indexing or optimizing must not be reported as success.
        flag = false;
    } finally {
        // Always close an opened writer, even after a failure, so it is not left open.
        if (opened) {
            try {
                colseWriter();
            } catch (Exception e) {
                e.printStackTrace();
                flag = false;
            }
        }
    }
    return flag;
}