Lucene工作总结

公司项目:portal 中的期刊文章内容以大字段(CLOB)的形式存储在 Oracle 中。首页有一个搜索功能,要求把所有内容中包含搜索关键字的文章标题列出来。也就是说,需要用 Lucene 对数据库中的大字段建立索引(索引由计划任务定时重建),并在此索引上提供搜索。

==================定时建立索引文件:===============

Main方法:

package zxt.lucene.index;

import java.util.Timer;
/**
 * Entry point that schedules the periodic index-building task.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class IndexerServer {

    /**
     * Registers the configuration file name, then runs the shared
     * {@code LuceneDBIndexerTask} immediately and repeats it at a fixed rate.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Register the file name first: the statements below read Constant,
        // whose values are looked up through Config.
        Config.setConfigFileName("directory.properties");

        final Timer scheduler = new Timer();
        final LuceneDBIndexerTask indexTask = LuceneDBIndexerTask.getInstance();
        // Timer periods are expressed in milliseconds.
        final long periodMillis = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);

        // Zero delay: the first run starts right away.
        scheduler.scheduleAtFixedRate(indexTask, 0, periodMillis);
    }

}




定时调用建立索引任务(与上文"Main方法"为同一个类 IndexerServer,代码相同):

package zxt.lucene.index;

import java.util.Timer;
/**
 * Launcher for the scheduled index-building job.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class IndexerServer {

    /**
     * Sets the configuration file name and hands the singleton indexing task
     * to a {@link Timer} that fires at once and then at a fixed rate.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        final String configResource = "directory.properties";
        Config.setConfigFileName(configResource);

        Timer indexTimer = new Timer();
        LuceneDBIndexerTask task = LuceneDBIndexerTask.getInstance();
        // Interval between runs, in milliseconds, taken from the configuration.
        long interval = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        indexTimer.scheduleAtFixedRate(task, 0, interval);
    }

}




建立索引的核心实现:

package zxt.lucene.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.TimerTask;

import oracle.sql.CLOB;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Timer task that rebuilds the Lucene index from the database query configured
 * in {@code Constant.DB_QUERY_STRING}.
 *
 * Every run writes a complete index into a new sub-directory of the parent
 * index directory, named with the current time ({@code yyyyMMddHHmmss}), and
 * then removes the oldest sub-directories so that at most
 * {@code MAX_INDEX_DIRS} remain.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class LuceneDBIndexerTask extends TimerTask {
    // Fallback index directory used when none is configured
    private static String DEFAULT_INDEX_DIR="C:\\IndexDB";
    // Number of timestamped index directories kept under the parent directory
    private static final int MAX_INDEX_DIRS = 3;
    // Parent directory that holds the timestamped index directories
    private File parentDir=null;
    // Shared instance returned by getInstance()
    private static LuceneDBIndexerTask index=new LuceneDBIndexerTask();

    /**
     * Resolves the parent index directory from the configuration (falling back
     * to the default) and creates it if it does not exist yet.
     */
    private LuceneDBIndexerTask(){
        String dirStr=Constant.INDEX_STORE_DIRECTORY;
        if(dirStr!=null&&!"".equals(dirStr)){
            this.parentDir=new File(dirStr);
        }else{
            this.parentDir=new File(DEFAULT_INDEX_DIR);
        }

        if(!this.parentDir.exists()){
            // mkdirs, not mkdir: the configured path may have missing parents
            this.parentDir.mkdirs();
        }
    }

    /**
     * Returns the shared task instance.
     *
     * @return the single instance created when the class is initialised
     */
    public static LuceneDBIndexerTask getInstance(){
        return index;
    }

    /**
     * Builds a fresh index from the configured database query.
     *
     * IOException, SQLException and ClassNotFoundException are reported with
     * printStackTrace and end the run. If the run does not complete, the
     * partially written index directory is removed so that a reader choosing
     * a directory by name never opens a half-built index.
     */
    public void run() {
        System.out.println("====LuceneDBIndexerTask$run()===============");

        System.out.println("~~~开始建立索引文件~~~~~~~~~~~~~~~");
        Connection conn=null;
        Statement stmt=null;
        ResultSet rs=null;
        IndexWriter writer=null;
        File file=null;
        boolean completed=false;
        try {
            Class.forName(Constant.DB_DRIVER_STRING);
            conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(Constant.DB_QUERY_STRING);

            file=new File(parentDir, new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()));
            if(!file.exists()){
                file.mkdirs();
            }
            writer = new IndexWriter(file,new StandardAnalyzer(), true);
            long startTime = new Date().getTime();
            SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
            while (rs.next()) {
                writer.addDocument(toDocument(rs, dayFormat));
            }
            writer.optimize();
            writer.close();
            writer = null; // closed successfully; nothing left for the finally block
            completed = true;

            // Report how long indexing took
            long endTime = new Date().getTime();
            System.out.println("索引文件"+file.getPath()+"建立成功...");
            System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");

            // Keep only the newest MAX_INDEX_DIRS index directories
            checkFiles(parentDir);

        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }finally{
            if(writer!=null){
                // Reached only after a failure: close the writer so its files
                // and lock are released before the directory is removed
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // Close each JDBC resource on its own so one failure cannot leak the others
            if(rs!=null){
                try {
                    rs.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if(stmt!=null){
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if(conn!=null){
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if(!completed && file!=null){
                deleteDirectory(file);
            }
        }
    }

    /**
     * Converts the current row of the result set into a Lucene document.
     * NULL column values are indexed as empty strings because a Field cannot
     * hold a null value.
     */
    private Document toDocument(ResultSet rs, SimpleDateFormat dayFormat) throws SQLException, IOException {
        Document doc = new Document();
        doc.add(new Field("ARTICLEID", nullToEmpty(rs.getString("ARTICLEID")), Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("TITLE", nullToEmpty(rs.getString("TITLE")), Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("USERNAME", nullToEmpty(rs.getString("USERNAME")), Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("USERID", nullToEmpty(rs.getString("USERID")), Field.Store.YES,Field.Index.TOKENIZED));
        // Creation date, stored as yyyy-MM-dd
        java.sql.Timestamp created = rs.getTimestamp("CREATEDATE");
        String createdate = created==null ? "" : dayFormat.format(created);
        doc.add(new Field("CREATEDATE", createdate, Field.Store.YES,Field.Index.TOKENIZED));
        // Large text column
        doc.add(new Field("CONTENT", readClob(rs.getClob("CONTENT")), Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /** Returns the whole text of the CLOB, or an empty string for a NULL CLOB. */
    private static String readClob(java.sql.Clob clob) throws SQLException, IOException {
        if (clob == null) {
            return "";
        }
        BufferedReader in = new BufferedReader(clob.getCharacterStream());
        try {
            StringWriter out = new StringWriter();
            char[] buffer = new char[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            return out.toString();
        } finally {
            in.close();
        }
    }

    /** Maps null to the empty string. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Deletes the oldest entries of the given directory until at most
     * MAX_INDEX_DIRS remain. Entries are ordered by path; the index
     * directories are named yyyyMMddHHmmss, so that order is chronological.
     *
     * @param dir parent directory holding the timestamped index directories
     */
    public void checkFiles(File dir) {
        File[] files=dir.listFiles();
        if(files==null){
            // Not a directory, or it could not be listed
            return;
        }
        // listFiles() makes no ordering promise, so sort before picking the oldest
        Arrays.sort(files);
        int excess=files.length-MAX_INDEX_DIRS;
        for(int i=0;i<excess;i++){
            deleteDirectory(files[i]);
        }
    }

    /**
     * Recursively deletes a directory and everything below it.
     *
     * @param path file or directory to delete
     * @return true if path itself was deleted
     */
    public boolean deleteDirectory(File path) {
        if( path.exists() ) {
            File[] files = path.listFiles();
            // listFiles() returns null when path is a plain file
            if(files!=null){
                for(int i=0; i<files.length; i++) {
                    if(files[i].isDirectory()) {
                        deleteDirectory(files[i]);
                    }
                    else {
                        files[i].delete();
                    }
                }
            }
        }
        boolean hasdelete=path.delete();
        if(hasdelete){
            System.out.println("删除索引目录"+path);
        }
        return hasdelete;
    }

    /**
     * Runs a single indexing pass from the command line.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        new LuceneDBIndexerTask().run();
    }

}





配置文件管理类:

package zxt.lucene.index;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Lazily created, shared holder for the settings read from a properties file
 * on the classpath.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class Config {

    // Resource loaded when no file name (or an empty one) has been set
    private static final String DEFAULT_CONFIG_FILE_NAME = "directory.properties";

    private static Config cfg = null;

    private static String configFileName = null;

    private Properties props;

    /** Creates an empty configuration; nothing is loaded here. */
    public Config() {
        props = new java.util.Properties();
    }

    /**
     * Returns the shared instance, creating it and loading the properties file
     * on the first call.
     *
     * @return the shared configuration; never null
     */
    public synchronized static Config getInstance() {
        if (cfg == null) {
            Config created = new Config();
            created.loadConfig();
            cfg = created;
        }
        return cfg;
    }

    /**
     * Loads the properties from the classpath resource named by
     * setConfigFileName, or from the default resource when no name was set.
     *
     * @return 1 if the resource was found and loaded, 0 otherwise
     */
    private int loadConfig() {
        String resourceName = configFileName;
        if (resourceName == null || resourceName.length() == 0) {
            resourceName = DEFAULT_CONFIG_FILE_NAME;
        }
        System.out.println("configFileName=" + resourceName);

        InputStream inputStream = Config.class.getClassLoader()
                .getResourceAsStream(resourceName);
        if (inputStream == null) {
            // A missing resource leaves the configuration empty instead of failing
            System.err.println("Config: resource not found on classpath: " + resourceName);
            return 0;
        }
        try {
            props.load(inputStream);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Sets the classpath resource to load. Only has an effect when called
     * before the first getInstance() call.
     *
     * @param cfg resource name of the properties file
     */
    public static void setConfigFileName(String cfg) {
        configFileName = cfg;
    }

    /**
     * Looks up a configuration value.
     *
     * @param keyName property key
     * @return the value, or null when the key is not present
     */
    public String getProperty(String keyName) {
        return props.getProperty(keyName);
    }

}



常量配置

package zxt.lucene.index;

/**
 * Application constants. Each value is looked up once in the shared
 * {@code Config} instance when this class is initialised.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class Constant {

    // Single configuration source for every constant below
    private static final Config SETTINGS = Config.getInstance();

    // Interval between two index builds
    public static final String CREATE_INDEX_SLEEP_TIME =
            SETTINGS.getProperty("create_index_sleep_time");

    // Directory in which the index files are stored
    public static final String INDEX_STORE_DIRECTORY =
            SETTINGS.getProperty("index_store_directory");

    // JDBC driver class name
    public static final String DB_DRIVER_STRING =
            SETTINGS.getProperty("db_driver_string");

    // JDBC connection URI
    public static final String DB_URI_STRING =
            SETTINGS.getProperty("db_uri_string");

    // Database user name
    public static final String DB_USERNAME =
            SETTINGS.getProperty("db_username");

    // Database password
    public static final String DB_PWD =
            SETTINGS.getProperty("db_pwd");

    // SQL query that selects the rows to index
    public static final String DB_QUERY_STRING =
            SETTINGS.getProperty("db_query_string");

}




数据类型处理类:

package zxt.lucene.index;

/**
 * Data type conversion helpers.
 *
 * @author wulihai
 * @create 2009-06-02
 */
public class DataTypeUtil {
    /**
     * Converts an object to a long by parsing its string form. Surrounding
     * whitespace is ignored, so a value such as {@code "120000 "} read from a
     * properties file still parses.
     *
     * @param o source object; must not be null
     * @return the parsed value, or Long.MAX_VALUE when the text is not a valid long
     * @throws IllegalArgumentException if o is null
     */
    public static long toLong(Object o) {
        if (o == null) {
            throw new IllegalArgumentException("该对象为空");
        }
        String s = o.toString();
        if (s == null) {
            // A toString() that returns null cannot be parsed
            return Long.MAX_VALUE;
        }
        try {
            return Long.parseLong(s.trim());
        } catch (NumberFormatException ex) {
            return Long.MAX_VALUE;
        }
    }
}





配置文件 :



#== directory where the Lucene index is stored ========#
index_store_directory=D:/lucene/indexDB/

#======== two hours ========#
#create_index_sleep_time=7200000

#======== two minutes ========#
create_index_sleep_time=120000

db_driver_string=oracle.jdbc.driver.OracleDriver
db_uri_string=jdbc:oracle:thin:@localhost:1521:lportal
db_username=lportal
db_pwd=lportal
db_query_string=SELECT * from journalarticle



==================搜索类:===============

核心搜索类:

package com.liferay.portal.util;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.liferay.portlet.journal.model.JournalArticle;

/**
 * Searches the Lucene index and maps every hit to a JournalArticle.
 */
public class LuceneDBQuery {

    // Must match the pattern the indexer uses when it writes the CREATEDATE field
    private static final String CREATEDATE_PATTERN = "yyyy-MM-dd";

    private static LuceneDBQuery search = new LuceneDBQuery();

    // Instances are obtained through getInstance()
    private LuceneDBQuery() {

    }

    /**
     * Returns the shared instance.
     *
     * @return the single instance created when the class is initialised
     */
    public static LuceneDBQuery getInstance() {
        return search;
    }

    /**
     * Searches the CONTENT field of the index for the given query.
     *
     * Failures (no index directory, unparsable query, I/O errors) are reported
     * on standard output / with printStackTrace and produce an empty or
     * partial result list instead of an exception.
     *
     * @param queryString query in Lucene query-parser syntax
     * @return list of JournalArticle objects, one per readable hit; never null
     */
    public List search(String queryString) {
        long startTime = new Date().getTime();
        List results = new ArrayList();

        if (queryString == null || queryString.trim().length() == 0) {
            return results;
        }

        // Parse first: an invalid query needs no open index
        Query query;
        try {
            query = new QueryParser("CONTENT", new StandardAnalyzer()).parse(queryString);
        } catch (ParseException e1) {
            e1.printStackTrace();
            return results;
        }

        // Parent directory that holds the timestamped index directories
        File searchDir = null;
        String indexDir = LuceneDBQueryUtil.getIndexPath();
        if (indexDir != null && !"".equals(indexDir)) {
            searchDir = new File(indexDir);
            if (!searchDir.exists()) {
                searchDir.mkdir();
            }
        }

        File targetDir = getTargetDir(searchDir);
        if (targetDir == null) {
            System.out.println("没有可用的索引目录...");
            return results;
        }

        IndexSearcher searcher = openSearcher(targetDir);
        if (searcher == null) {
            return results;
        }

        try {
            Hits hits;
            try {
                hits = searcher.search(query);
            } catch (IOException e1) {
                System.out.println("查询索引库出现异常...");
                e1.printStackTrace();
                return results;
            }

            int total = hits.length();
            if (total > 0) {
                SimpleDateFormat createdFormat = new SimpleDateFormat(CREATEDATE_PATTERN);
                for (int i = 0; i < total; i++) {
                    Document document;
                    try {
                        document = hits.doc(i);
                    } catch (Exception e1) {
                        System.out.println("返回查询结果集出现异常...");
                        e1.printStackTrace();
                        continue; // skip the unreadable hit, keep the rest
                    }
                    results.add(toArticle(document, createdFormat));
                }
                // Report how long the search took
                long endTime = new Date().getTime();
                System.out.println("查询过程花费了" + (endTime - startTime) + " 毫秒!");
            } else {
                System.out.println("0个结果!");
            }
        } finally {
            // Hits load documents lazily, so the searcher is closed only after
            // all documents have been read
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return results;

    }

    /** Opens a searcher on the given index directory; returns null on failure. */
    private IndexSearcher openSearcher(File targetDir) {
        try {
            Directory dir = FSDirectory.getDirectory(targetDir, false);
            return new IndexSearcher(dir);
        } catch (Exception e1) {
            e1.printStackTrace();
            System.out.println("创建索引对象出现异常...");
            return null;
        }
    }

    /** Copies the stored fields of a hit into a new article object. */
    private JournalArticle toArticle(Document document, SimpleDateFormat createdFormat) {
        JournalArticle article = new JournalArticle();
        String created = document.get("CREATEDATE");
        if (created != null && created.length() > 0) {
            try {
                Date parsed = createdFormat.parse(created);
                article.setDisplayDate(parsed);
                // Separate Date object: Date is mutable and must not be shared
                article.setCreateDate(new Date(parsed.getTime()));
            } catch (java.text.ParseException e) {
                e.printStackTrace();
            }
        }
        article.setTitle(document.get("TITLE"));
        article.setArticleId(document.get("ARTICLEID"));
        article.setUserName(document.get("USERNAME"));
        article.setUserId(document.get("USERID"));
        return article;
    }

    /**
     * Chooses the index directory to search: the second newest by name when
     * there are two or more entries, the only entry when there is one.
     *
     * @param dir parent directory holding the index directories; may be null
     * @return the directory to search, or null when none is available
     */
    private File getTargetDir(File dir) {
        if (dir == null) {
            return null;
        }
        // One listing only, so the count and the names cannot disagree
        String[] names = dir.list();
        if (names == null || names.length == 0) {
            return null;
        }
        if (names.length == 1) {
            return new File(dir, names[0]);
        }
        Arrays.sort(names);
        return new File(dir, names[names.length - 2]);
    }

}




配置文件管理类:


package com.liferay.portal.util;

import java.io.IOException;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

public class LuceneDBQueryUtil {

    /**
     * Reads the index directory from the text of the {@code <index>} child of
     * the root element of the classpath resource {@code zxt_index.xml}.
     *
     * @return the configured path with surrounding whitespace removed, or an
     *         empty string when the resource or the element is missing or the
     *         file cannot be parsed
     */
    public static String getIndexPath(){

        String filePath = "zxt_index.xml";
        String indexPath="";
        java.net.URL configUrl = Thread.currentThread().getContextClassLoader().getResource(filePath);
        if (configUrl == null) {
            // Report a missing file here instead of failing inside the parser
            System.err.println("LuceneDBQueryUtil: resource not found on classpath: " + filePath);
            return indexPath;
        }
        SAXBuilder builder = new SAXBuilder(false);
        try {
            Document doc = builder.build(configUrl);
            Element rootElement = doc.getRootElement();
            Element index=rootElement.getChild("index");
            if (index != null) {
                indexPath=index.getTextTrim();
            }
            System.out.println(indexPath);
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return indexPath;


    }
}



配置文件:zxt_index.xml


<?xml version="1.0" encoding="UTF-8"?>
<list>
<!-- 必须与 directory.properties 中的 index_store_directory 指向同一目录;XML 中反斜杠不需要转义 -->
<index>D:/lucene/indexDB</index>
</list>


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值