compass如何实现文件搜索

把文件包装成对象
package fileSearcherTest;

import org.compass.annotations.Index;
import org.compass.annotations.Searchable;
import org.compass.annotations.SearchableId;
import org.compass.annotations.SearchableProperty;
import org.compass.annotations.Store;

/**
 * Searchable representation of a file on disk.
 * The Compass annotations on the getters declare how each property is
 * indexed and stored; the file path serves as the searchable id.
 */
@Searchable
public class TextFile {

private String path;
private String title;
private String content;
private long lastModified;

/** Returns the file path, which is mapped as the searchable id. */
@SearchableId
public String getPath() {
return path;
}

public void setPath(String path) {
this.path = path;
}

/** Returns the title; mapped as tokenized, stored, with a boost of 3.0. */
@SearchableProperty(boost=3.0f,index=Index.TOKENIZED,store=Store.YES)
public String getTitle() {
return title;
}

public void setTitle(String title) {
this.title = title;
}
/*
* Open question: should content be stored at all? For large files,
* storing it will take up a lot of index space.
*/
@SearchableProperty(index=Index.TOKENIZED,store=Store.YES)
public String getContent() {
return content;
}

public void setContent(String content) {
this.content = content;
}

/** Returns the last-modified value; mapped as stored but not indexed. */
@SearchableProperty(index=Index.NO,store=Store.YES)
public long getLastModified() {
return lastModified;
}

public void setLastModified(long lastModified) {
this.lastModified = lastModified;
}

}



编写对文件的搜索类
package fileSearcherTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.compass.annotations.config.CompassAnnotationsConfiguration;
import org.compass.core.Compass;
import org.compass.core.CompassHighlighter;
import org.compass.core.CompassHits;
import org.compass.core.CompassSession;
import org.compass.core.CompassTransaction;

/**
 * Indexes text files with Compass and runs paged, highlighted searches
 * over the resulting index (stored under {@code ./index}).
 *
 * <p>Files are read line by line as plain text using the platform default
 * charset (that is what {@link FileReader} does), so only plain-text input
 * is indexed meaningfully.
 */
public class FileSearcher {

    private Compass compass;

    private Analyzer analyzer = new MMAnalyzer();

    /**
     * Builds the Compass instance (index location, highlighter markup,
     * default analyzer) and registers a JVM shutdown hook that closes it.
     */
    public FileSearcher() {
        this.compass = new CompassAnnotationsConfiguration()
                .setConnection("./index")
                .addClass(TextFile.class)
                .setSetting(
                        "compass.engine.highlighter.default.formatter.simple.pre",
                        "<font color='red'>")
                .setSetting(
                        "compass.engine.highlighter.default.formatter.simple.post",
                        "</font>")
                .setSetting("compass.engine.analyzer.default.type", analyzer)
                .buildCompass();
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                compass.close();
            }
        });
    }

    /**
     * Indexes a single file.
     *
     * @param filePath path of the file to index
     */
    public void indexFile(String filePath) {
        indexFile(new File(filePath));
    }

    /**
     * Indexes a single file: reads its text, wraps it in a {@link TextFile}
     * and creates it in the index inside a local transaction.
     *
     * @param file the file to index
     * @throws RuntimeException whatever the Compass session raises; the
     *         transaction is rolled back first if it had been started
     */
    public void indexFile(File file) {
        String content = readContent(file);

        TextFile tf = new TextFile();
        tf.setPath(file.getAbsolutePath());
        tf.setTitle(file.getName());
        tf.setContent(content);
        tf.setLastModified(file.lastModified());

        CompassSession session = null;
        CompassTransaction tx = null;
        try {
            session = compass.openSession();
            tx = session.beginLocalTransaction();
            // NOTE(review): create() presumably adds a new entry even when
            // this path is already indexed; confirm whether save() is wanted
            // for re-indexing.
            session.create(tf);
            tx.commit();
        } catch (RuntimeException e) {
            rollbackIfStarted(tx);
            throw e;
        } finally {
            if (session != null) {
                session.close();
            }
        }
    }

    /**
     * Indexes a file, or every file below a directory (recursively).
     *
     * <p>A directory whose children cannot be listed (it does not exist, is
     * not readable, or an I/O error occurs) is reported on standard error
     * and skipped instead of aborting the whole run.
     *
     * @param file a file or directory
     */
    public void index(File file) {
        if (file.isFile()) { // plain file: index it directly
            indexFile(file);
            return;
        }
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null rather than an empty array on failure.
            System.err.println("Cannot list directory, skipping: " + file);
            return;
        }
        for (int i = 0; i < children.length; i++) {
            File child = children[i];
            if (child.isDirectory()) { // directory: recurse
                index(child);
            } else {
                indexFile(child);
            }
        }
    }

    /**
     * Indexes a file, or every file below a directory (recursively).
     *
     * @param filePath path of a file or directory
     */
    public void index(String filePath) {
        index(new File(filePath));
    }

    /**
     * Removes a file from the index. Deletion is keyed on the searchable id,
     * which is the file's absolute path.
     *
     * @param file the file whose index entry should be removed
     * @throws RuntimeException whatever the Compass session raises; the
     *         transaction is rolled back first if it had been started
     */
    public void unIndex(File file) {
        TextFile tf = new TextFile();
        tf.setPath(file.getAbsolutePath());

        CompassSession session = null;
        CompassTransaction tx = null;
        try {
            session = compass.openSession();
            tx = session.beginLocalTransaction();
            session.delete(tf);
            tx.commit();
        } catch (RuntimeException e) {
            rollbackIfStarted(tx);
            throw e;
        } finally {
            if (session != null) {
                session.close();
            }
        }
    }

    /**
     * Runs a query and returns one page of results with the title and
     * content replaced by highlighted fragments.
     *
     * @param queryString the query to run
     * @param page_index  1-based page number
     * @param page_size   number of results per page
     * @return the hits on the requested page; empty when there are no hits
     *         or the requested page lies outside the result range
     * @throws RuntimeException whatever the Compass session raises; the
     *         transaction is rolled back first if it had been started
     */
    public List<TextFile> search(String queryString, int page_index,
            int page_size) {
        CompassSession session = null;
        CompassTransaction tx = null;

        try {
            session = compass.openSession();
            tx = session.beginTransaction();
            CompassHits hits = session.find(queryString);
            int n = hits.length();

            // Page window computed in long arithmetic so large arguments
            // cannot overflow, then clamped to [0, n] so an out-of-range
            // page yields an empty result instead of a negative hit index.
            long start = ((long) page_index - 1) * page_size;
            long end = start + page_size;
            int from = (int) Math.max(0L, Math.min(start, (long) n));
            int to = (int) Math.max((long) from, Math.min(end, (long) n));

            List<TextFile> tfs;
            if (n == 0) {
                tfs = Collections.emptyList();
            } else {
                tfs = new ArrayList<TextFile>(to - from);
            }
            for (int i = from; i < to; i++) {
                CompassHighlighter highlighter = hits.highlighter(i);
                TextFile tf = (TextFile) hits.data(i);
                tf.setTitle(orOriginal(
                        highlighter.fragment("title", tf.getTitle()),
                        tf.getTitle()));
                tf.setContent(orOriginal(
                        highlighter.fragment("content", tf.getContent()),
                        tf.getContent()));
                tfs.add(tf);
            }
            // Reached for the empty case too, so the hits and the
            // transaction are always finished on the normal path.
            hits.close();
            tx.commit();
            return tfs;
        } catch (RuntimeException e) {
            rollbackIfStarted(tx);
            throw e;
        } finally {
            if (session != null) {
                session.close();
            }
        }
    }

    /**
     * Reads a file as text, one line at a time, joining lines with "\n".
     * Read failures are printed and whatever was read so far is returned
     * (best effort, so one bad file does not stop a directory run).
     */
    private static String readContent(File file) {
        StringBuilder sb = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
        } catch (IOException e) { // also covers FileNotFoundException
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return sb.toString();
    }

    /**
     * Rolls back a transaction only if it was actually started, so a failure
     * while opening the session or beginning the transaction is not masked
     * by a NullPointerException.
     */
    private static void rollbackIfStarted(CompassTransaction tx) {
        if (tx != null) {
            tx.rollback();
        }
    }

    /**
     * Returns the highlighted fragment, or the original text when there is
     * none. Presumably the highlighter yields null when the property holds
     * no matching term; verify against the Compass version in use.
     */
    private static String orOriginal(String fragment, String original) {
        return fragment != null ? fragment : original;
    }

}


编写测试类Man
package fileSearcherTest;

import java.util.List;

import org.junit.Test;

/**
 * JUnit driver for {@link FileSearcher}: one test builds the index from a
 * local directory, the other runs a query and prints the hits.
 */
public class Man {

    private FileSearcher fileSearcher = new FileSearcher();

    /** Indexes every file under the sample data directory. */
    @Test
    public void testIndex() {
        // Alternative data set:
        // "E:\\EclipseStudyWorkspace\\Compass\\lucenceDataSource"
        fileSearcher.index("E:\\EclipseStudyWorkspace\\Compass\\lucenceDataSource2");
    }

    /** Fetches the first page (up to 100 hits) and prints path and content. */
    @Test
    public void testFileSearch() {
        List<TextFile> results = fileSearcher.search("吴朝辉", 1, 100);
        int count = results.size();
        System.out.println("Results : " + count);
        for (int i = 0; i < count; i++) {
            TextFile hit = results.get(i);
            System.out.println(hit.getPath());
            System.out.println("content:" + hit.getContent());
        }
    }

}



这里测试的只是txt和doc文件,其他的文件类型必须先做解析
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值