BM方案模式匹配的Java代码实现

速度还算快,例子里比较的文件一共371个,3,293,472字节,比较时间不超过2秒。
不过我的机器也很好,CPU: Athlon 64 X2 Dual 5200+,Mem: 2GB DDR2 667。


package cn.sh.huang;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Recursively scans a folder for <code>*.java</code> files and reports, for
 * every file, the byte offsets at which a search pattern occurs. Matching is
 * done with a BM-style right-to-left window comparison driven by a
 * precomputed table of {@link Rule} shift entries.
 *
 * <p>The code deliberately sticks to Java 6 language features (no
 * try-with-resources, no diamond operator) to match the JDK the original
 * targets.
 *
 * @author Huang, Haixu
 */
public class Main
{
    /** Folder scanned when none is given on the command line. */
    private static final String DEFAULT_FOLDER =
            "C:\\Program Files\\Java\\jdk1.6.0_13\\demo";

    /** Text searched for when none is given on the command line. */
    private static final String DEFAULT_TEXT = "DocumentEvent";

    /** Glob that a plain file name must match to be scanned. */
    private static final String FILE_GLOB = "*.java";

    /**
     * Runs the search and prints one line per file that contains the pattern
     * ("&lt;absolute path&gt;# &lt;offset&gt; &lt;offset&gt; ..."), followed
     * by the start and end wall-clock times.
     *
     * @param args the command line arguments; optional {@code args[0]} is the
     *             folder to scan and optional {@code args[1]} the text to
     *             search for (encoded as US-ASCII). Missing arguments fall
     *             back to the built-in defaults.
     * @throws FileNotFoundException if a listed file cannot be opened
     * @throws IOException           if reading a file fails
     */
    public static void main(String[] args) throws FileNotFoundException,
            IOException
    {
        String folderName = args.length > 0 ? args[0] : DEFAULT_FOLDER;
        String text = args.length > 1 ? args[1] : DEFAULT_TEXT;

        Calendar c = Calendar.getInstance();
        FileFilter filter = new FileFilter()
        {
            private final Pattern p = Pattern.compile(globToRegex(FILE_GLOB));

            public boolean accept(File file)
            {
                // Directories always pass so that the walk can descend.
                return file.isDirectory()
                        || p.matcher(file.getName()).matches();
            }
        };
        List<String> idxList = checkFolder(folderName, filter,
                text.getBytes("US-ASCII"));
        for (String line : idxList) {
            System.out.println(line);
        }
        DateFormat df = DateFormat.getTimeInstance();

        System.out.println("From " + df.format(c.getTime())
                + " to " + df.format(Calendar.getInstance().getTime()));
    }

    /**
     * Translates a simple glob into an anchored regular expression:
     * {@code *} matches any run of characters, {@code ?} matches exactly one
     * character, and everything else is matched literally.
     *
     * @param glob the glob expression
     * @return the equivalent regular expression source
     */
    private static String globToRegex(String glob)
    {
        StringBuilder regex = new StringBuilder("^");
        StringBuilder literal = new StringBuilder();
        for (int i = 0; i < glob.length(); i++) {
            char ch = glob.charAt(i);
            if (ch == '*' || ch == '?') {
                if (literal.length() > 0) {
                    // Quote literal runs as a whole so that regex
                    // metacharacters in the glob cannot leak through.
                    regex.append(Pattern.quote(literal.toString()));
                    literal.setLength(0);
                }
                regex.append(ch == '*' ? ".*" : ".");
            } else {
                literal.append(ch);
            }
        }
        if (literal.length() > 0) {
            regex.append(Pattern.quote(literal.toString()));
        }
        return regex.append('$').toString();
    }

    /**
     * Walks {@code folderName} recursively and collects one report line per
     * accepted file in which {@code pattern} occurs.
     *
     * @param folderName folder to scan
     * @param filter     decides which directory entries are visited
     * @param pattern    bytes to search for
     * @return the report lines; empty (never {@code null}) when the folder
     *         cannot be listed or nothing matches
     * @throws FileNotFoundException if a listed file cannot be opened
     * @throws IOException           if reading a file fails
     */
    private static List<String> checkFolder(String folderName,
            FileFilter filter, byte[] pattern) throws FileNotFoundException,
            IOException
    {
        List<String> list = new ArrayList<String>();
        File[] files = new File(folderName).listFiles(filter);
        if (files == null) {
            // listFiles yields null when the path is not a directory or an
            // I/O error occurs; report "no hits" instead of handing callers
            // a null to trip over.
            return list;
        }
        for (File file : files) {
            String fileName = file.getAbsolutePath();
            if (file.isDirectory()) {
                list.addAll(checkFolder(fileName, filter, pattern));
            } else {
                int[] idxz = checkFile(fileName, pattern);
                if (idxz.length > 0) {
                    StringBuilder sb =
                            new StringBuilder(fileName).append("# ");
                    for (int j = 0; j < idxz.length; j++) {
                        sb.append(idxz[j]).append(" ");
                    }
                    list.add(sb.toString());
                }
            }
        }
        return list;
    }

    /**
     * Searches a single file for {@code pattern}.
     *
     * @param fileName path of the file to search
     * @param pattern  bytes to search for
     * @return offsets of the occurrences found; only the first
     *         {@code Integer.MAX_VALUE} bytes of the file are searched
     *         because offsets are reported as {@code int}
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading fails
     */
    private static int[] checkFile(String fileName, byte[] pattern) throws
            FileNotFoundException, IOException
    {
        File file = new File(fileName);
        long length = file.length();
        // Clamp instead of casting blindly: a plain (int) cast wraps around
        // for files larger than 2 GiB.
        int fileLen = length > Integer.MAX_VALUE
                ? Integer.MAX_VALUE : (int) length;
        FileInputStream fis = new FileInputStream(file);
        try {
            return getPatternIndexz(fis, fileLen, 0, pattern);
        } finally {
            fis.close(); // always release the file handle
        }
    }

    /**
     * Slides a window of {@code pattern.length} bytes over the stream and
     * records the offset of every window that equals the pattern. After a
     * hit the window advances by the full pattern length, so occurrences
     * overlapping a reported one are not reported.
     *
     * @param fis     stream positioned at the beginning of the file
     * @param fileLen total length of the file in bytes
     * @param index   offset at which the search starts
     * @param pattern bytes to search for
     * @return offsets (relative to the beginning of the file) of the hits
     * @throws IOException if reading fails
     */
    private static int[] getPatternIndexz(FileInputStream fis, int fileLen,
            int index, byte[] pattern) throws IOException
    {
        List<Integer> idxList = new ArrayList<Integer>();
        final int patLen = pattern.length;
        if (patLen == 0) {
            // An empty pattern would never advance the window.
            return toIntArray(idxList);
        }

        // skip() may skip fewer bytes than requested, so loop until done.
        long toSkip = index;
        while (toSkip > 0) {
            long skipped = fis.skip(toSkip);
            if (skipped <= 0) {
                if (fis.read() < 0) {
                    return toIntArray(idxList); // EOF before start offset
                }
                skipped = 1;
            }
            toSkip -= skipped;
        }

        final Rule[] rules = getShiftRule(pattern);
        byte[] buffer = new byte[patLen];
        int remaining = fileLen - index; // bytes not yet pulled into window
        int shift = patLen;              // first pass fills the whole window

        // ">=": the window whose last byte is the last byte of the file must
        // be examined too.
        while (remaining >= shift) {
            int keep = patLen - shift;
            if (keep > 0) {
                // Slide the bytes that stay in the window to the front.
                System.arraycopy(buffer, shift, buffer, 0, keep);
            }
            int filled = 0;
            while (filled < shift) {
                // read() may deliver fewer bytes than asked for; accumulate.
                int n = fis.read(buffer, keep + filled, shift - filled);
                if (n < 0) {
                    // File is shorter than announced: nothing left to match.
                    return toIntArray(idxList);
                }
                filled += n;
            }
            remaining -= shift;

            shift = match(buffer, pattern, rules);
            if (shift == 0) {
                idxList.add(Integer.valueOf(index));
                shift = patLen;
            }
            index += shift;
        }
        return toIntArray(idxList);
    }

    /** Unboxes a list of offsets into an {@code int} array. */
    private static int[] toIntArray(List<Integer> values)
    {
        int[] result = new int[values.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = values.get(i).intValue();
        }
        return result;
    }

    /**
     * Builds the shift table for {@code pattern}.
     *
     * <p>Every shift distance {@code 1..length-1} starts out as a candidate.
     * Walking the pattern from its last position {@code i} towards the
     * front, each candidate is compared against the pattern byte it would
     * place under position {@code i}:
     * <ul>
     * <li>it would place nothing there (shifted past {@code i}): emit a
     *     default rule (byte {@code null}) and retire the candidate;</li>
     * <li>it would place a different byte there: emit a rule for that byte
     *     (only the first, i.e. smallest, shift per byte and position) and
     *     retire the candidate;</li>
     * <li>it would place the same byte there: keep the candidate for the
     *     next position.</li>
     * </ul>
     * Rules are therefore ordered by descending position.
     *
     * @param pattern bytes to search for
     * @return the shift rules in the order {@link #match} expects
     */
    private static Rule[] getShiftRule(final byte[] pattern)
    {
        int endPos = pattern.length - 1;
        // Candidate alignments: idx is the pattern position that would land
        // under the window's last byte, i.e. a shift of (endPos - idx).
        List<Integer> idxList = new ArrayList<Integer>();
        for (int i = endPos - 1; i >= 0; i--) {
            idxList.add(Integer.valueOf(i));
        }
        List<Rule> ruleList = new ArrayList<Rule>();
        Set<Byte> flagSet = new HashSet<Byte>();
        for (int i = endPos; i >= 0 && idxList.size() > 0; i--) {
            byte p = pattern[i];
            List<Integer> shadowIdxList = new ArrayList<Integer>();
            for (Integer candidate : idxList) {
                int idx = candidate.intValue();
                int pos = idx - (endPos - i);
                if (pos < 0) {
                    ruleList.add(new Rule(i, null, endPos - idx));
                } else {
                    byte pp = pattern[pos];
                    if (pp != p) {
                        Byte ppp = Byte.valueOf(pp);
                        // add() is false when this byte already has a
                        // (smaller) shift recorded for position i.
                        if (flagSet.add(ppp)) {
                            ruleList.add(new Rule(i, ppp, endPos - idx));
                        }
                    } else {
                        shadowIdxList.add(candidate);
                    }
                }
            }
            flagSet.clear();
            idxList = shadowIdxList;
        }
        return ruleList.toArray(new Rule[ruleList.size()]);
    }

    /**
     * Compares the window with the pattern from right to left.
     *
     * @param buffer  current window, {@code pattern.length} bytes long
     * @param pattern bytes to search for
     * @param rules   table produced by {@link #getShiftRule}
     * @return {@code 0} when the window equals the pattern, otherwise the
     *         number of bytes by which the window is to be advanced
     */
    private static int match(final byte[] buffer, final byte[] pattern,
            Rule[] rules)
    {
        for (int i = pattern.length - 1; i >= 0; i--) {
            byte b = buffer[i];
            if (b == pattern[i]) {
                continue;
            }
            // First mismatch (from the right) is at position i.
            int defaultShift = pattern.length;
            for (Rule rule : rules) {
                Byte pp = rule.getP();
                if (pp == null) {
                    // Default rule: remember its shift as the fallback.
                    defaultShift = rule.getShift();
                    continue;
                }
                int idx = rule.getIdx();
                if (i < idx) {
                    continue; // rule belongs to a position further right
                }
                if (i > idx) {
                    return defaultShift; // past all rules for position i
                }
                if (pp.byteValue() == b) {
                    return rule.getShift();
                }
            }
            return defaultShift; // no matching rule
        }
        return 0;
    }
}

/**
 * Immutable entry of the matcher's shift table. It bundles a position in the
 * pattern, the byte the entry applies to ({@code null} marks an entry that
 * applies regardless of the byte), and a shift distance.
 */
final class Rule
{
    /** Shift distance carried by this entry. */
    private final int distance;

    /** Byte this entry applies to, or {@code null} for "any byte". */
    private final Byte trigger;

    /** Pattern position this entry belongs to. */
    private final int position;

    /**
     * Creates an entry.
     *
     * @param idx   pattern position the entry belongs to
     * @param p     byte the entry applies to, or {@code null}
     * @param shift shift distance
     */
    public Rule(final int idx, final Byte p, final int shift)
    {
        this.position = idx;
        this.trigger = p;
        this.distance = shift;
    }

    /**
     * @return the shift distance given at construction
     */
    public int getShift()
    {
        return this.distance;
    }

    /**
     * @return the byte given at construction; may be {@code null}
     */
    public Byte getP()
    {
        return this.trigger;
    }

    /**
     * @return the pattern position given at construction
     */
    public int getIdx()
    {
        return this.position;
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值