java 路径通配符_通配符匹配算法的java实现

看过一些通配符匹配的实现,都不甚满意,于是自己写了一个 Java 实现,仅供参考。

/**
 * Wildcard substring search over a character sequence.
 *
 * <p>Pattern syntax: {@code *} stands for any run of zero or more characters and {@code ?}
 * stands for exactly one character. A pattern is broken into literal segments at each run of
 * {@code *}; the matcher then looks for those segments in order inside the input and reports
 * every non-overlapping region that contains all of them.
 *
 * <p>Leading and trailing {@code *} do not extend a match: {@code "*b"} and {@code "b*"} both
 * behave like {@code "b"}, and a pattern with no literal segment at all (such as {@code "*"} or
 * the empty string) yields no matches.
 *
 * <p>Cost as stated by the original author: average O(n + m), worst case O(n * m).
 *
 * @author Spance.Wong
 */
static class WildCardMatcher {

    /**
     * Convenience entry point for experiments: splits {@code pattern} on runs of {@code *} and
     * delegates to {@link #matches(CharSequence, String[])}.
     *
     * @param input   text to search
     * @param pattern wildcard pattern using {@code *} and {@code ?}
     * @return the matched substrings, in order of appearance; empty if there is none
     */
    static List<String> matches(String input, String pattern) {
        // Splitting is intentionally simple; empty segments it may produce (for a leading '*'
        // or an empty pattern) are discarded by the overload below.
        String[] pa = pattern.split("\\*+");
        return matches(input, pa);
    }

    /**
     * Finds every non-overlapping occurrence of the segment sequence in {@code input}.
     *
     * <p>The first segment and all middle segments are matched at their leftmost possible
     * position; when there is more than one segment, the last one is matched greedily at its
     * rightmost position, so a match spans as much of the input as possible.
     *
     * @param input    text to search
     * @param patterns literal segments (each may contain {@code ?}); {@code null} or empty
     *                 entries are ignored
     * @return the matched substrings, in order of appearance; empty if there is none
     */
    static List<String> matches(CharSequence input, String[] patterns) {
        List<String> result = new ArrayList<String>();

        // Empty segments carry no constraint and would break lookBehind, so drop them up front.
        List<String> segments = new ArrayList<String>(patterns.length);
        for (String p : patterns) {
            if (p != null && p.length() > 0) {
                segments.add(p);
            }
        }

        int n = input.length(), m = segments.size();
        if (m == 0) {
            // Nothing to anchor a match on; without this guard the scan below would never advance.
            return result;
        }

        for (int i = 0; i < n; ) {
            int left = -1, right = -1;
            for (int j = 0; j < m; j++) {
                String segment = segments.get(j);
                long region = lookBehind(input, i, segment);
                if (region < 0) {
                    // The segment does not occur anywhere from i to the end of the input, so no
                    // later starting point can succeed either: the search is finished.
                    return result;
                }
                if (j != 0 && j == m - 1) {
                    // Last of several segments: keep the rightmost occurrence. Restart one past
                    // the previous hit's left edge so overlapping occurrences are not skipped.
                    int k = (int) (region >> 32) + 1;
                    while (true) {
                        long further = lookBehind(input, k, segment);
                        if (further < 0) {
                            break;
                        }
                        region = further;
                        k = (int) (further >> 32) + 1;
                    }
                }
                if (j == 0) {
                    left = (int) (region >> 32); // left edge lives in the high 32 bits
                }
                right = (int) region;            // right edge lives in the low 32 bits
                i = right + 1;
            }
            result.add(input.subSequence(left, right + 1).toString());
        }
        return result;
    }

    /**
     * Scans {@code in} forward from index {@code i} for the leftmost occurrence of
     * {@code pattern}. Each candidate right edge is tested against the pattern's last character
     * first and, on a hit, the rest of the pattern is verified walking backwards.
     *
     * @param in      text to search
     * @param i       index at which the occurrence may start at the earliest
     * @param pattern non-empty literal segment; {@code ?} matches any single character
     * @return on success, the inclusive start index in the high 32 bits and the inclusive end
     *         index in the low 32 bits (always {@code >= 0}); on failure a strictly negative
     *         value whose magnitude is the input length (at least 1)
     * @throws IllegalArgumentException if {@code pattern} is empty
     */
    static long lookBehind(CharSequence in, int i, CharSequence pattern) {
        int len = in.length(), pLen = pattern.length(), last = pLen - 1;
        if (pLen == 0) {
            throw new IllegalArgumentException("pattern segment must not be empty");
        }
        // Failure must never be 0: 0 is the packed form of a one-character hit at index 0.
        long miss = -(long) Math.max(len, 1);
        if (len - i < pLen) {
            return miss;
        }

        char pEnd = pattern.charAt(last);
        for (int end = i + last; end < len; end++) {
            if (pEnd != '?' && in.charAt(end) != pEnd) {
                continue;
            }
            // Last character fits; verify the remaining pLen - 1 characters right to left.
            int start = end - last;
            int j = end - 1;
            while (j >= start) {
                char p = pattern.charAt(j - start);
                if (p != '?' && in.charAt(j) != p) {
                    break;
                }
                j--;
            }
            if (j < start) {
                return ((long) start) << 32 | end;
            }
        }
        return miss;
    }
}

// A handful of checks, followed by a rough wildcard-vs-regex timing comparison.
public static void main(String[] args) {
    // Each call passes the matcher's result plus the expected substrings to assertAndPrint
    // (defined elsewhere; presumably it compares and prints them -- confirm against the helper).
    assertAndPrint(WildCardMatcher.matches("assbsavb", "a*b"), "assbsavb");
    assertAndPrint(WildCardMatcher.matches("assbsavb", "a??b"), "assb");
    assertAndPrint(WildCardMatcher.matches("assbsavb", "a?b"), "avb");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "a??"), "ass", "avb");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "?"));
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "??sb"), "assb");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "b*s"), "bs");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "?s"), "as", "bs");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "s?"), "ss", "sa");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "s?s"), "sbs");
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "s"));
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "z"));
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "Z"));
    assertAndPrint(WildCardMatcher.matches("assbsavbz", "s?b?a"), "ssbsa");
    assertAndPrint(WildCardMatcher.matches("assbsavbcsb", "a*sb"), "assbsavbcsb");
    assertAndPrint(WildCardMatcher.matches("assbsavbcsb", "a*s*b"), "assbsavbcsb");
    assertAndPrint(WildCardMatcher.matches("assbsavbcsb", "a*s?"), "assbsavbcsb");
    assertAndPrint(WildCardMatcher.matches("assbsavbcsb", "a*a?b"), "assbsavb");

    // Timing comparison over an external text (readExternal is defined elsewhere).
    String file = readExternal();
    int loop = 100;
    int count = 0;
    String[] pc = "???>".split("\\*+");

    // nanoTime is the monotonic clock meant for elapsed-time measurement; report in ms as before.
    long t1 = System.nanoTime();
    for (int i = 0; i < loop; i++) {
        count += WildCardMatcher.matches(file, pc).size();
    }
    long t2 = System.nanoTime();
    System.out.printf("wildcard matcher time=%d, count=%d %n", (t2 - t1) / 1000000L, count);

    count = 0;
    // Regex counterpart of the wildcard "???>": three arbitrary characters followed by '>'.
    // DOTALL lets '.' cross line terminators, just as '?' does in WildCardMatcher.
    // NOTE(review): the previous pattern "()" matched the empty string at every position and
    // looks like an extraction casualty; confirm this is the intended baseline.
    Pattern pa = Pattern.compile("...>", Pattern.DOTALL);
    t1 = System.nanoTime();
    for (int i = 0; i < loop; i++) {
        java.util.regex.Matcher ma = pa.matcher(file);
        while (ma.find()) {
            count++; // one per non-overlapping match, mirroring the wildcard run's list size
        }
    }
    t2 = System.nanoTime();
    System.out.printf("regex matcher time=%d, count=%d %n", (t2 - t1) / 1000000L, count);
}

测试结果同样表明,通配符(简单的模糊匹配)比正则表达式有更高的匹配效率。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值