Boyer Moore Algorithm, indexOf, strstr

* BoyerMoore.java

/**
 * Boyer-Moore substring search over byte arrays.
 *
 * <p>The search uses two precomputed shift tables: one indexed by the unsigned value of the
 * mismatched haystack byte, and one indexed by how many needle bytes had already matched
 * (scanning from the right) when the mismatch occurred. On a mismatch the larger of the two
 * shifts is applied.
 */
public class BoyerMoore {

    /** Number of distinct unsigned byte values; the size of the per-character shift table. */
    private static final int ALPHABET_SIZE = 256;

    /**
     * Finds the first occurrence of {@code needle} anywhere in {@code haystack}, building the
     * shift tables internally.
     *
     * @param haystack bytes to search in
     * @param needle   bytes to search for
     * @return index of the first match, or {@code -1} if there is none; an empty needle
     *         matches at index 0
     */
    public static int indexOf(byte[] haystack, byte[] needle) {
        return indexOf(haystack, 0, haystack.length, needle,
                makeOffsetTable(needle), makeCharTable(needle));
    }

    /**
     * Finds the first occurrence of {@code needle} inside the window
     * {@code haystack[index, index + length)} using precomputed shift tables.
     *
     * <p>The window is clamped to the end of {@code haystack}, so an over-long {@code length}
     * results in a shorter search rather than an out-of-bounds read.
     *
     * @param haystack    bytes to search in
     * @param index       offset in {@code haystack} at which the search window starts
     * @param length      number of bytes in the search window, counted from {@code index}
     * @param needle      bytes to search for
     * @param offsetTable shift table for {@code needle}, indexed by the number of needle bytes
     *                    already matched from the right; one entry per needle byte
     * @param charTable   shift table for {@code needle}, indexed by unsigned byte value;
     *                    must have 256 entries
     * @return absolute index in {@code haystack} of the first match, or {@code -1} if the
     *         window contains none; an empty needle matches immediately at {@code index}
     */
    public static int indexOf(byte[] haystack, int index, int length, byte[] needle,
                              int[] offsetTable, int[] charTable) {
        if (needle.length == 0) {
            // Nothing to compare; without this guard the scan below would read needle[-1].
            return index;
        }
        // Compute the window end in long arithmetic so index + length cannot overflow,
        // then clamp it so haystack[i] is always in bounds.
        int end = (int) Math.min((long) index + length, haystack.length);
        int i = needle.length - 1 + index;
        while (i < end) {
            // Compare right-to-left, starting from the last needle byte.
            int j = needle.length - 1;
            while (needle[j] == haystack[i]) {
                if (j == 0) {
                    return i;
                }
                --i;
                --j;
            }
            // Bytes are signed in Java: mask to 0..255 before using one as a table index.
            i += Math.max(offsetTable[needle.length - 1 - j], charTable[haystack[i] & 0xFF]);
        }
        return -1;
    }

    /**
     * Makes the jump table based on the mismatched character information.
     *
     * <p>Every entry defaults to the needle length. For each needle byte except the last one,
     * the entry at that byte's unsigned value is set to its distance from the last needle
     * position (later occurrences overwrite earlier ones).
     */
    private static int[] makeCharTable(byte[] needle) {
        int[] table = new int[ALPHABET_SIZE];
        for (int i = 0; i < table.length; ++i) {
            table[i] = needle.length;
        }
        for (int i = 0; i < needle.length - 1; ++i) {
            // Mask to an unsigned value so bytes >= 0x80 do not produce a negative index.
            table[needle[i] & 0xFF] = needle.length - 1 - i;
        }
        return table;
    }

    /**
     * Makes the jump table based on the scan offset at which the mismatch occurs.
     *
     * <p>The table is indexed by the number of needle bytes that matched (from the right)
     * before the mismatch.
     */
    private static int[] makeOffsetTable(byte[] needle) {
        int[] table = new int[needle.length];
        int lastPrefixPosition = needle.length;
        // First pass: shifts derived from suffixes of the needle that are also prefixes.
        for (int i = needle.length - 1; i >= 0; --i) {
            if (isPrefix(needle, i + 1)) {
                lastPrefixPosition = i + 1;
            }
            table[needle.length - 1 - i] = lastPrefixPosition - i + needle.length - 1;
        }
        // Second pass: overwrite with shifts derived from inner occurrences of a suffix.
        for (int i = 0; i < needle.length - 1; ++i) {
            int slen = suffixLength(needle, i);
            table[slen] = needle.length - 1 - i + slen;
        }
        return table;
    }

    /**
     * Is needle[p:end] a prefix of needle?
     */
    private static boolean isPrefix(byte[] needle, int p) {
        for (int i = p, j = 0; i < needle.length; ++i, ++j) {
            if (needle[i] != needle[j]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the maximum length of the substring that ends at p and is also a suffix.
     */
    private static int suffixLength(byte[] needle, int p) {
        int len = 0;
        for (int i = p, j = needle.length - 1;
             i >= 0 && needle[i] == needle[j]; --i, --j) {
            len += 1;
        }
        return len;
    }

    /**
     * Demo: splits a CRLF-terminated HTTP request header into lines and prints each one.
     */
    public static void main(String[] args) {
        String s = """
                CONNECT presearch.com:443 HTTP/1.1\r
                Host: presearch.com:443\r
                Proxy-Connection: keep-alive\r
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36\r
                """;
        byte[] needle = {0x0d, 0x0a};
        // One byte per char (low 8 bits), so byte offsets equal String offsets.
        byte[] buf = new byte[s.length()];
        for (int i = 0; i < buf.length; i++) {
            buf[i] = (byte) (s.charAt(i) & 0x000000ff);
        }
        int[] offsetTable = makeOffsetTable(needle);
        int[] charTable = makeCharTable(needle);
        int lineNum = 0;
        int idx1 = 0;

        while (idx1 < buf.length) {
            // The third argument is the number of bytes remaining, not the total length.
            int idx2 = indexOf(buf, idx1, buf.length - idx1, needle, offsetTable, charTable);
            if (idx2 < 0) {
                // No further CRLF: treat the unterminated tail as the final line.
                idx2 = buf.length;
            }
            System.out.printf("LINE#%d [%s]\n", ++lineNum, s.substring(idx1, idx2));
            idx1 = idx2 + needle.length;
        }
    }
}

C:\Users\admin\.jdks\corretto-18.0.2-1\bin\java.exe "-javaagent:E:\Program Files\JetBrains\IntelliJ IDEA 2022.2.3\lib\idea_rt.jar=55089:E:\Program Files\JetBrains\IntelliJ IDEA 2022.2.3\bin" -Dfile.encoding=UTF-8 -Dsun.stdout.encoding=UTF-8 -Dsun.stderr.encoding=UTF-8 -classpath E:\usr\source\BoyerMoore\out\production\BoyerMoore BoyerMoore
LINE#1 [CONNECT presearch.com:443 HTTP/1.1]
LINE#2 [Host: presearch.com:443]
LINE#3 [Proxy-Connection: keep-alive]
LINE#4 [User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36]

Process finished with exit code 0

注:Java 的三引号文本块(text block)会把每一行的行尾统一为 0x0a ('\n'),因此上面的代码只需在每行末尾显式写出 \r,即可得到 CRLF (0x0d 0x0a)。

KMP search pattern

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

fareast_mzh

打赏个金币

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值