public class KMP {

    public static void main(String[] args) {
        String s = "aabaabcabaabcac";
        String p = "abaabcac";

        s = "adsdfsfsdfsdf";
        p = "sdfsfsdfsdf";

        s = "";
        p = "";
        System.out.println(kmpMatch(s, p));
    }

    public static int kmpMatch(String str, String pattern) {
        if (pattern == null || pattern.length() == 0) {
            return 0;
        }

        if (str == null || str.length() == 0) {
            return -1;
        }

        char[] strArr = str.toCharArray();
        char[] patternArr = pattern.toCharArray();

        int[] next = getNextArray(patternArr);

        // Pointer of the given str
        int strPtr = 0;

        // Pointer of the given pattern
        int patternPtr = 0;

        while (strPtr < strArr.length && patternPtr < patternArr.length) {
            // next[0] = -1, so when patternPtr = -1, pattern differ with str at index 0
            // so move both strPtr and patternPtr.
            if (patternPtr == -1) {
                strPtr++;

                // patternPtr becomes 0
                patternPtr++;
            }
            // Given pattern matches the given str at the head with patternPtr chars, which means
            // strArr[strPtr-patternPtr:strPtr] == pattern[0:patterPtr], carry on the comparison.
            else if (strArr[strPtr] == patternArr[patternPtr]) {
                strPtr++;

                patternPtr++;
            }
            // patternPtr != -1 && strArr[strPtr] != patternArr[patternPtr], pattern differ with str at certain position,
            // but not the first char. Do not move forward the both pointers. Adjust the patternPtr to start a
            // new comparison.
            else {
                // if pattern differ with str at the first char, patternPtr points to -1
                patternPtr = next[patternPtr];
            }
        }

        // all matched
        if (patternPtr == patternArr.length) {
            return strPtr - patternPtr;
        }

        return -1;
    }

    public static int[] getNextArray(char[] pattern) {
        int[] next = new int[pattern.length];

        next[0] = -1;

        int matchedCount;

        for (int j = 1; j < pattern.length; j++) {

            // suppose matchedCount = -1 or pattern[j-1] != pattern[matchedCount], next[j] should have a default value.
            next[j] = 0;

            // previously matched count
            matchedCount = next[j - 1];
            while (matchedCount != -1) {
                // Standing at index j, if previous char pattern[j-1] matches the (matchedCount)th char, current next[j] increases
                // And up till index j-1, next[j] chars matches

                // matched chars pattern[0:matchedCount-1]
                if (pattern[j - 1] == pattern[matchedCount]) {
                    next[j] = matchedCount + 1;

                    break;
                }

                // If not match, technically, we need to start over from the head, but we can take advantage of the previously
                // computed offset information.

                // !!! Important !!! You can treat substring pattern[j-matchedCount:] as the string to be matched by 'pattern',
                // it will be much easier for you to understand.

                // Therefore, set matchedCount to the offset that pattern failed to match substring pattern[j-matchedCount:]
                // instead of a fresh start.

                // Take following pattern string as example, 9th char is 'd' which differs from 5th char 'f', but we don't
                // have to search from 6th char, we can take advantage of next[4]=1, then compare 1th char 'd' with 9th char 'd'.

                // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] j
                // [s, d, f, s, f, s, d, f, s, d, f] pattern
                // [-1,0, 0, 0, 1, 0, 1, 2, 3, 4, 2] next
                matchedCount = next[matchedCount];
            }
        }

        return next;
    }
}

// Example 1.

// pattern string "abaabcac"

// [0, 1, 2, 3, 4, 5, 6, 7] counter j
// [a, b, a, a, b, c, a, c] pattern string
// [-1,0, 0, 1, 1, 2, 0, 1] step size

// if j = 0, next[j] = next[0]=-1, which means previous matched chars length before 0 is -1.
// actually there is no chars before index 0

// if j = 1, next[j] = next[1]=0, which means previous matched consecutive substring length before 1 is 0.
// it can be easily understood that you cannot match pattern[0] with pattern[0], they are the same thing.

// if j = 2, next[j] = next[2] = 0, which means previous matched consecutive substring length before 2 is 0.
// (pattern[1] = 'b') != (pattern[0] = 'a')

// if j = 3, next[j] = next[3] = 1, which means previous matched consecutive substring length before 3 is 1.
// (pattern[2] = 'a') == (pattern[0] = 'a')

// if j = 4, next[j] = next[4] = 1, which means previous matched consecutive substring length before 4 is 1.
// (pattern[2:3] = 'aa') != (pattern[0:1] = 'ab'), but (pattern[3] = 'a') == (pattern[0] = 'a')

// if j = 5, next[j] = next[5] = 2, which means previous matched consecutive substring length before 5 is 2.
// (pattern[3:4] = 'ab') == (pattern[0:1] = 'ab')

// if j = 6, next[j] = next[6] = 0, which means previous matched consecutive substring length before 6 is 0.
// (pattern[3:5] = 'abc') == (pattern[0:2] = 'aba'), and (pattern[5] = 'c') != (pattern[0] = 'a')

// if j = 7, next[j] = next[7] = 1, which means previous matched consecutive substring length before 7 is 1.
// (pattern[6] = 'a') == (pattern[0] = 'a')

// Example 2.

// pattern string "sdfsfsdfsdf";

// next[j-1] means how many chars previous substring matches the pattern string from the head

// [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
// [s, d, f, s, f, s, d, f, s, d, f]
// [-1,0, 0, 0, 1, 0, 1, 2, 3, 4, 2]

// if j = 0, next[j] = next[0]=-1, which means previous matched chars length before 0 is -1.
// actually there is no chars before index 0

// if j = 1, next[j] = next[1]=0, which means previous matched consecutive substring length before 1 is 0.
// it can be easily understood that you cannot match pattern[0] with pattern[0], they are the same thing.

// if j = 2, next[j] = next[2] = 0, which means previous matched consecutive substring length before 2 is 0.
// (pattern[1] = 'd') != (pattern[0] = 's')

// if j = 4, next[j] = next[4] = 1, which means previous matched consecutive substring length before 4 is 1.
// (pattern[3] = 's') != (pattern[0] = 's')

// if j = 7, next[j] = next[7] = 2, which means previous matched consecutive substring length before 7 is 2.
// (pattern[5:6] = 'sd') != (pattern[0:1] = 'sd')

// if j = 10, next[j] = next[10] = 2, which means previous matched consecutive substring length before 10 is 2.
// (pattern[8:9] = 'sd') == (pattern[0:1] = 'sd')
Copyright © iovi.com 2017 all right reserved,powered by GitbookLast Modification: 2020-03-22 20:17:05

results matching ""

    No results matching ""