public class KMP {
public static void main(String[] args) {
String s = "aabaabcabaabcac";
String p = "abaabcac";
s = "adsdfsfsdfsdf";
p = "sdfsfsdfsdf";
s = "";
p = "";
System.out.println(kmpMatch(s, p));
}
public static int kmpMatch(String str, String pattern) {
if (pattern == null || pattern.length() == 0) {
return 0;
}
if (str == null || str.length() == 0) {
return -1;
}
char[] strArr = str.toCharArray();
char[] patternArr = pattern.toCharArray();
int[] next = getNextArray(patternArr);
// Pointer of the given str
int strPtr = 0;
// Pointer of the given pattern
int patternPtr = 0;
while (strPtr < strArr.length && patternPtr < patternArr.length) {
// next[0] = -1, so when patternPtr = -1, pattern differ with str at index 0
// so move both strPtr and patternPtr.
if (patternPtr == -1) {
strPtr++;
// patternPtr becomes 0
patternPtr++;
}
// Given pattern matches the given str at the head with patternPtr chars, which means
// strArr[strPtr-patternPtr:strPtr] == pattern[0:patterPtr], carry on the comparison.
else if (strArr[strPtr] == patternArr[patternPtr]) {
strPtr++;
patternPtr++;
}
// patternPtr != -1 && strArr[strPtr] != patternArr[patternPtr], pattern differ with str at certain position,
// but not the first char. Do not move forward the both pointers. Adjust the patternPtr to start a
// new comparison.
else {
// if pattern differ with str at the first char, patternPtr points to -1
patternPtr = next[patternPtr];
}
}
// all matched
if (patternPtr == patternArr.length) {
return strPtr - patternPtr;
}
return -1;
}
public static int[] getNextArray(char[] pattern) {
int[] next = new int[pattern.length];
next[0] = -1;
int matchedCount;
for (int j = 1; j < pattern.length; j++) {
// suppose matchedCount = -1 or pattern[j-1] != pattern[matchedCount], next[j] should have a default value.
next[j] = 0;
// previously matched count
matchedCount = next[j - 1];
while (matchedCount != -1) {
// Standing at index j, if previous char pattern[j-1] matches the (matchedCount)th char, current next[j] increases
// And up till index j-1, next[j] chars matches
// matched chars pattern[0:matchedCount-1]
if (pattern[j - 1] == pattern[matchedCount]) {
next[j] = matchedCount + 1;
break;
}
// If not match, technically, we need to start over from the head, but we can take advantage of the previously
// computed offset information.
// !!! Important !!! You can treat substring pattern[j-matchedCount:] as the string to be matched by 'pattern',
// it will be much easier for you to understand.
// Therefore, set matchedCount to the offset that pattern failed to match substring pattern[j-matchedCount:]
// instead of a fresh start.
// Take following pattern string as example, 9th char is 'd' which differs from 5th char 'f', but we don't
// have to search from 6th char, we can take advantage of next[4]=1, then compare 1th char 'd' with 9th char 'd'.
// [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] j
// [s, d, f, s, f, s, d, f, s, d, f] pattern
// [-1,0, 0, 0, 1, 0, 1, 2, 3, 4, 2] next
matchedCount = next[matchedCount];
}
}
return next;
}
}
// Example 1.
// pattern string "abaabcac"
// [0, 1, 2, 3, 4, 5, 6, 7] counter j
// [a, b, a, a, b, c, a, c] pattern string
// [-1,0, 0, 1, 1, 2, 0, 1] step size
// if j = 0, next[j] = next[0]=-1, which means previous matched chars length before 0 is -1.
// actually there is no chars before index 0
// if j = 1, next[j] = next[1]=0, which means previous matched consecutive substring length before 1 is 0.
// it can be easily understood that you cannot match pattern[0] with pattern[0], they are the same thing.
// if j = 2, next[j] = next[2] = 0, which means previous matched consecutive substring length before 2 is 0.
// (pattern[1] = 'b') != (pattern[0] = 'a')
// if j = 3, next[j] = next[3] = 1, which means previous matched consecutive substring length before 3 is 1.
// (pattern[2] = 'a') == (pattern[0] = 'a')
// if j = 4, next[j] = next[4] = 1, which means previous matched consecutive substring length before 4 is 1.
// (pattern[2:3] = 'aa') != (pattern[0:1] = 'ab'), but (pattern[3] = 'a') == (pattern[0] = 'a')
// if j = 5, next[j] = next[5] = 2, which means previous matched consecutive substring length before 5 is 2.
// (pattern[3:4] = 'ab') == (pattern[0:1] = 'ab')
// if j = 6, next[j] = next[6] = 0, which means previous matched consecutive substring length before 6 is 0.
// (pattern[3:5] = 'abc') == (pattern[0:2] = 'aba'), and (pattern[5] = 'c') != (pattern[0] = 'a')
// if j = 7, next[j] = next[7] = 1, which means previous matched consecutive substring length before 7 is 1.
// (pattern[6] = 'a') == (pattern[0] = 'a')
// Example 2.
// pattern string "sdfsfsdfsdf";
// next[j-1] means how many chars previous substring matches the pattern string from the head
// [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
// [s, d, f, s, f, s, d, f, s, d, f]
// [-1,0, 0, 0, 1, 0, 1, 2, 3, 4, 2]
// if j = 0, next[j] = next[0]=-1, which means previous matched chars length before 0 is -1.
// actually there is no chars before index 0
// if j = 1, next[j] = next[1]=0, which means previous matched consecutive substring length before 1 is 0.
// it can be easily understood that you cannot match pattern[0] with pattern[0], they are the same thing.
// if j = 2, next[j] = next[2] = 0, which means previous matched consecutive substring length before 2 is 0.
// (pattern[1] = 'd') != (pattern[0] = 's')
// if j = 4, next[j] = next[4] = 1, which means previous matched consecutive substring length before 4 is 1.
// (pattern[3] = 's') != (pattern[0] = 's')
// if j = 7, next[j] = next[7] = 2, which means previous matched consecutive substring length before 7 is 2.
// (pattern[5:6] = 'sd') != (pattern[0:1] = 'sd')
// if j = 10, next[j] = next[10] = 2, which means previous matched consecutive substring length before 10 is 2.
// (pattern[8:9] = 'sd') == (pattern[0:1] = 'sd')