Tuesday, November 18, 2014

kmp string match

from CLRS(Introduction to Algorithm)

For a pattern P, pai[q] is the length of the longest prefix of P that is a proper suffix of Pq. (Pq is P[1...q], note that here the index start from 1, which is different from the code below)

vector<int> computepai(string Pat){
    int n=Pat.length();
    vector<int> pai(n,0);
    pai[0]=0;

    for(int i=1;i<n;i++){
        int k=pai[i-1];//number of characters matched
        while(k>0&&Pat[k]!=Pat[i])
            k=pai[k-1];
        k+=Pat[k]==Pat[i];

        pai[i]=k;
    }
    return pai;
}

下面这个computepai是算法导论上的版本,和上面的几乎一样(不同的只是k的位置),但是上面的似乎更容易理解和记忆些。
vector<int> computepai(string Pat){
    int n=Pat.length();
    vector<int> pai(n,0);
    pai[0]=0;
    int k=0;//number of characters matched
    for(int i=1;i<n;i++){
        while(k>0&&Pat[k]!=Pat[i])
            k=pai[k-1];
        if(Pat[k]==Pat[i])
            k++;
        pai[i]=k;
    }
    return pai;
}

void matchstr(string text,string Pat){
    int n=text.length();
    int m=Pat.length();
    vector<int> pai= computepai(Pat);
    int k=0;//number of characters matched
    for(int i=0;i<n;i++){//scan the text from left to right
        while(k>0&&Pat[k]!=text[i])
            k=pai[k-1];   //next character of P doesn't match text[i]
        if(Pat[k]==text[i])
            k++;       //next character matches
        if(k==m){  // is all of P matched
            cout<<"find in pos : "<<i-m+1<<"  "<<text.substr(i-m+1,m)<<endl;
            k=pai[k-1];
        }
    }
}

No comments:

Post a Comment