Cod sursa(job #2449395)

Utilizator AndreiStanescuAlloys Nokito AndreiStanescu Data 19 august 2019 15:54:06
Problema Aho-Corasick Scor 30
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.17 kb
#include<iostream>
#include<fstream>
#include<queue>
#include<string>
#include<cstring>
#define maxs 100000
#define maxc 26
#define nmax 100
#define dim 10000
using namespace std;

int n,g[maxs][maxc],f[maxs],out[nmax],freq[nmax];
string arr[nmax],text;

void read()
{
    ifstream fin("ahocorasick.in");
    fin>>text;
    fin>>n;
    for(int i=0;i<n;i++)
        fin>>arr[i];
    fin.close();

}

//Construire Trie, reprezentata prin matrice g, are O(n)
void build()
{
    memset(g,-1,sizeof(g));
    int index=1,i,j,ch;
    for(i=0;i<n;i++)
    {
        int curr_state=0;
        for(j=0;j<arr[i].size();j++)
        {
            ch=arr[i][j]-'a';
            if(g[curr_state][ch]==-1)
                g[curr_state][ch]=index++;
            curr_state=g[curr_state][ch];
        }
        out[curr_state]|=(1<<i);
        //cout<<out[curr_state]<<' ';
        //cout<<arr[i]<<' ';
    }
    for (int ch = 0; ch < maxc; ++ch)
        if (g[0][ch] == -1)
            g[0][ch] = 0;

}

//construirea lui failure are complexitate liniara prin bfs O(n)
void suffix_link()
{
    queue<int> q;
    memset(f,-1,sizeof(f));
    int ch,x,failure;
    //adaug toate literele existente la radacina
    for(ch=0;ch<maxc;ch++)
        if(g[0][ch])
    {
        f[g[0][ch]]=0;
        //cout<<g[0][ch]<<' ';
        q.push(g[0][ch]);
    }
    //corectitudinea este asigurata de bfs()
    while(!q.empty())
    {
        x=q.front();
        //cout<<x<<' ';
        q.pop();
        for(ch=0;ch<maxc;ch++)
            if(g[x][ch]!=-1)
        {
            failure=f[x];
            //cout<<failure<<' ';
            while(g[failure][ch]==-1 )
                failure=f[failure];
            //Prin constructie se asigura maximalitatea
            failure=g[failure][ch];
            //out<<g[x][ch]<<' '<<failure<<' ';
            f[g[x][ch]]=failure;
            //verificam daca e cumva si cuvant(eventual i se ataseaza si output link)
            out[g[x][ch]]|=out[failure];
            //cout<<out[g[x][ch]]<<' ';
            q.push(g[x][ch]);
        }

    }
    //Folosirea de suffix links reduce complexitatea lui AC de la O(m*Lmax) la O(m)

}

int next_state(int curr_state, int ch)
{
    int ans=curr_state;
    while(g[ans][ch]==-1)
        ans=f[ans];
    //if(ans==0 && g[ans][ch]==-1) return 0;
    return g[ans][ch];
}

void AC()
{
    int curr_state=0,i,j,ch;
    for(i=0;i<text.size();i++)
    {
        //cout<<text[i]<<' ';
        ch=text[i]-'a';
        curr_state=next_state(curr_state,ch);
        //cout<<curr_state<<' ';
        if(!out[curr_state])
            continue;
        for(j=0;j<n;j++)
            if(out[curr_state]&(1<<j) )
              //cout << "Word " << arr[j] << " appears from "<< i - arr[j].size() + 1 << " to " << i << endl;
            freq[j]++;
        //O(m+z)

    }
    ofstream fout("ahocorasick.out");
    for(i=0;i<n;i++)
        fout<<freq[i]<<'\n';
    fout.close();
}

int main()
{
    read();
    //cout<<text;
    build();
    //cout<<g[4][0];
    suffix_link();
    //cout<<out[2]&(1<<5);
    //cout<<f[2];
    AC();
    //Complexitate finala O(m+n+z)
}