Cod sursa(job #841396)

Utilizator claudiumihailClaudiu Mihail claudiumihail Data 24 decembrie 2012 05:40:48
Problema Aho-Corasick Scor 100
Compilator cpp Status done
Runda Arhiva educationala Marime 6.7 kb
#include <fstream>
#include <iostream>
#include <string>
#include <cstring>
#include <algorithm>
#include <vector>
#include <iterator>
 
using namespace std;
 
class AhoCorasick
{
public:
    AhoCorasick() :
        m_nodeRoot(new Node)
    {
        m_nodeRoot->SetParent(m_nodeRoot);
    }
     
    ~AhoCorasick()
    {
        if (m_nodeRoot != NULL)
        {
            delete m_nodeRoot;
        }
    }
     
    void AddPattern(const string& sPattern, int iIndex)
    {
        Node* current = m_nodeRoot;
         
        for (string::const_iterator it = sPattern.begin();
             it != sPattern.end();
             ++it)
        {            
            Node* successor = current->GetSuccessor(*it);
             
            if (successor == NULL)
            {
                current->SetSuccessor(*it);
                current->GetSuccessor(*it)->Symbol = *it;
                current->GetSuccessor(*it)->SetParent(current);
                current = current->GetSuccessor(*it);
            }
            else
            {
                current = successor;
            } 
        }
         
        current->TerminalIndexes.push_back(iIndex);
    }
     
    void PreprocessPatterns()
    {
        m_nodeRoot->SetNextPossibleMatch(m_nodeRoot);
        EnqueueSuccessors(m_nodeRoot);
         
        //cout << m_nodeRoot << endl;
 
        for (int i=0; i<m_vProcessedNodes.size(); ++i)
        {
            Node* nextPossibleMatchOfParent = m_nodeRoot->GetSuccessor(m_vProcessedNodes[i]->Symbol);
            m_vProcessedNodes[i]->SetNextPossibleMatch(m_nodeRoot);
            if (nextPossibleMatchOfParent != NULL && nextPossibleMatchOfParent != m_vProcessedNodes[i])
            {
                m_vProcessedNodes[i]->SetNextPossibleMatch(nextPossibleMatchOfParent);
            }
 
            nextPossibleMatchOfParent = m_vProcessedNodes[i]->GetParent()->GetNextPossibleMatch();
            do
            {
                Node* nextPossibleMatch = nextPossibleMatchOfParent->GetSuccessor(m_vProcessedNodes[i]->Symbol);
                 
                if (nextPossibleMatch != NULL && nextPossibleMatch != m_vProcessedNodes[i])
                {
                    m_vProcessedNodes[i]->SetNextPossibleMatch(nextPossibleMatch);
                    break;
                }
                 
                nextPossibleMatchOfParent = nextPossibleMatchOfParent->GetNextPossibleMatch();
            } while(nextPossibleMatchOfParent != m_nodeRoot);
 
            //cout << m_vProcessedNodes[i]->TerminalIndexes << "  " << m_vProcessedNodes[i]->Symbol << "    " << m_vProcessedNodes[i] << "     " << m_vProcessedNodes[i]->GetNextPossibleMatch() << endl;
             
            EnqueueSuccessors(m_vProcessedNodes[i]);
        }
         
        //cout << endl << endl;
         
        //cout << m_nodeRoot->GetSuccessor('a')->GetSuccessor('b')->GetNextPossibleMatch() << "   " << m_nodeRoot->GetSuccessor('b') << endl;
    }
     
    void Match(const string& sText) const
    {
        Node* current = m_nodeRoot;
         
        for (string::const_iterator it = sText.begin();
             it != sText.end();
             ++it)
        {
            while (current->GetSuccessor(*it) == NULL && current != m_nodeRoot)
            {
                current = current->GetNextPossibleMatch();
            }
             
            if (current->GetSuccessor(*it) != NULL)
            {
                current = current->GetSuccessor(*it);
                current->Matched++;
            }
        }
    }
     
    void AggregateMatches(vector<int>& vMatches)
    {
        for (int i=m_vProcessedNodes.size()-1; i>=0; --i)
        {
            //cout << m_vProcessedNodes[i] << endl;
 
            m_vProcessedNodes[i]->GetNextPossibleMatch()->Matched += m_vProcessedNodes[i]->Matched;
            
            for (vector<int>::const_iterator it=m_vProcessedNodes[i]->TerminalIndexes.begin();
                 it != m_vProcessedNodes[i]->TerminalIndexes.end();
                 ++it)
            {
                vMatches[*it] = m_vProcessedNodes[i]->Matched;
            }
        }
    }
 
private:
 
    struct Node;
 
    void EnqueueSuccessors(Node* localRoot)
    {
        for (char ch='a'; ch <= 'z'; ++ch)
        {
            Node* node = localRoot->GetSuccessor(ch);
            if (node != NULL)
            {
                m_vProcessedNodes.push_back(node);
            }
        }
    }
 
    struct Node
    {
        Node() :
            Symbol(NULL),
            Matched(0),
            m_nodeNextPossibleMatch(NULL),
            m_nodeParent(NULL)
        {
            memset(m_sSuccessors, 0, m_iMaxAlphabet*sizeof(Node*));
        }
         
        static const int m_iMaxAlphabet = 26;
         
        inline static int GetSymbolIndex(char ch)
        {
            return int(ch - 'a');
        }
         
        inline Node* GetSuccessor(char ch)
        {
            return m_sSuccessors[GetSymbolIndex(ch)];
        }
         
        inline void SetSuccessor(char ch)
        {
            m_sSuccessors[GetSymbolIndex(ch)] = new Node;
        }
         
        inline Node* GetParent()
        {
            return m_nodeParent;
        }
         
        inline void SetParent(Node* parent)
        {
            m_nodeParent = parent;
        }
         
        inline Node* GetNextPossibleMatch()
        {
            return m_nodeNextPossibleMatch;
        }
         
        inline void SetNextPossibleMatch(Node* node)
        {
            m_nodeNextPossibleMatch = node;
        }
 
        char Symbol;
        int Matched;
        vector<int> TerminalIndexes;
 
        Node* m_nodeNextPossibleMatch;
        Node* m_nodeParent;
        Node* m_sSuccessors[m_iMaxAlphabet];
    };
 
    Node* m_nodeRoot;
    vector<Node*> m_vProcessedNodes;
};
 
int main()
{
    int iPatterns = 0;
    string sText;
    AhoCorasick ahoCorasick;
    fstream fin("ahocorasick.in", fstream::in);
    fstream fout("ahocorasick.out", fstream::out);
     
    fin >> sText >> iPatterns;
    //cout << sText << endl << iPatterns << endl;
     
    vector<int> vMatched(iPatterns, 0);
     
    for (int i=0; i<iPatterns; ++i)
    {
        string sPattern;
        fin >> sPattern;
        
        ahoCorasick.AddPattern(sPattern, i); 
    }
     
    ahoCorasick.PreprocessPatterns();
     
    ahoCorasick.Match(sText);
     
    ahoCorasick.AggregateMatches(vMatched);
     
    ostream_iterator<int> itOut(fout, "\n");
    copy(vMatched.begin(), vMatched.end(), itOut);
     
    fin.close();
    fout.close();
    return 0;
}