Cod sursa(job #841394)

Utilizator claudiumihailClaudiu Mihail claudiumihail Data 24 decembrie 2012 04:34:04
Problema Aho-Corasick Scor 15
Compilator cpp Status done
Runda Arhiva educationala Marime 6.36 kb
#include <fstream>
#include <iostream>
#include <string>
#include <cstring>
#include <algorithm>
#include <vector>
#include <iterator>

using namespace std;

class AhoCorasick
{
public:
    AhoCorasick() :
        m_nodeRoot(new Node)
    {
        m_nodeRoot->SetParent(m_nodeRoot);
    }
    
    ~AhoCorasick()
    {
        if (m_nodeRoot != NULL)
        {
            delete m_nodeRoot;
        }
    }
    
    void AddPattern(const string& sPattern, int iIndex)
    {
        Node* current = m_nodeRoot;
        
        for (string::const_iterator it = sPattern.begin();
             it != sPattern.end();
             ++it)
        {            
            Node* successor = current->GetSuccessor(*it);
            
            if (successor == NULL)
            {
                current->SetSuccessor(*it);
                current->GetSuccessor(*it)->Symbol = *it;
                current->GetSuccessor(*it)->SetParent(current);
                current = current->GetSuccessor(*it);
            }
            else
            {
                current = successor;
            } 
        }
        
        current->TerminalIndex = iIndex;
    }
    
    void PreprocessPatterns()
    {
        m_nodeRoot->SetNextPossibleMatch(m_nodeRoot);
        EnqueueSuccessors(m_nodeRoot);
        
        //cout << m_nodeRoot << endl;

        for (int i=0; i<m_vProcessedNodes.size(); ++i)
        {
            m_vProcessedNodes[i]->SetNextPossibleMatch(m_nodeRoot);
            Node* nextPossibleMatchOfParent = m_vProcessedNodes[i]->GetParent()->GetNextPossibleMatch();
            do
            {
                Node*  nextPossibleMatch = nextPossibleMatchOfParent->GetSuccessor(m_vProcessedNodes[i]->Symbol);
                
                if (nextPossibleMatch != NULL && nextPossibleMatch != m_vProcessedNodes[i])
                {
                    m_vProcessedNodes[i]->SetNextPossibleMatch(nextPossibleMatch);
                    break;
                }
                else
                {
                    m_vProcessedNodes[i]->SetNextPossibleMatch(nextPossibleMatchOfParent);
                }
                
                nextPossibleMatchOfParent = nextPossibleMatchOfParent->GetParent()->GetNextPossibleMatch();
            } while(nextPossibleMatchOfParent != m_nodeRoot);
            
            //cout << m_vProcessedNodes[i]->TerminalIndex << "  " << m_vProcessedNodes[i]->Symbol << "    " << m_vProcessedNodes[i] << "     " << m_vProcessedNodes[i]->GetNextPossibleMatch() << endl;
            
            EnqueueSuccessors(m_vProcessedNodes[i]);
        }
        
        //cout << m_nodeRoot->GetSuccessor('a')->GetSuccessor('b')->GetNextPossibleMatch() << "   " << m_nodeRoot->GetSuccessor('b') << endl;
    }
    
    void Match(const string& sText) const
    {
        Node* current = m_nodeRoot;
        
        for (string::const_iterator it = sText.begin();
             it != sText.end();
             ++it)
        {
            while (current->GetSuccessor(*it) == NULL && current != m_nodeRoot)
            {
                current = current->GetNextPossibleMatch();
            }
            
            if (current->GetSuccessor(*it) != NULL)
            {
                current = current->GetSuccessor(*it);
                current->Matched++;
            }
        }
    }
    
    void AggregateMatches(vector<int>& vMatches)
    {
        for (int i=m_vProcessedNodes.size()-1; i>=0; --i)
        {
            m_vProcessedNodes[i]->GetNextPossibleMatch()->Matched += m_vProcessedNodes[i]->Matched;
            if (m_vProcessedNodes[i]->TerminalIndex > 0)
            {
                vMatches[m_vProcessedNodes[i]->TerminalIndex-1] = m_vProcessedNodes[i]->Matched;
            }
        }
    }

private:

    struct Node;

    void EnqueueSuccessors(Node* localRoot)
    {
        for (char ch='a'; ch <= 'z'; ++ch)
        {
            Node* node = localRoot->GetSuccessor(ch);
            if (node != NULL)
            {
                m_vProcessedNodes.push_back(node);
            }
        }
    }

    struct Node
    {
        Node() :
            Symbol(NULL),
            Matched(0),
            TerminalIndex(0),
            m_nodeNextPossibleMatch(NULL),
            m_nodeParent(NULL)
        {
            memset(m_sSuccessors, 0, m_iMaxAlphabet*sizeof(Node*));
        }
        
        static const int m_iMaxAlphabet = 26;
        
        inline static int GetSymbolIndex(char ch)
        {
            return int(ch - 'a');
        }
        
        inline Node* GetSuccessor(char ch)
        {
            return m_sSuccessors[GetSymbolIndex(ch)];
        }
        
        inline void SetSuccessor(char ch)
        {
            m_sSuccessors[GetSymbolIndex(ch)] = new Node;
        }
        
        inline Node* GetParent()
        {
            return m_nodeParent;
        }
        
        inline void SetParent(Node* parent)
        {
            m_nodeParent = parent;
        }
        
        inline Node* GetNextPossibleMatch()
        {
            return m_nodeNextPossibleMatch;
        }
        
        inline void SetNextPossibleMatch(Node* node)
        {
            m_nodeNextPossibleMatch = node;
        }

        char Symbol;
        int Matched;
        int TerminalIndex;

        Node* m_nodeNextPossibleMatch;
        Node* m_nodeParent;
        Node* m_sSuccessors[m_iMaxAlphabet];
    };

    Node* m_nodeRoot;
    vector<Node*> m_vProcessedNodes;
};

int main()
{
    int iPatterns = 0;
    string sText;
    AhoCorasick ahoCorasick;
    fstream fin("ahocorasick.in", fstream::in);
    fstream fout("ahocorasick.out", fstream::out);
    
    fin >> sText >> iPatterns;
    //cout << sText << endl << iPatterns << endl;
    
    vector<int> vMatched(iPatterns, 0);
    
    for (int i=1; i<=iPatterns; ++i)
    {
        string sPattern;
        fin >> sPattern;
        
        //cout << sPattern << endl;
        
        ahoCorasick.AddPattern(sPattern, i);
    }
    
    ahoCorasick.PreprocessPatterns();
    
    ahoCorasick.Match(sText);
    
    ahoCorasick.AggregateMatches(vMatched);
    
    ostream_iterator<int> itOut(fout, "\n");
    copy(vMatched.begin(), vMatched.end(), itOut);
    
    fin.close();
    fout.close();
    return 0;
}