Pagini recente » Cod sursa (job #2685309) | Cod sursa (job #2396188) | Cod sursa (job #1267298) | Cod sursa (job #1289782) | Cod sursa (job #841395)
Cod sursa(job #841395)
#include <fstream>
#include <iostream>
#include <string>
#include <cstring>
#include <algorithm>
#include <vector>
#include <iterator>
using namespace std;
class AhoCorasick
{
public:
AhoCorasick() :
m_nodeRoot(new Node)
{
m_nodeRoot->SetParent(m_nodeRoot);
}
~AhoCorasick()
{
if (m_nodeRoot != NULL)
{
delete m_nodeRoot;
}
}
void AddPattern(const string& sPattern, int iIndex)
{
Node* current = m_nodeRoot;
for (string::const_iterator it = sPattern.begin();
it != sPattern.end();
++it)
{
Node* successor = current->GetSuccessor(*it);
if (successor == NULL)
{
current->SetSuccessor(*it);
current->GetSuccessor(*it)->Symbol = *it;
current->GetSuccessor(*it)->SetParent(current);
current = current->GetSuccessor(*it);
}
else
{
current = successor;
}
}
current->TerminalIndex = iIndex;
}
void PreprocessPatterns()
{
m_nodeRoot->SetNextPossibleMatch(m_nodeRoot);
EnqueueSuccessors(m_nodeRoot);
//cout << m_nodeRoot << endl;
for (int i=0; i<m_vProcessedNodes.size(); ++i)
{
Node* nextPossibleMatchOfParent = m_nodeRoot->GetSuccessor(m_vProcessedNodes[i]->Symbol);
m_vProcessedNodes[i]->SetNextPossibleMatch(m_nodeRoot);
if (nextPossibleMatchOfParent != NULL && nextPossibleMatchOfParent != m_vProcessedNodes[i])
{
m_vProcessedNodes[i]->SetNextPossibleMatch(nextPossibleMatchOfParent);
}
nextPossibleMatchOfParent = m_vProcessedNodes[i]->GetParent()->GetNextPossibleMatch();
do
{
Node* nextPossibleMatch = nextPossibleMatchOfParent->GetSuccessor(m_vProcessedNodes[i]->Symbol);
if (nextPossibleMatch != NULL && nextPossibleMatch != m_vProcessedNodes[i])
{
m_vProcessedNodes[i]->SetNextPossibleMatch(nextPossibleMatch);
break;
}
nextPossibleMatchOfParent = nextPossibleMatchOfParent->GetNextPossibleMatch();
} while(nextPossibleMatchOfParent != m_nodeRoot);
//cout << m_vProcessedNodes[i]->TerminalIndex << " " << m_vProcessedNodes[i]->Symbol << " " << m_vProcessedNodes[i] << " " << m_vProcessedNodes[i]->GetNextPossibleMatch() << endl;
EnqueueSuccessors(m_vProcessedNodes[i]);
}
//cout << endl << endl;
//cout << m_nodeRoot->GetSuccessor('a')->GetSuccessor('b')->GetNextPossibleMatch() << " " << m_nodeRoot->GetSuccessor('b') << endl;
}
void Match(const string& sText) const
{
Node* current = m_nodeRoot;
for (string::const_iterator it = sText.begin();
it != sText.end();
++it)
{
while (current->GetSuccessor(*it) == NULL && current != m_nodeRoot)
{
current = current->GetNextPossibleMatch();
}
if (current->GetSuccessor(*it) != NULL)
{
current = current->GetSuccessor(*it);
current->Matched++;
}
}
}
void AggregateMatches(vector<int>& vMatches)
{
for (int i=m_vProcessedNodes.size()-1; i>=0; --i)
{
//cout << m_vProcessedNodes[i] << endl;
m_vProcessedNodes[i]->GetNextPossibleMatch()->Matched += m_vProcessedNodes[i]->Matched;
if (m_vProcessedNodes[i]->TerminalIndex > 0)
{
vMatches[m_vProcessedNodes[i]->TerminalIndex-1] = m_vProcessedNodes[i]->Matched;
}
}
}
private:
struct Node;
void EnqueueSuccessors(Node* localRoot)
{
for (char ch='a'; ch <= 'z'; ++ch)
{
Node* node = localRoot->GetSuccessor(ch);
if (node != NULL)
{
m_vProcessedNodes.push_back(node);
}
}
}
struct Node
{
Node() :
Symbol(NULL),
Matched(0),
TerminalIndex(0),
m_nodeNextPossibleMatch(NULL),
m_nodeParent(NULL)
{
memset(m_sSuccessors, 0, m_iMaxAlphabet*sizeof(Node*));
}
static const int m_iMaxAlphabet = 26;
inline static int GetSymbolIndex(char ch)
{
return int(ch - 'a');
}
inline Node* GetSuccessor(char ch)
{
return m_sSuccessors[GetSymbolIndex(ch)];
}
inline void SetSuccessor(char ch)
{
m_sSuccessors[GetSymbolIndex(ch)] = new Node;
}
inline Node* GetParent()
{
return m_nodeParent;
}
inline void SetParent(Node* parent)
{
m_nodeParent = parent;
}
inline Node* GetNextPossibleMatch()
{
return m_nodeNextPossibleMatch;
}
inline void SetNextPossibleMatch(Node* node)
{
m_nodeNextPossibleMatch = node;
}
char Symbol;
int Matched;
int TerminalIndex;
Node* m_nodeNextPossibleMatch;
Node* m_nodeParent;
Node* m_sSuccessors[m_iMaxAlphabet];
};
Node* m_nodeRoot;
vector<Node*> m_vProcessedNodes;
};
int main()
{
int iPatterns = 0;
string sText;
AhoCorasick ahoCorasick;
fstream fin("ahocorasick.in", fstream::in);
fstream fout("ahocorasick.out", fstream::out);
fin >> sText >> iPatterns;
//cout << sText << endl << iPatterns << endl;
vector<int> vMatched(iPatterns, 0);
for (int i=1; i<=iPatterns; ++i)
{
string sPattern;
fin >> sPattern;
//cout << sPattern << endl;
ahoCorasick.AddPattern(sPattern, i);
}
ahoCorasick.PreprocessPatterns();
ahoCorasick.Match(sText);
ahoCorasick.AggregateMatches(vMatched);
ostream_iterator<int> itOut(fout, "\n");
copy(vMatched.begin(), vMatched.end(), itOut);
fin.close();
fout.close();
return 0;
}