Pagini recente » Cod sursa (job #1778265) | Cod sursa (job #2749731) | Cod sursa (job #1959536) | Cod sursa (job #194099) | Cod sursa (job #663868)
Cod sursa(job #663868)
#include <fstream>
#include <iostream>
#include <cstring>
#define MAX_TEXT_SIZE 1000001
#define MAX_QUEUE_SIZE 1001001
#define MAX_WORD_SIZE 10000
#define MAX_WORDS 101
#define MAX_CHILDREN 26
using namespace std;
typedef unsigned int uint32;
typedef unsigned short uint16;
template<typename MaxNumWordsType>
struct Trie
{
Trie() :
pFail(NULL),
numWordsEndingHere(0),
ID(0)
{
memset(vChild, NULL, MAX_CHILDREN*sizeof(Trie<MaxNumWordsType>*));
}
Trie<MaxNumWordsType>* pFail;
MaxNumWordsType numWordsEndingHere;
MaxNumWordsType ID;
Trie<MaxNumWordsType>* vChild[MAX_CHILDREN];
};
template<uint32 uSize>
class AhoCorasickDictionary
{
public:
AhoCorasickDictionary() :
uQL(0),
uQR(0)
{
trieRoot.pFail = &trieRoot;
Q = new (std::nothrow) Trie<uint16>*[uSize];
}
inline void insert(const char* szWord, const uint16 uID)
{
uint16 index = 0;
Trie<uint16>* ptrTrie = &trieRoot;
while (szWord[index] != 0)
{
if (ptrTrie->vChild[get_child_index(szWord[index])] == NULL)
{
ptrTrie->vChild[get_child_index(szWord[index])] = new (std::nothrow) Trie<uint16>;
}
ptrTrie = ptrTrie->vChild[get_child_index(szWord[index])];
if (szWord[index+1] == 0)
{
ptrTrie->ID = uID;
}
index++;
}
}
inline void AhoCorasick(const char szText[], uint16 wordFreq[])
{
SetFailLinks();
Trie<uint16>* ptrNode = &trieRoot;
for (uint32 i=0; i<strlen(szText); ++i)
{
const int index = get_child_index(szText[i]);
while (ptrNode != &trieRoot && ptrNode->vChild[index] == NULL)
{
ptrNode = ptrNode->pFail;
}
if (ptrNode->vChild[index] != NULL)
{
ptrNode = ptrNode->vChild[index];
}
ptrNode->numWordsEndingHere++;
}
ComputeFreq(wordFreq);
}
private:
inline void SetFailLinks()
{
push(&trieRoot);
while (!empty())
{
Trie<uint16>* ptrNode = pop();
for (uint32 i=0; i<26; ++i)
{
if (ptrNode->vChild[i] != NULL)
{
Trie<uint16>* ptrFail = ptrNode->pFail;
while (ptrFail != &trieRoot && ptrFail->vChild[i] == NULL)
{
ptrFail = ptrFail->pFail;
}
if (ptrFail->vChild[i] != NULL && ptrFail->vChild[i] != ptrNode->vChild[i])
{
ptrFail = ptrFail->vChild[i];
}
ptrNode->vChild[i]->pFail = ptrFail;
push(ptrNode->vChild[i]);
}
}
}
trieRoot.pFail = NULL;
//cout << trieRoot.vChild[2]->vChild[0]->vChild[0]->ID << " " << trieRoot.vChild[0] << endl;
}
inline void ComputeFreq(uint16 wordFreq[])
{
while (uQR > 0)
{
Trie<uint16>* ptrNode = Q[uQR-1];
uQR--;
if (ptrNode->pFail != NULL)
{
ptrNode->pFail->numWordsEndingHere += ptrNode->numWordsEndingHere;
}
wordFreq[ptrNode->ID-1] = ptrNode->numWordsEndingHere;
}
}
inline void push(Trie<uint16>* ptr)
{
Q[uQR] = ptr;
uQR++;
}
inline Trie<uint16>* pop()
{
uQL++;
return Q[uQL-1];
}
inline bool empty() const
{
return (uQL >= uQR);
}
inline uint32 get_child_index(char ch)
{
return ch - 'a';
}
int uQL, uQR;
Trie<uint16>* *Q;
Trie<uint16> trieRoot;
};
char text[MAX_TEXT_SIZE];
char word[MAX_WORD_SIZE];
uint16 wordFreq[MAX_WORDS];
int main()
{
uint32 dictSize;
AhoCorasickDictionary<MAX_QUEUE_SIZE> acDictionary;
fstream fin("ahocorasick.in", fstream::in);
fstream fout("ahocorasick.out", fstream::out);
fin >> text;
//cout << text << endl;
fin >> dictSize;
//cout << dictSize << endl;
for (uint32 i=1; i<=dictSize; ++i)
{
fin >> word;
//cout << word << endl;
acDictionary.insert(word, i);
}
acDictionary.AhoCorasick(text, wordFreq);
for (uint32 i=0; i<dictSize; ++i)
{
fout << wordFreq[i] << "\n";
}
fin.close();
fout.close();
return 0;
}