Cod sursa(job #663870)

Utilizator claudiumihailClaudiu Mihail claudiumihail Data 19 ianuarie 2012 08:21:27
Problema Aho-Corasick Scor 20
Compilator cpp Status done
Runda Arhiva educationala Marime 3.82 kb
#include <fstream>
#include <iostream>
#include <cstring>

#define MAX_TEXT_SIZE 1000001
#define MAX_QUEUE_SIZE 1001001
#define MAX_WORD_SIZE 10000
#define MAX_WORDS 101
#define MAX_CHILDREN 26

using namespace std;

typedef unsigned int uint32;
typedef unsigned short uint16;

template<typename MaxNumWordsType>
struct Trie
{
	Trie() :
		pFail(NULL),
		numWordsEndingHere(0),
		ID(0)
	{
		memset(vChild, NULL, MAX_CHILDREN*sizeof(Trie<MaxNumWordsType>*));
	}

	Trie<MaxNumWordsType>* pFail;
	MaxNumWordsType numWordsEndingHere;
	MaxNumWordsType ID;
	Trie<MaxNumWordsType>* vChild[MAX_CHILDREN];
};

template<uint32 uSize>
class AhoCorasickDictionary
{
public:
	AhoCorasickDictionary() :
		uQL(0),
		uQR(0)
	{
		trieRoot.pFail = &trieRoot;
		Q = new (std::nothrow) Trie<uint16>*[uSize];
	}
	
	inline void insert(const char* szWord, const uint16 uID)
	{
		uint16 index = 0;
		Trie<uint16>* ptrTrie = &trieRoot;
		
		while (szWord[index] != 0)
		{
			if (ptrTrie->vChild[get_child_index(szWord[index])] == NULL)
			{
				ptrTrie->vChild[get_child_index(szWord[index])] = new (std::nothrow) Trie<uint16>;
			}

			ptrTrie = ptrTrie->vChild[get_child_index(szWord[index])];
			if (szWord[index+1] == 0)
			{
				ptrTrie->ID = uID;
			}
			index++;
		}
	}
	
	inline void AhoCorasick(const char szText[], uint16 wordFreq[])
	{
		SetFailLinks();
		
		Trie<uint16>* ptrNode = &trieRoot;
		for (uint32 i=0; i<strlen(szText); ++i)
		{
			const int index = get_child_index(szText[i]);
			while (ptrNode != &trieRoot && ptrNode->vChild[index] == NULL)
			{
				ptrNode = ptrNode->pFail;
			}
			if (ptrNode->vChild[index] != NULL)
			{
				ptrNode = ptrNode->vChild[index];
			}
			ptrNode->numWordsEndingHere++;
		}
		
		ComputeFreq(wordFreq);
	}
	
private:

	inline void SetFailLinks()
	{
		push(&trieRoot);
		
		while (!empty())
		{
			Trie<uint16>* ptrNode = pop();

			for (uint32 i=0; i<26; ++i)
			{
				if (ptrNode->vChild[i] != NULL)
				{
					Trie<uint16>* ptrFail = ptrNode->pFail;
					while (ptrFail != &trieRoot && ptrFail->vChild[i] == NULL)
					{
						ptrFail = ptrFail->pFail;
					}
					
					if (ptrFail->vChild[i] != NULL && ptrFail->vChild[i] != ptrNode->vChild[i])
					{
						ptrFail = ptrFail->vChild[i];
					}
					
					ptrNode->vChild[i]->pFail = ptrFail;
					push(ptrNode->vChild[i]);
				}
			}
		}
		
		trieRoot.pFail = NULL;
		
		//cout << trieRoot.vChild[2]->vChild[0]->vChild[0]->ID << " " << trieRoot.vChild[0] << endl;
	}
	
	inline void ComputeFreq(uint16 wordFreq[])
	{
		while (uQR > 0)
		{
			Trie<uint16>* ptrNode = Q[uQR-1];
			uQR--;
			
			if (ptrNode->pFail != NULL)
			{
				ptrNode->pFail->numWordsEndingHere += ptrNode->numWordsEndingHere;
			}
			
			wordFreq[ptrNode->ID-1] = ptrNode->numWordsEndingHere;
		}
	}
	
	inline void push(Trie<uint16>* ptr)
	{
		Q[uQR] = ptr;
		uQR++;
	}
	
	inline Trie<uint16>* pop()
	{
		uQL++;
		return Q[uQL-1];
	}
	
	inline bool empty() const
	{
		return (uQL >= uQR);
	}

	inline uint32 get_child_index(char ch)
	{
		return ch - 'a';
	}

	int uQL, uQR;
	Trie<uint16>* *Q;
	Trie<uint16> trieRoot;
};

char text[MAX_TEXT_SIZE];
char word[MAX_WORD_SIZE];

uint16 wordFreq[MAX_WORDS];

int main()
{
	uint32 dictSize;
	AhoCorasickDictionary<MAX_QUEUE_SIZE> acDictionary;
	fstream fin("ahocorasick.in", fstream::in);
	fstream fout("ahocorasick.out", fstream::out);
	
	fin >> text;
	//cout << text << endl;
	
	fin >> dictSize;
	//cout << dictSize << endl;
	
	for (uint32 i=1; i<=dictSize; ++i)
	{
		fin >> word;
		//cout << word << endl;
		acDictionary.insert(word, i);
	}
	
	acDictionary.AhoCorasick(text, wordFreq);
	
	for (uint32 i=0; i<dictSize; ++i)
	{
		fout << wordFreq[i] << "\n";
	}
	
	fin.close();
	fout.close();
	return 0;
}