Cod sursa(job #665454)

Utilizator claudiumihailClaudiu Mihail claudiumihail Data 22 ianuarie 2012 07:22:45
Problema Aho-Corasick Scor 100
Compilator cpp Status done
Runda Arhiva educationala Marime 3.94 kb
#include <fstream>
#include <iostream>
#include <cstring>
#include <cstdlib>
#include <vector>

#define MAX_TEXT_SIZE 1000001
#define MAX_QUEUE_SIZE 1001001
#define MAX_WORD_SIZE 10000
#define MAX_WORDS 101
#define MAX_CHILDREN 26

using namespace std;

typedef unsigned int uint32;
typedef unsigned short uint16;

template<typename MaxNumWordsType>
struct Trie
{
	Trie() :
		pFail(NULL),
		numWordsEndingHere(0)
	{
		memset(vChild, NULL, MAX_CHILDREN*sizeof(Trie<MaxNumWordsType>*));
	}

	Trie<MaxNumWordsType>* pFail;
	uint32 numWordsEndingHere;
	vector<MaxNumWordsType> IDs;
	Trie<MaxNumWordsType>* vChild[MAX_CHILDREN];
};

class ACDictionary
{
public:
	ACDictionary() :
		uQL(0),
		uQR(0)
	{
		Q = static_cast<Trie<uint16>**>(malloc(MAX_QUEUE_SIZE * sizeof(Trie<uint16>*)));
	}

	inline void Insert(const char* szWord, const uint16 uID)
	{
		uint16 index = 0;
		Trie<uint16>* ptrTrie = &trieRoot;
		
		while (szWord[index] != 0)
		{
			if (ptrTrie->vChild[get_child_index(szWord[index])] == NULL)
			{
				ptrTrie->vChild[get_child_index(szWord[index])] = new (std::nothrow) Trie<uint16>;
			}

			ptrTrie = ptrTrie->vChild[get_child_index(szWord[index])];
			if (szWord[index+1] == 0)
			{
				ptrTrie->IDs.push_back(uID);
			}
			index++;
		}
	}

	inline void SetFailLinks()
	{
		trieRoot.pFail = &trieRoot;
		push(&trieRoot);
		
		while (!empty())
		{
			Trie<uint16>* ptrNode = pop();

			for (uint32 i=0; i<26; ++i)
			{
				if (ptrNode->vChild[i] != NULL)
				{
					Trie<uint16>* ptrFail = ptrNode->pFail;
					while (ptrFail != &trieRoot && ptrFail->vChild[i] == NULL)
					{
						ptrFail = ptrFail->pFail;
					}
					
					if (ptrFail->vChild[i] != NULL && ptrFail->vChild[i] != ptrNode->vChild[i])
					{
						ptrFail = ptrFail->vChild[i];
					}
					
					ptrNode->vChild[i]->pFail = ptrFail;
					push(ptrNode->vChild[i]);
				}
			}
		}
		
		trieRoot.pFail = NULL;
		
		//cout << trieRoot.vChild[2]->vChild[0]->vChild[0]->ID << " " << trieRoot.vChild[0] << endl;
	}

	inline void AhoCorasick(const char szText[], uint32 wordFreq[])
	{
		SetFailLinks();
		
		Trie<uint16>* ptrNode = &trieRoot;
		const uint32 strSize = strlen(szText);
		for (uint32 i=0; i<strSize; ++i)
		{
			const int index = get_child_index(szText[i]);
			while (ptrNode != &trieRoot && ptrNode->vChild[index] == NULL)
			{
				ptrNode = ptrNode->pFail;
			}
			if (ptrNode->vChild[index] != NULL)
			{
				ptrNode = ptrNode->vChild[index];
			}
			ptrNode->numWordsEndingHere++;
		}
		
		ComputeFreq(wordFreq);
	}

	~ACDictionary()
	{
		free(Q);
	}
 
private:
	
	inline void ComputeFreq(uint32 wordFreq[])
	{
		while (uQR > 0)
		{
			Trie<uint16>* ptrNode = Q[uQR-1];
			uQR--;
			
			if (ptrNode->pFail != NULL)
			{
				ptrNode->pFail->numWordsEndingHere += ptrNode->numWordsEndingHere;
			}
			
			for (uint32 i=0; i<ptrNode->IDs.size(); ++i)
			{
				wordFreq[ptrNode->IDs[i]] = ptrNode->numWordsEndingHere;
			}
		}
	}

	inline bool empty() const
	{
		return (uQL >= uQR);
	}

	inline uint32 get_child_index(char ch) const
	{
		return ch - 'a';
	}

	inline void push(Trie<uint16>* ptr)
	{
		Q[uQR] = ptr;
		uQR++;
	}

	inline Trie<uint16>* pop()
	{
		uQL++;
		return Q[uQL-1];
	}

	int uQL, uQR;
	Trie<uint16>* *Q;
	Trie<uint16> trieRoot;
};


char text[MAX_TEXT_SIZE];
char word[MAX_WORD_SIZE];

uint32 wordFreq[MAX_WORDS];

int main()
{
	uint32 dictSize;
	ACDictionary acDict;
	fstream fin("ahocorasick.in", fstream::in);
	fstream fout("ahocorasick.out", fstream::out);
	
	fin >> text;
	//cout << text << endl;
	
	fin >> dictSize;
	//cout << dictSize << endl;
	
	for (uint32 i=0; i<dictSize; ++i)
	{
		fin >> word;
		//cout << word << endl;
		acDict.Insert(word, i);
	}

	acDict.AhoCorasick(text, wordFreq);
	
	for (uint32 i=0; i<dictSize; ++i)
	{
		fout << wordFreq[i] << "\n";
	}
	
	fin.close();
	fout.close();
	return 0;
}