#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "../../util/c/queue.h"
#include "../../util/c/utility.h"
#define STATES 1000
Trie trie[STATES][SCHAR_MAX + 1];
Trie output[STATES][SCHAR_MAX + 1];
Trie failure[STATES];
char text[STRINGLENGTH];
char matches[STRINGLENGTH];
Trie* buildTrie(char** argv, int argc, int verbose) {
size_t i, j;
Trie state = 1;
Trie current = 0;
Queue* queue;
Trie stat, fail;
memset(output, 0, sizeof(output));
memset(trie, -1, sizeof(trie));
for (i = 1 + verbose; i < argc; ++i) {
current = 0;
for (j = 0; j < strlen(argv[i]); j++) {
if (trie[current][((unsigned char**)argv)[i][j]] == (Trie)-1) {
trie[current][((unsigned char**)argv)[i][j]] = state;
state++;
}
current = trie[current][((unsigned char**)argv)[i][j]];
}
output[current][i] = 1;
}
for (i = 0; i < SCHAR_MAX + 1; ++i) {
if (trie[0][i] == (Trie)-1) {
trie[0][i] = 0;
}
}
queue = newQueue(sizeof(Trie));
if (queue == NULL) {
return NULL;
}
memset(failure, -1, sizeof(failure));
for (i = 0; i < SCHAR_MAX + 1; ++i) {
if (trie[0][i] != 0) {
failure[trie[0][i]] = 0;
enqueue(queue, &trie[0][i]);
}
}
while (queue->size) {
stat = *(Trie*)(dequeue(queue));
for (i = 0; i < SCHAR_MAX + 1; ++i) {
if (trie[stat][i] != (Trie)-1) {
fail = failure[stat];
while (trie[fail][i] == (Trie)-1) {
fail = failure[fail];
}
fail = trie[fail][i];
failure[trie[stat][i]] = fail;
for (j = 0; j < SCHAR_MAX + 1; j++) {
if (output[fail][j] == 1) {
output[trie[stat][i]][j] = 1;
}
}
enqueue(queue, &trie[stat][i]);
}
}
}
destroy(queue);
return *trie;
}
Trie nextState(Trie currentState, unsigned char nextInput) {
Trie stat = currentState;
while (trie[stat][nextInput] == (Trie)-1) {
stat = failure[stat];
}
return trie[stat][nextInput];
}
int ahocorasick(char** argv, int argc, string text,
int verbose, int sameColour, int fromFile) {
size_t i, j, t, match = 0;
bool found;
Trie currentState;
buildTrie(argv, argc, verbose);
currentState = 0;
for (i = 0; i < strlen(text); i++) {
currentState = nextState(currentState, text[i]);
found = FALSE;
for (j = 0; j < SCHAR_MAX + 1; j++) {
if (output[currentState][j] == 1) {
found = TRUE;
break;
}
}
if (found == FALSE) {
continue;
}
for (j = 1 + verbose; j < argc; j++)
{
if (output[currentState][j] == 1) {
if (!fromFile) {
if (verbose) {
printf("Word %s appears from %d to %d.\n",
argv[j], i - strlen(argv[j]) + 1, i);
}
else {
for (t = i - strlen(argv[j]) + 1; t <= i; t++) {
matches[t] = j - verbose - sameColour;
}
}
} else {
matches[j]++;
}
match++;
}
}
}
return match;
}
int main(int argc, char** argv) {
size_t i, j;
char** words;
int wordsLength;
int strlens;
bool verbose, sameColour;
if (argc > 1) {
strlens = read(text);
ahocorasick(argv, argc, text, verbose = !strcmp(argv[1], "--verbose"),
sameColour = !strcmp(argv[1], "--samecolour"), FALSE);
if (!verbose) {
print(text, strlens, matches, sameColour);
}
} else {
if (freopen("ahocorasick.in", "r", stdin) == NULL) {
return -1;
}
if (freopen("ahocorasick.out", "w", stdout) == NULL) {
return -1;
}
if (scanf("%s", text) != 1) {
return -1;
}
if (scanf("%d", &wordsLength) != 1) {
return -1;
}
wordsLength++;
words = malloc(wordsLength * sizeof(char*));
if (words == NULL) {
return -1;
}
for (i = 1; i < wordsLength; i++) {
words[i] = malloc(AHOPATTERNLENGTH * sizeof(char));
if (words[i] == NULL) {
for (j = 0; j < i; j++) {
free(words[j]);
}
free(words);
return -1;
}
if (scanf("%s", words[i]) != 1) {
return -1;
}
}
ahocorasick(words, wordsLength, text, FALSE, FALSE, TRUE);
for (i = 1; i < wordsLength; i++) {
printf("%d\n", matches[i]);
free(words[i]);
}
free(words);
fclose(stdin);
fclose(stdout);
}
return 0;
}