Cod sursa(job #2087715)

Utilizator valinetromaniaValiNet Romania valinetromania Data 14 decembrie 2017 02:03:38
Problema Aho-Corasick Scor 0
Compilator c Status done
Runda Arhiva educationala Marime 5.03 kb
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include "../../util/c/queue.h"
#include "../../util/c/utility.h"

#define STATES 1000

Trie trie[STATES][SCHAR_MAX + 1];
Trie output[STATES][SCHAR_MAX + 1];
Trie failure[STATES];
char text[STRINGLENGTH];
char matches[STRINGLENGTH];

Trie* buildTrie(char** argv, int argc, int verbose) {
    size_t i, j;
    Trie state = 1;
    Trie current = 0;
    Queue* queue;
    Trie stat, fail;
    
    memset(output, 0, sizeof(output));
    memset(trie, -1, sizeof(trie));

    for (i = 1 + verbose; i < argc; ++i) {
        current = 0;
        for (j = 0; j < strlen(argv[i]); j++) {
            if (trie[current][((unsigned char**)argv)[i][j]] == (Trie)-1) {
                trie[current][((unsigned char**)argv)[i][j]] = state;
                state++;
            }
            current = trie[current][((unsigned char**)argv)[i][j]];
        }
        output[current][i] = 1;
    }

    for (i = 0; i < SCHAR_MAX + 1; ++i) {
        if (trie[0][i] == (Trie)-1) {
            trie[0][i] = 0;
        }
    }

    queue = newQueue(sizeof(Trie));
    if (queue == NULL) {
        return NULL;
    }

    memset(failure, -1, sizeof(failure));
    for (i = 0; i < SCHAR_MAX + 1; ++i) {
        if (trie[0][i] != 0) {
            failure[trie[0][i]] = 0;
            enqueue(queue, &trie[0][i]);
        }
    }

    while (queue->size) {
        stat = *(Trie*)(dequeue(queue));
        for (i = 0; i < SCHAR_MAX + 1; ++i) {
            if (trie[stat][i] != (Trie)-1) {
                fail = failure[stat];
                while (trie[fail][i] == (Trie)-1) {
                    fail = failure[fail];
                }
                fail = trie[fail][i];
                failure[trie[stat][i]] = fail;
                for (j = 0; j < SCHAR_MAX + 1; j++) {
                    if (output[fail][j] == 1) {
                        output[trie[stat][i]][j] = 1;
                    }
                }
                enqueue(queue, &trie[stat][i]);
            }
        }
    }

    destroy(queue);

    return *trie;
}

Trie nextState(Trie currentState, unsigned char nextInput) {
    Trie stat = currentState;
    while (trie[stat][nextInput] == (Trie)-1) {
        stat = failure[stat];
    }
    return trie[stat][nextInput];
}

int ahocorasick(char** argv, int argc, string text, 
    int verbose, int sameColour, int fromFile) {
    size_t i, j, t, match = 0;
    bool found;
    Trie currentState;
    buildTrie(argv, argc, verbose);
    currentState = 0;
    for (i = 0; i < strlen(text); i++) {
        currentState = nextState(currentState, text[i]);
        found = FALSE;
        for (j = 0; j < SCHAR_MAX + 1; j++) {
            if (output[currentState][j] == 1) {
                found = TRUE;
                break;
            }
        }
        if (found == FALSE) {
            continue;
        }
        for (j = 1 + verbose; j < argc; j++)
        {
            if (output[currentState][j] == 1) {
                if (!fromFile) {
                    if (verbose) {
                        printf("Word %s appears from %d to %d.\n", 
                            argv[j], i - strlen(argv[j]) + 1, i);
                    }
                    else {
                        for (t = i - strlen(argv[j]) + 1; t <= i; t++) {
                            matches[t] = j - verbose - sameColour;
                        }
                    }
                } else {
                    matches[j]++;
                }
                match++;
            }
        }
    }
    return match;
}

int main(int argc, char** argv) {
    size_t i, j;
    char** words;
    int wordsLength;
    int strlens;
    bool verbose, sameColour;
    if (argc > 1) {
        strlens = read(text);
        ahocorasick(argv, argc, text, verbose = !strcmp(argv[1], "--verbose"),
            sameColour = !strcmp(argv[1], "--samecolour"), FALSE);
        if (!verbose) {
		    print(text, strlens, matches, sameColour);
        }
    } else {
        if (freopen("ahocorasick.in", "r", stdin) == NULL) {
		    return -1;
	    }
        if (freopen("ahocorasick.out", "w", stdout) == NULL) {
		    return -1;
	    }
        if (scanf("%s", text) != 1) {
		    return -1;
	    }
        if (scanf("%d", &wordsLength) != 1) {
            return -1;
        }
        wordsLength++;
        words = malloc(wordsLength * sizeof(char*));
        if (words == NULL) {
            return -1;
        }
        for (i = 1; i < wordsLength; i++) {
            words[i] = malloc(AHOPATTERNLENGTH * sizeof(char));
            if (words[i] == NULL) {
                for (j = 0; j < i; j++) {
                    free(words[j]);
                }
                free(words);
                return -1;
            }
            if (scanf("%s", words[i]) != 1) {
                return -1;
            }
        }
        ahocorasick(words, wordsLength, text, FALSE, FALSE, TRUE);
        for (i = 1; i < wordsLength; i++) {
            printf("%d\n", matches[i]);
            free(words[i]);
        }
        free(words);
        fclose(stdin);
        fclose(stdout);
    }

    return 0;
}