Cod sursa(job #3354285)

Utilizator batistaUPB-Oprea-Cosmin-Dumitru batista Data 17 mai 2026 12:42:56
Problema Aho-Corasick Scor 95
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 4.91 kb
#include <stdio.h>
#include <string>
#include <queue>
#include <vector>
#include <unordered_map>

using namespace std;

#define TEXT_LEN 1000005
#define WORD_LEN 10005
#define ALPHABET_SIZE 26
#define NUM_WORDS 105

char text[TEXT_LEN];
char word[WORD_LEN];
int word_freq[NUM_WORDS];

struct TrieNode {
    TrieNode() : children{}, failure(nullptr), output(nullptr), count(0) {}

    TrieNode* children[ALPHABET_SIZE];

    /**
     * Pointer to another node that represents the `longest suffix for current node`,
     * equivalent to `the largest prefix in trie that is suffix for current node`.
     * This is the node where we can restart the matching from.
     * This link (blue link) is used for fast prefix matching when advancing the main text.
     */
    TrieNode* failure;

    /**
     * Pointer to the next node in the dictionary that can be reached by following failure arcs.
     * This link is used to reconstruct the solution: the prefixes that match and that are a word in the dictionary.
     * This is an optimization to avoid iterating failure links that are not a complete word when reconstructing the solution.
     */
    TrieNode *output;

    /**
     * Number of words ending in this node.
     */
    int count;

    /**
     * Ids of all words that end in this node.
     */
    vector<int> pattern_ids;

    /**
     * Cache representing all dictionary patterns that should be reported when we arrive at this node.
     * Includes: patterns ending at this node (pattern_ids) + patterns reachable through output/failure suffixes.
     */
    vector<int> matches;
};

void trie_insert(TrieNode *node, const char *str, const int i) {
    while (*str) {
        const int c = *str - 'a';
        if (!node->children[c]) {
            node->children[c] = new TrieNode();
        }

        node = node->children[c];
        ++str;
    }

    node->count++;
    node->pattern_ids.push_back(i);
}

/**
 * For every node sets a failure link to a node representing the longest possible strict suffix of it in the trie.
 * For every node sets an output link to the first node in the dictionary that can be reached  by following failure links.
 */
void bfs_build_links(TrieNode *root) {
    queue<TrieNode*> q;
    q.push(root);
    root->failure = root;

    while (!q.empty()) {
        TrieNode *parent = q.front();
        q.pop();

        for (int i = 0; i < ALPHABET_SIZE; ++i) {
            TrieNode* child = parent->children[i];
            if (!child) {
                continue;
            }
            q.push(child);

            // Compute failure node for child
            if (parent == root) {
                // Nodes on the first level don't have suffixes. Assigning the root as failure link.
                child->failure = root;
            } else {
                const TrieNode *fail = parent->failure;
                while (fail != root && !fail->children[i]) {
                    fail = fail->failure;
                }
                child->failure = fail->children[i] ? fail->children[i] : root;
            }

            // Compute the output node for child
            if (child->failure->count > 0) {
                child->output = child->failure;
            } else {
                child->output = child->failure->output;
            }

            // Precompute all matches reachable from this node.
            child->matches = child->pattern_ids;
            if (child->output) {
                child->matches.insert(
                    child->matches.end(),
                    child->output->matches.begin(),
                    child->output->matches.end()
                );
            }
        }
    }
}

void aho_corasick_collect_solution(TrieNode *root, const char *suffix, unordered_map<int, string>& index_to_word) {
    int pos = 0;
    TrieNode *node = root;

    while (*suffix != '\0') {
        const int c = *suffix - 'a';

        // Follow failure links and advance the matching.
        while (node != root && !node->children[c]) {
            node = node->failure;
        }
        if (node->children[c]) {
            node = node->children[c];
        }

        // Print the solution
        for (int id : node->matches) {
            // printf("%s:%d, ", index_to_word[id].c_str(), pos);
            word_freq[id]++;
        }
        // if (!node->matches.empty()) {
        //     printf("\n");
        // }

        suffix++;
        pos++;
    }
}

int main() {
    freopen("ahocorasick.in", "r", stdin);
    freopen("ahocorasick.out", "w", stdout);

    int n; // n <= 100
    auto *root = new TrieNode();
    unordered_map<int, string> index_to_word;

    scanf("%s", text);
    scanf("%d", &n);

    for (int i = 0; i < n; ++i) {
        scanf("%s", word);
        trie_insert(root, word, i);
        // index_to_word.insert({i, string(word)});
    }
    bfs_build_links(root);
    aho_corasick_collect_solution(root, text,  index_to_word);
    for (int i = 0; i < n; ++i) {
        printf("%d\n", word_freq[i]);
    }

    return 0;
}