Cod sursa(job #2681661)

Utilizator crucerucalinCalin-Cristian Cruceru crucerucalin Data 6 decembrie 2020 11:27:14
Problema Aho-Corasick Scor 0
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 3.3 kb
#include <fstream>
#include <memory>
#include <queue>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std;

struct node_t {
  // The trie-like structure.
  array<unique_ptr<node_t>, 26> children = {};
  // The backlink to its largest suffix in the trie.
  node_t *backlink = nullptr;
};

node_t *insert_word(node_t &root, const string &word) {
  auto *curr_node = &root;
  for (auto c : word) {
    auto idx = c - 'a';
    if (curr_node->children[idx] == nullptr) {
      curr_node->children[idx] = make_unique<node_t>();
    }
    curr_node = curr_node->children[idx].get();
  }
  return curr_node;
}

stack<const node_t *> fix_backlinks(node_t &root) {
  stack<const node_t *> inverse_bfs;
  queue<node_t *> bfs_queue({&root});
  while (not bfs_queue.empty()) {
    auto *curr_node = bfs_queue.front(); bfs_queue.pop();

    // Save the traversal to do it in reverse order later (see func below).
    inverse_bfs.emplace(curr_node);

    // Go through the children and set their backlinks.
    for (int i = 0; i < 26; ++i) {
      if (auto *child = curr_node->children[i].get(); child != nullptr) {
        // Find the backlink for this child: first node on the "backlinks chain"
        // that has a child with the same id as this one.
        auto *cand = curr_node->backlink;
        if (cand == nullptr) {
          child->backlink = &root;
        } else {
          while (cand != &root and cand->children[i] == nullptr) {
            cand = cand->backlink;
          }
          child->backlink = cand->children[i].get();
          if (child->backlink == nullptr or child->backlink == child) {
            child->backlink = &root;
          }
        }
        bfs_queue.emplace(child);
      }
    }
  }
  return inverse_bfs;
}

void aggregate_node_counts(stack<const node_t *> inverse_bfs,
                           unordered_map<const node_t *, int> &counts) {
  // Traverse the stack, updating backlink counts.
  while (not inverse_bfs.empty()) {
    auto *curr_node = inverse_bfs.top(); inverse_bfs.pop();
    if (curr_node->backlink != nullptr) {
      if (auto it = counts.find(curr_node); it != end(counts)) {
        auto&& [_, count] = *it;
        counts[curr_node->backlink] += count;
      }
    }
  }
}

int main() {
  ifstream fin{"ahocorasick.in"};
  ofstream fout{"ahocorasick.out"};
  string s; fin >> s;
  int n; fin >> n;

  node_t root{};
  vector<const node_t *> dictionary_nodes(n, nullptr);

  // Insert dictionary nodes in the trie.
  for (int i = 0; i < n; ++i) {
    string word; fin >> word;
    dictionary_nodes[i] = insert_word(root, word);
  }

  // Create backlinks.
  auto inverse_bfs = fix_backlinks(root);

  // Traverse the text and keep track of the number of times each node is
  // visited.
  unordered_map<const node_t *, int> match_counts;
  auto *curr_node = &root;
  for (auto c : s) {
    auto i = c - 'a';
    while (curr_node != &root && curr_node->children[i] == nullptr) {
      curr_node = curr_node->backlink;
    }
    if (curr_node->children[i] != nullptr) {
      curr_node = curr_node->children[i].get();
    }
    match_counts[curr_node] += 1;
  }

  // Compute the counts for each word in the dictionary.
  aggregate_node_counts(inverse_bfs, match_counts);

  // Print them.
  for (int i = 0; i < n; ++i) {
    fout << match_counts[dictionary_nodes[i]] << endl;
  }
}