Pagini recente » Diferente pentru problema/shield intre reviziile 40 si 39 | Cod sursa (job #871198) | Cod sursa (job #2531667) | Cod sursa (job #1497524) | Cod sursa (job #3354285)
#include <stdio.h>
#include <string>
#include <queue>
#include <vector>
#include <unordered_map>
using namespace std;
#define TEXT_LEN 1000005
#define WORD_LEN 10005
#define ALPHABET_SIZE 26
#define NUM_WORDS 105
char text[TEXT_LEN];
char word[WORD_LEN];
int word_freq[NUM_WORDS];
struct TrieNode {
TrieNode() : children{}, failure(nullptr), output(nullptr), count(0) {}
TrieNode* children[ALPHABET_SIZE];
/**
* Pointer to another node that represents the `longest suffix for current node`,
* equivalent to `the largest prefix in trie that is suffix for current node`.
* This is the node where we can restart the matching from.
* This link (blue link) is used for fast prefix matching when advancing the main text.
*/
TrieNode* failure;
/**
* Pointer to the next node in the dictionary that can be reached by following failure arcs.
* This link is used to reconstruct the solution: the prefixes that match and that are a word in the dictionary.
* This is an optimization to avoid iterating failure links that are not a complete word when reconstructing the solution.
*/
TrieNode *output;
/**
* Number of words ending in this node.
*/
int count;
/**
* Ids of all words that end in this node.
*/
vector<int> pattern_ids;
/**
* Cache representing all dictionary patterns that should be reported when we arrive at this node.
* Includes: patterns ending at this node (pattern_ids) + patterns reachable through output/failure suffixes.
*/
vector<int> matches;
};
void trie_insert(TrieNode *node, const char *str, const int i) {
while (*str) {
const int c = *str - 'a';
if (!node->children[c]) {
node->children[c] = new TrieNode();
}
node = node->children[c];
++str;
}
node->count++;
node->pattern_ids.push_back(i);
}
/**
* For every node sets a failure link to a node representing the longest possible strict suffix of it in the trie.
* For every node sets an output link to the first node in the dictionary that can be reached by following failure links.
*/
void bfs_build_links(TrieNode *root) {
queue<TrieNode*> q;
q.push(root);
root->failure = root;
while (!q.empty()) {
TrieNode *parent = q.front();
q.pop();
for (int i = 0; i < ALPHABET_SIZE; ++i) {
TrieNode* child = parent->children[i];
if (!child) {
continue;
}
q.push(child);
// Compute failure node for child
if (parent == root) {
// Nodes on the first level don't have suffixes. Assigning the root as failure link.
child->failure = root;
} else {
const TrieNode *fail = parent->failure;
while (fail != root && !fail->children[i]) {
fail = fail->failure;
}
child->failure = fail->children[i] ? fail->children[i] : root;
}
// Compute the output node for child
if (child->failure->count > 0) {
child->output = child->failure;
} else {
child->output = child->failure->output;
}
// Precompute all matches reachable from this node.
child->matches = child->pattern_ids;
if (child->output) {
child->matches.insert(
child->matches.end(),
child->output->matches.begin(),
child->output->matches.end()
);
}
}
}
}
void aho_corasick_collect_solution(TrieNode *root, const char *suffix, unordered_map<int, string>& index_to_word) {
int pos = 0;
TrieNode *node = root;
while (*suffix != '\0') {
const int c = *suffix - 'a';
// Follow failure links and advance the matching.
while (node != root && !node->children[c]) {
node = node->failure;
}
if (node->children[c]) {
node = node->children[c];
}
// Print the solution
for (int id : node->matches) {
// printf("%s:%d, ", index_to_word[id].c_str(), pos);
word_freq[id]++;
}
// if (!node->matches.empty()) {
// printf("\n");
// }
suffix++;
pos++;
}
}
int main() {
freopen("ahocorasick.in", "r", stdin);
freopen("ahocorasick.out", "w", stdout);
int n; // n <= 100
auto *root = new TrieNode();
unordered_map<int, string> index_to_word;
scanf("%s", text);
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%s", word);
trie_insert(root, word, i);
// index_to_word.insert({i, string(word)});
}
bfs_build_links(root);
aho_corasick_collect_solution(root, text, index_to_word);
for (int i = 0; i < n; ++i) {
printf("%d\n", word_freq[i]);
}
return 0;
}