Pagini recente » Cod sursa (job #2649353) | Cod sursa (job #303916) | Cod sursa (job #203851) | Cod sursa (job #3230040) | Cod sursa (job #3124785)
// Aho-Corasik Automaton - https://www.geeksforgeeks.org/aho-corasick-algorithm-pattern-searching/
using namespace std;
#include <bits/stdc++.h>
// dict size.
const int KMAX = (1e2 + 5);
// MMAX = max(|w[0]| + |w[1]| + ... + |w[k - 1]|)
const int MMAX = (int) (1e6 + 5); //
// Maximum number of characters in input alphabet
const int ALPHABET_SIZE = 26;
int out[MMAX]; // bitmask out[s] = (1 << i) | (1 << j) -> both w[i] and w[j] end in state 's'; compute w/ BFS
int f[MMAX]; // f[s] = s'; -> s' = longest proper suffix which is a proper prefix of some pattern; compute w/ BFS
int g[MMAX][ALPHABET_SIZE]; // g[s][ch] = s'; (s --ch--> s')
vector<string> w;
string str;
int fv[KMAX];
string text;
int k;
int buildGoTo(const vector<string>& w, int k) {
memset(out, 0, sizeof out);
memset(g, -1, sizeof g);
// Initially, we just have the 0 state
int states = 1;
//fill goto function: g[][] (eq. build Trie)
for (int i = 0; i < k; ++i) {
const string &word = w[i];
int currentState = 0;
for (int j = 0; j < word.size(); ++j) {
int ch = word[j] - 'a';
// Create new state if node does not exist
if (g[currentState][ch] == -1)
g[currentState][ch] = states++;
currentState = g[currentState][ch];
}
// Current word ends in `currentState`
out[currentState] |= (1 << i);
}
// (!) For all characters which don't have an edge from root (or state 0) in Trie, add a goto edge to state 0 itself
for (int ch = 0; ch < ALPHABET_SIZE; ++ch)
if (g[0][ch] == -1)
g[0][ch] = 0;
return states;
}
int buildAhoCorasikAutomaton(const vector<string>& w, int k) {
int states = buildGoTo(w, k);
// Build f[] using BFS w/ Queue
memset(f, -1, sizeof f);
queue<int> q;
for (int ch = 0; ch < ALPHABET_SIZE; ++ch) {
// All nodes 's' of depth 1 have failure f[s] = 0
if (g[0][ch] != 0) {
f[g[0][ch]] = 0;
q.push(g[0][ch]);
}
}
while (q.size()) {
int state = q.front();
q.pop();
// For the removed state, find failure function for
// all those characters for which goto function is
// not defined.
for (int ch = 0; ch <= ALPHABET_SIZE; ++ch) {
if (g[state][ch] != -1) { // If goto function is defined
// Find failure state of removed state
int failure = f[state];
// Find the deepest node labeled by proper
// suffix of string from root to current
// state.
while (g[failure][ch] == -1)
failure = f[failure];
failure = g[failure][ch];
f[g[state][ch]] = failure;
// Merge output values
out[g[state][ch]] |= out[failure];
q.push(g[state][ch]);
}
}
}
return states;
}
int findNextState(int currentState, char nextInput) {
int answer = currentState;
int ch = nextInput - 'a';
// If goto is not defined, use failure function
while (g[answer][ch] == -1)
answer = f[answer];
return g[answer][ch];
}
void searchWords(const vector<string>& w, int k, const string &text) {
buildAhoCorasikAutomaton(w, k);
int currentState = 0;
for (auto const &ch: text) {
currentState = findNextState(currentState, ch);
if (out[currentState] == 0)
continue;
for (int j = 0; j < k; ++j) {
if (out[currentState] & (1 << j))
fv[j]++;
}
}
}
int main() {
freopen("ahocorasick.in", "r", stdin);
freopen("ahocorasick.out", "w", stdout);
cin >> text;
cin >> k;
for (int i = 0; i < k; ++i) {
cin >> str;
w.push_back(str);
}
searchWords(w, k, text);
for (int i = 0; i < k; ++i)
cout << fv[i] << '\n';
return 0;
}