Pagini recente » Cod sursa (job #2647800) | Cod sursa (job #333958) | Cod sursa (job #866403) | Cod sursa (job #1949504) | Cod sursa (job #3288870)
Cod sursa(job #3288870)

Utilizator	Cioc Alex-Andrei nurof3n	Data	24 martie 2025 16:58:33
Problema	Potrivirea sirurilor	Scor	14
Compilator	cpp-64	Status	done
Runda	Arhiva educationala	Marime	10.27 kb
Raporteaza aceasta sursa
// https://www.infoarena.ro/problema/strmatch
#include <array>
#include <algorithm>
#include <cstring>
#include <functional>
#include <iostream>
#include <fstream>
#include <regex>
#include <vector>

#define ALG 9

#if ALG == 1 || ALG == 2
/**
 * @brief KMP algorithm, finds max 1000 occurences of a in b.
 * next[i] represents the longest prefix of a that is a suffix of a_i.
 * This implementation assumes a and b are indexed from 0.
 *
 * One improvement, for real-time restraints, is to create the next table
 * for each character in the alphabet, so that the lookup time for a mismatch
 * is constant.
 *
 * Complexity: O(A+B) time, O(A) space for ALG 1 and O(A+S) space for ALG 2,
 * where S is the size of the alphabet
 */
auto strmatch(const std::string& a, const std::string& b) -> std::pair<int, std::array<int, 1000>>
{
#if ALG == 2
    int marked[128];  // represents the LAST position of the character in a
    std::fill_n(marked, 128, -1);
    marked[a[0]] = 0;
#endif

    // precompute next array
    std::vector<int> next(a.size());
    next[0] = -1;
    for (int i = 1, k = -1; i < a.size(); ++i) {
#if ALG == 2
        marked[a[i]] = i;
#endif

        // if the prefix cannot be extended to match the current suffix,
        // go back to next[k], ensuring that the prexix of a_k is still a
        // suffix of a_{i-1} (because a_k was already a suffix of a_{i-1})
        while (k >= 0 && a[k + 1] != a[i])
            k = next[k];

        // extend the prefix
        if (a[k + 1] == a[i])
            ++k;

        next[i] = k;
    }

    // KMP pattern matching - same algorithm as for precomputing the next array
    std::array<int, 1000> positions;
    int                   len = 0;
    for (int i = 0, q = -1; i < b.size(); ++i) {
#if ALG == 2
        // S. Boyer and J. Moore 1974 improvement for large alphabets: we check
        // first the last element of the pattern and if it does not match, we
        // just skip by the length of the pattern; otherwise, we skip by the
        // minimum amount that is consistent with the match.
        if (q == -1) {
            int pos = i + a.size() - 1;
            if (pos < b.size()) {
                if (marked[b[pos]] == -1) {
                    i = pos;
                    continue;
                }

                i = pos - marked[b[pos]];
            }
            else
                break;
        }

        // for smaller alphabets that can be stored in a small array of markers,
        // we can do this check for each character in b, at each step of the
        // loop
        if (marked[b[i]] == -1) {
            q = -1;
            continue;
        }
#endif

        // if the prefix cannot be extended to match the current character
        // in b, go back to next[q], ensuring that the prefix of a_q is
        // still a suffix to b_{i-1} (because the next[q] step guarantees
        // that a_next[q] will be a suffix of a_q, and thus b_i by
        // transitivity)
        while (q >= 0 && a[q + 1] != b[i])
            q = next[q];

        // extend the match
        if (a[q + 1] == b[i])
            ++q;

        // found a full match
        if (q + 1 == a.size()) {
            if (len < 1000)
                positions[len] = i - a.size() + 1;
            ++len;
        }
    }

    return { len, positions };
}
#endif

#if ALG == 3 || ALG == 4 || ALG == 5
/**
 * @brief Uses standard library searchers:
 * - default searcher: implementation defined complexity
 * - Boyer Moore searcher: search complexity is worst case O(A*B) when the
 * pattern appears in text and best case O(B/A) when it does not; preprocessing
 * is O(A); time complexity is O(S+A)
 * - Boyer Moore Horspool searcher: simpler variant that only uses bad character
 * heuristic; it improves the average case performance to O(B) on random text
 *
 * See: https://www.cppstories.com/2018/08/searchers/
 *
 * Unfortunately, std::search finds only one occurence, thus for multiple,
 * overlapping searches you are limited to incrementing +1 instead of skipping
 * whole sequences of b. Bottom line is these standard library searchers are
 * only suitable for single searches or non-overlapping occurences.
 *
 * Complexity: see above
 */
auto strmatch(const std::string& a, const std::string& b) -> std::pair<int, std::array<int, 1000>>
{
#if ALG == 3
    const std::default_searcher searcher(a.begin(), a.end());
#elif ALG == 4
    const std::boyer_moore_searcher searcher(a.begin(), a.end());
#elif ALG == 5
    const std::boyer_moore_horspool_searcher searcher(a.begin(), a.end());
#endif

    std::array<int, 1000> positions;
    int                   len = 0;

    auto it = std::search(b.begin(), b.end(), searcher);
    while (it != b.end()) {
        if (len < 1000)
            positions[len] = it - b.begin();
        ++len;
        it = std::search(it + 1, b.end(), searcher);
    }

    return { len, positions };
}
#endif

#if ALG == 6
/**
 * @brief Uses std::string::find, which has implementation defined complexity.
 *
 * See:
 * https://stackoverflow.com/questions/43657530/using-stdsearch-over-stringfind
 *
 * Complexity: unknown
 */
auto strmatch(const std::string& a, const std::string& b) -> std::pair<int, std::array<int, 1000>>
{
    std::array<int, 1000> positions;
    int                   len = 0;

    auto pos = b.find(a);
    while (pos != std::string::npos) {
        if (len < 1000)
            positions[len] = pos;
        ++len;
        pos = b.find(a, pos + 1);
    }

    return { len, positions };
}
#endif

#if ALG == 7
/**
 * @brief Uses std::regex_iterator with an overlapping pattern.
 *
 * Complexity: unknown (but bad af)
 */
auto strmatch(const std::string& a, const std::string& b) -> std::pair<int, std::array<int, 1000>>
{
    std::array<int, 1000> positions;
    int                   len = 0;

    std::regex pattern("(?=(" + a + ")).");
    auto       begin = std::sregex_iterator(b.begin(), b.end(), pattern);
    auto       end   = std::sregex_iterator();

    for (auto it = begin; it != end; ++it) {
        if (len < 1000)
            positions[len] = it->position();
        ++len;
    }

    return { len, positions };
}
#endif

#if ALG == 8 || ALG == 9
/**
 * @brief Rabin-Karp algorithm, with a rolling hash. Note that many
 * implementations use Rabin fingerprints for the hashing.
 * This type of algorithm is inferior to KMP and Boyer-Moore, but can be useful
 * in multiple patterns searching.
 *
 * The ALG 9 variant uses 2 hashes to have stronger confidence and removes the
 * comparison check in this case, making it linear worst-case.
 *
 * Complexity: time O(A*B) worst-case but average O(A+B); space O(A+B)
 *
 */
auto strmatch(const std::string& a, const std::string& b) -> std::pair<int, std::array<int, 1000>>
{
    std::array<int, 1000> positions;

    auto a_size = a.size(), b_size = b.size();
    if (a_size > b_size)
        return { 0, positions };

#if ALG == 8
    auto fast_exp = [](unsigned b, unsigned p) {
        unsigned res = 1;
        while (p) {
            while (!(p & 1))
                b *= b, p >>= 1;
            res *= b, --p;
        }
        return res;
    };
#else
    static const unsigned mod1 = 100007, mod2 = 100021, p = 73;

    auto fast_exp = [](unsigned b, unsigned p, unsigned mod) {
        unsigned res = 1;
        while (p) {
            while (!(p & 1))
                b = (b * b) % mod, p >>= 1;
            res = (res * b) % mod, --p;
        }
        return res;
    };
#endif


#if ALG == 8
    auto hash = [](std::string s, size_t len) {
        // string hash version of Daniel J. Bernstein (this is NOT a good one-way function tho)
        // https://stackoverflow.com/questions/11546791/what-is-the-best-hash-function-for-rabin-karp-algorithm
        unsigned h = 0;  // can also be replaced with 5381
        for (int i = 0; i < len; ++i)
            h = h * 33 + s[i];
        return h;
    };
#else
    auto hash = [](std::string s, size_t len) {
        unsigned h1 = 0, h2 = 0;
        for (int i = 0; i < len; ++i) {
            h1 = (h1 * p + s[i]) % mod1;
            h2 = (h2 * p + s[i]) % mod2;
        }
        return std::pair<int, int>{ h1, h2 };
    };
#endif

    // precompute hash of a (and first hash of b also)
    auto ha = hash(a, a_size), hb = hash(b, a_size);

    // rolling hash version
#if ALG == 8
    /**
    h = h[m,n-1] * 33 + c = h[m,n]
    h' = h[m+1,n] * 33 + c' = h[m+1,n+1]
    h' = h[m,n] * 33 + c' - x, x = ?
    x = (h[m,n] - h[m+1,n]) * 33
    h[m,n] = c_n + c_n-1 * 33 + c_n-2 * 33^2 + ... + c_m+1 * 33^n-m-1 + c_m * 33^n-m + h0 * 33^n-m+1
    h[m+1,n] = c_n + c_n-1 * 33 + c_n-2 * 33^2 + ... + c_m+1 * 33^n-m-1 + h0 * 33^n-m
    h[m,n] - h[m+1,n] = c_m*33^n-m + h0 * 33^n-m * 32
    x = 33^n-m * (c_m + 32 * h0)
     */
    unsigned p33          = fast_exp(33, a_size);
    auto     hash_rolling = [&b, a_size, p33, hb](int idx) {
        static auto h = hb;
        return h      = h * 33 + b[idx] - p33 * (b[idx - a_size]);
    };
#else
    unsigned p1 = fast_exp(p, a_size - 1, mod1), p2 = fast_exp(p, a_size - 1, mod2);
    auto     hash_rolling = [&b, a_size, hb, p1, p2](int idx) {
        static auto h = hb;
        h.first       = ((h.first - (b[idx - a_size] * p1) % mod1 + mod1) * p + b[idx]) % mod1;
        h.second      = ((h.second - (b[idx - a_size] * p2) % mod2 + mod2) * p + b[idx]) % mod2;
        return h;
    };
#endif

    int len = 0;
    for (int i = a_size; i <= b_size; ++i) {
        if (ha == hb) {
#if ALG == 8
            // hashes match, we have to compare the strings thoroughly
            if (!std::memcmp(a.c_str(), b.c_str() + i - a_size, a_size)) {
#endif
                if (len < 1000)
                    positions[len] = i - a_size;
                ++len;
#if ALG == 8
            }
#endif
        }
        if (i < b_size)
            hb = hash_rolling(i);
    }

    return { len, positions };
}
#endif

int main()
{
    try {
        std::ios_base::sync_with_stdio(false);
        std::cin.tie(nullptr);

        std::ifstream in("strmatch.in");
        std::ofstream out("strmatch.out");
        if (!in.is_open())
            throw std::runtime_error("input file not found");

        std::string a, b;
        in >> a >> b;

        auto [len, positions] = strmatch(a, b);
        out << len << '\n';
        for (int i = 0; i < std::min(1000, len); ++i)
            out << positions[i] << ' ';
    }
    catch (const std::exception& e) {
        std::cerr << e.what();
        return EXIT_FAILURE;
    }
}
infoarena informatica de performanta

Cod sursa(job #3288870)