Pagini recente » Cod sursa (job #1267733) | Cod sursa (job #2139414) | Cod sursa (job #1944725) | Cod sursa (job #182134) | Cod sursa (job #3287628)
Cod sursa(job #3287628)

Utilizator	Cioc Alex-Andrei nurof3n	Data	18 martie 2025 19:03:41
Problema	Potrivirea sirurilor	Scor	40
Compilator	cpp-64	Status	done
Runda	Arhiva educationala	Marime	6.25 kb
Raporteaza aceasta sursa
// https://www.infoarena.ro/problema/strmatch
#include <array>
#include <algorithm>
#include <functional>
#include <iostream>
#include <fstream>
#include <regex>
#include <vector>

#define ALG 7

#if ALG == 1 || ALG == 2
/**
 * @brief KMP algorithm, finds max 1000 occurences of a in b.
 * next[i] represents the longest prefix of a that is a suffix of a_i.
 * This implementation assumes a and b are indexed from 0.
 *
 * One improvement, for real-time restraints, is to create the next table
 * for each character in the alphabet, so that the lookup time for a mismatch
 * is constant.
 *
 * Complexity: O(A+B) time, O(A) space for ALG 1 and O(A+S) space for ALG 2,
 * where S is the size of the alphabet
 */
auto strmatch(const std::string& a, const std::string& b)
        -> std::pair<int, std::array<int, 1000>>
{
#if ALG == 2
    int marked[128];  // represents the LAST position of the character in a
    std::fill_n(marked, 128, -1);
    marked[a[0]] = 0;
#endif

    // precompute next array
    std::vector<int> next(a.size());
    next[0] = -1;
    for (int i = 1, k = -1; i < a.size(); ++i) {
#if ALG == 2
        marked[a[i]] = i;
#endif

        // if the prefix cannot be extended to match the current suffix,
        // go back to next[k], ensuring that the prexix of a_k is still a
        // suffix of a_{i-1} (because a_k was already a suffix of a_{i-1})
        while (k >= 0 && a[k + 1] != a[i])
            k = next[k];

        // extend the prefix
        if (a[k + 1] == a[i])
            ++k;

        next[i] = k;
    }

    // KMP pattern matching - same algorithm as for precomputing the next array
    std::array<int, 1000> positions;
    int                   len = 0;
    for (int i = 0, q = -1; i < b.size(); ++i) {
#if ALG == 2
        // S. Boyer and J. Moore 1974 improvement for large alphabets: we check
        // first the last element of the pattern and if it does not match, we
        // just skip by the length of the pattern; otherwise, we skip by the
        // minimum amount that is consistent with the match.
        if (q == -1) {
            int pos = i + a.size() - 1;
            if (pos < b.size()) {
                if (marked[b[pos]] == -1) {
                    i = pos;
                    continue;
                }

                i = pos - marked[b[pos]];
            }
            else
                break;
        }

        // for smaller alphabets that can be stored in a small array of markers,
        // we can do this check for each character in b, at each step of the
        // loop
        if (marked[b[i]] == -1) {
            q = -1;
            continue;
        }
#endif

        // if the prefix cannot be extended to match the current character
        // in b, go back to next[q], ensuring that the prefix of a_q is
        // still a suffix to b_{i-1} (because the next[q] step guarantees
        // that a_next[q] will be a suffix of a_q, and thus b_i by
        // transitivity)
        while (q >= 0 && a[q + 1] != b[i])
            q = next[q];

        // extend the match
        if (a[q + 1] == b[i])
            ++q;

        // found a full match
        if (q + 1 == a.size()) {
            if (len < 1000)
                positions[len] = i - a.size() + 1;
            ++len;
        }
    }

    return { len, positions };
}
#endif

#if ALG == 3 || ALG == 4 || ALG == 5
/**
 * @brief Uses standard library searchers:
 * - default searcher: implementation defined complexity
 * - Boyer Moore searcher
 * - Boyer Moore Horspool searcher
 *
 * See: https://www.cppstories.com/2018/08/searchers/
 *
 * Unfortunately, std::search finds only one occurence, thus for multiple,
 * overlapping searches you are limited to incrementing +1 instead of skipping
 * whole sequences of b. Bottom line is these standard library searchers are
 * only suitable for single searches or non-overlapping occurences.
 *
 * Complexity: see above
 */
auto strmatch(const std::string& a, const std::string& b)
        -> std::pair<int, std::array<int, 1000>>
{
#if ALG == 3
    const std::default_searcher searcher(a.begin(), a.end());
#elif ALG == 4
    const std::boyer_moore_searcher searcher(a.begin(), a.end());
#elif ALG == 5
    const std::boyer_moore_horspool_searcher searcher(a.begin(), a.end());
#endif

    std::array<int, 1000> positions;
    int                   len = 0;

    auto it = std::search(b.begin(), b.end(), searcher);
    while (it != b.end()) {
        if (len < 1000)
            positions[len] = it - b.begin();
        ++len;
        it = std::search(it + 1, b.end(), searcher);
    }

    return { len, positions };
}
#endif

#if ALG == 6
/**
 * @brief Uses std::string::find, which has implementation defined complexity.
 *
 * See:
 * https://stackoverflow.com/questions/43657530/using-stdsearch-over-stringfind
 *
 * Complexity: unknown
 */
auto strmatch(const std::string& a, const std::string& b)
        -> std::pair<int, std::array<int, 1000>>
{
    std::array<int, 1000> positions;
    int                   len = 0;

    auto pos = b.find(a);
    while (pos != std::string::npos) {
        if (len < 1000)
            positions[len] = pos;
        ++len;
        pos = b.find(a, pos + 1);
    }

    return { len, positions };
}
#endif

#if ALG == 7
/**
 * @brief Uses std::regex_iterator with an overlapping pattern.
 *
 * Complexity: unknown
 */
auto strmatch(const std::string& a, const std::string& b)
        -> std::pair<int, std::array<int, 1000>>
{
    std::array<int, 1000> positions;
    int                   len = 0;

    std::regex pattern("(?=(" + a + ")).");
    auto       begin = std::sregex_iterator(b.begin(), b.end(), pattern);
    auto       end   = std::sregex_iterator();

    for (auto it = begin; it != end; ++it) {
        if (len < 1000)
            positions[len] = it->position();
        ++len;
    }

    return { len, positions };
}
#endif

int main()
{
    try {
        std::ios_base::sync_with_stdio(false);
        std::cin.tie(nullptr);

        std::ifstream in("strmatch.in");
        std::ofstream out("strmatch.out");

        std::string a, b;
        in >> a >> b;

        auto [len, positions] = strmatch(a, b);
        out << len << '\n';
        for (int i = 0; i < std::min(1000, len); ++i)
            out << positions[i] << ' ';
    }
    catch (const std::exception& e) {
        std::cerr << e.what();
        return EXIT_FAILURE;
    }
}
infoarena informatica de performanta

Cod sursa(job #3287628)