Pagini recente » Cod sursa (job #916955) | Cod sursa (job #2570318) | Cod sursa (job #680291) | Cod sursa (job #2563283) | Cod sursa (job #2390015)
#include <fstream>
#include <iostream>
#include <cstring>
#include <cassert>
#include <vector>
std::vector<int> matches;
bool cmpLower(char a, char b) {
return a < b;
}
bool cmpGreater(char a, char b) {
return a > b;
}
static int computeMaxSuffix(const char* pattern, int length, int& period, bool compare(char, char)) {
int maxSuffix = -1;
int ptr = 0, candPeriod = 1;
period = 1;
while (ptr + candPeriod < length) {
char c1 = pattern[ptr + candPeriod];
char c2 = pattern[maxSuffix + candPeriod];
if (!compare(c1, c2)) {
if (c1 == c2) {
if (candPeriod == period) {
candPeriod = 1;
ptr += period;
} else {
candPeriod++;
}
} else {
period = candPeriod = 1;
maxSuffix = ptr;
ptr++;
}
} else {
ptr += candPeriod;
period = ptr - maxSuffix;
candPeriod = 1;
}
}
return maxSuffix;
}
//---------------------------------------------------------------------------
// Two-Way Algorithm.
// Returns first position where 'needle' is found in 'haystack'.
// 'maxSuffix' and 'period' have been already been preprocessed and the comparison
// memcmp(x, x + period, maxSuffix + 1) is stored in 'equal'.
static void twoWaySearchImpl(const char* haystack, int haystacklen, const char* needle, int needlelen, int maxSuffix, int period) {
// decode the preprocessing.
bool equal = period & 1;
period >>= 1;
int pos, lastPtr = -1;
int resetPtr = needlelen - period - 1;
int offset = 0;
haystacklen -= needlelen;
if (!equal) {
while (offset <= haystacklen) {
pos = maxSuffix + 1;
while (pos < needlelen && needle[pos] == haystack[pos + offset]) {
pos++;
}
if (pos < needlelen) {
offset += pos - maxSuffix;
} else {
// match.
pos = maxSuffix;
while (pos >= 0 && needle[pos] == haystack[pos + offset]) {
pos--;
}
if (pos < 0) {
//std::cerr << "found at 11 " << offset << "\n";
matches.push_back(offset);
}
offset += period;
}
}
} else {
while (offset <= haystacklen) {
pos = std::max(maxSuffix, lastPtr) + 1;
//std::cerr << "wir beginnen mit " << pos << "\n";
while (pos < needlelen && needle[pos] == haystack[pos + offset]) {
pos++;
}
//std::cerr << " status = " << pos << " < " << needlelen << "\n";
if (pos < needlelen) {
lastPtr = -1;
offset += pos - maxSuffix;
} else {
// match.
pos = maxSuffix;
while (pos > lastPtr && needle[pos] == haystack[pos + offset]) {
pos--;
}
if (pos <= lastPtr) {
//std::cerr << "found at 22 " << offset << "\n";
matches.push_back(offset);
}
//std::cerr << "offset vorher " << offset << "\n";
lastPtr = resetPtr;
offset += period;
}
}
}
}
#define MAX_LEN 2000000
char pattern[MAX_LEN + 2];
char str[MAX_LEN + 2];
int main(void) {
std::fstream in("strmatch.in");
in >> pattern >> str;
int M = strlen(pattern);
int N = strlen(str);
int period1, period2;
int suffix1 = computeMaxSuffix(pattern, M, period1, cmpLower);
int suffix2 = computeMaxSuffix(pattern, M, period2, cmpGreater);
//std::cerr << "raus";
// Store the results of preprocessing.
int maxSuffix = (suffix1 > suffix2) ? suffix1 : suffix2;
int period = (suffix1 > suffix2) ? period1 : period2;
//std::cerr << "vorher " << period << " " << maxSuffix << "\n";
// 'equal' stores the repetitive comparison in the initial Two-Way algorithm.
bool equal = !memcmp(pattern, pattern + period, maxSuffix + 1);
//std::cerr << "equal = " << equal << "\n";
if (!equal)
period = std::max(maxSuffix + 1, M - maxSuffix - 1) + 1;
period = (period << 1) | equal;
//std::cerr << "preprocessing " << (period >> 1) << " " << (maxSuffix) << "\n";
twoWaySearchImpl(str, N, pattern, M, maxSuffix, period);
std::ofstream out("strmatch.out");
out << matches.size() << "\n";
for (int i = 0; i < matches.size() && i < 1000; i++)
out << matches[i] << " ";
return 0;
}