Cod sursa(job #2390069)

Utilizator stoianmihailStoian Mihail stoianmihail Data 27 martie 2019 18:58:20
Problema Potrivirea sirurilor Scor 100
Compilator cpp-64 Status done
Runda Arhiva educationala Marime 5.84 kb
#include <fstream>
#include <iostream>
#include <cstring>
#include <cassert>
#include <vector>

std::vector<int> matches;

#define MAX_LEN 2000000
char pattern[MAX_LEN + 2];
char str[MAX_LEN + 2];

int size;
int z[MAX_LEN + 1];

int MIN(int X, int Y) {
  return X < Y ? X : Y;
}

int MAX(int X, int Y) {
  return X > Y ? X : Y;
}

void Z(char *s, int len) {
  int i, l = 0, r = 0;

  for (i = 1; i < len; i++) {
    z[i] = (MIN(z[i - l], r - i + 1) & -(i <= r));
    while (s[z[i]] == s[z[i] + i]) {
      z[i]++;
    }
    if (i + z[i] - 1 > r) {
      r = i + z[i] - 1;
      l = i;
    }
  }
}

int choose(int lim) {
  int i = 0;
  while (i < size && z[i] < lim) {
    i++;
  }
  if (i < size)
    return z[i];
  return z[size - 1];
  //std::cerr << val << " with lim = " << lim << "\n";
}

bool cmpLower(char a, char b) {
  return a < b;
}

bool cmpGreater(char a, char b) {
  return a > b;
}

static int computeMaxSuffix(const char* pattern, int length, int& period, bool compare(char, char)) {
   int maxSuffix = -1;
   int ptr = 0, candPeriod = 1;
   period = 1;

   while (ptr + candPeriod < length) {
      char c1 = pattern[ptr + candPeriod];
      char c2 = pattern[maxSuffix + candPeriod];
      if (!compare(c1, c2)) {
         if (c1 == c2) {
            if (candPeriod == period) {
               candPeriod = 1;
               ptr += period;
            } else {
               candPeriod++;
            }
         } else {
            period = candPeriod = 1;
            maxSuffix = ptr;
            ptr++;
         }
      } else {
         ptr += candPeriod;
         period = ptr - maxSuffix;
         candPeriod = 1;
      }
   }
   return maxSuffix;
}
//---------------------------------------------------------------------------
// Two-Way Algorithm.
// Returns first position where 'needle' is found in 'haystack'.
// 'maxSuffix' and 'period' have been already been preprocessed and the comparison
// memcmp(x, x + period, maxSuffix + 1) is stored in 'equal'.
static void twoWaySearchImpl(const char* haystack, int haystacklen, const char* needle, int needlelen, int maxSuffix, int period) {
   // decode the preprocessing.
   bool equal = period & 1;
   period >>= 1;
   int pos, lastPtr = -1;
   int resetPtr = needlelen - period - 1;
   int offset = 0;
   int lastMatch = 0;

   haystacklen -= needlelen;

   if (!equal) {
      while (offset <= haystacklen) {
         pos = maxSuffix + 1;
         while (pos < needlelen && needle[pos] == haystack[pos + offset]) {
            pos++;
         }
         if (pos < needlelen) {
            offset += pos - maxSuffix;
            if (offset < lastMatch) {
              offset = lastMatch - needlelen + choose(offset - lastMatch + needlelen);
            }
         } else {
            // match.
            int lim = MAX(offset, lastMatch) - offset;
            pos = maxSuffix;
            while (maxSuffix >= lim && needle[pos] == haystack[pos + offset]) {
              pos--;
            }
            if (pos < lim) {
              //std::cerr << "found at 11 " << offset << "\n";
              matches.push_back(offset);
            }
            // Vorsicht hier! Es muss zuerst offset benutzt werden und dann erst geaendert!
            lastMatch = offset + needlelen;
            offset += period;
         }
      }
   } else {
      while (offset <= haystacklen) {
         pos = std::max(maxSuffix, lastPtr) + 1;
         //std::cerr << "wir beginnen mit " << pos << "\n";
         while (pos < needlelen && needle[pos] == haystack[pos + offset]) {
            pos++;
         }
         //std::cerr << " status = " << pos << " < " << needlelen << "\n";
         if (pos < needlelen) {
            lastPtr = -1;
            offset += pos - maxSuffix;
            if (offset < lastMatch) {
              offset = lastMatch - needlelen + choose(offset - lastMatch + needlelen);
            }
         } else {
            // match.
            int lim = MAX(MAX(offset, lastMatch) - offset, lastPtr + 1);
            pos = maxSuffix;
            while (pos >= lim && needle[pos] == haystack[pos + offset]) {
               pos--;
            }
            if (pos < lim) {
              //std::cerr << "found at 22 " << offset << "\n";
              matches.push_back(offset);
            }
            //std::cerr << "offset vorher " << offset << "\n";
            lastMatch = offset + needlelen;
            lastPtr = resetPtr;
            offset += period;
         }
      }
   }
}

int main(void) {
  std::fstream in("strmatch.in");

  in >> pattern >> str;
  int M = strlen(pattern);
  int N = strlen(str);

  int period1, period2;
  int suffix1 = computeMaxSuffix(pattern, M, period1, cmpLower);
  int suffix2 = computeMaxSuffix(pattern, M, period2, cmpGreater);

  //std::cerr << "raus";

  // Store the results of preprocessing.
  int maxSuffix = (suffix1 > suffix2) ? suffix1 : suffix2;
  int period = (suffix1 > suffix2) ? period1 : period2;

  //std::cerr << "vorher " << period << " " << maxSuffix << "\n";

  // 'equal' stores the repetitive comparison in the initial Two-Way algorithm.
  bool equal = !memcmp(pattern, pattern + period, maxSuffix + 1);

  //std::cerr << "equal = " << equal << "\n";

  if (!equal)
    period = std::max(maxSuffix + 1, M - maxSuffix - 1) + 1;
  period = (period << 1) | equal;

  //std::cerr << "preprocessing " << (period >> 1) << " " << (maxSuffix) << "\n";

  Z(pattern, M);
  z[size++] = 1;
  for (int i = 1; i < M; i++) {
    if (z[i] + i == M) {
      z[size++] = i;
    }
  }
  z[size++] = M;

  twoWaySearchImpl(str, N, pattern, M, maxSuffix, period);

  std::ofstream out("strmatch.out");
  out << matches.size() << "\n";
  for (int i = 0; i < matches.size() && i < 1000; i++)
    out << matches[i] << " ";
  return 0;
}