Cod sursa(job #72517)

Utilizator scapryConstantin Berzan scapry Data 14 iulie 2007 09:13:03
Problema ADN Scor 100
Compilator cpp Status done
Runda Arhiva de probleme Marime 4.97 kb
#include <assert.h>
#include <stdio.h>
#include <string.h>

enum { maxstrings = 18, maxlen = 30002, maxconfig = 1 << maxstrings };

int strings;
char *string[maxstrings];
int len[maxstrings];

int kmp_tab[maxlen];
int kmp_pattern_len;
const char *kmp_pattern;

int cost[maxstrings][maxstrings]; //cost[i][j] = longest suffix of string[i] which is a prefix of string[j]
int grand[maxstrings][maxconfig]; //grand[i][ABCDEF] = max cost of chain starting at i and containing A,B,...
int prev[maxstrings][maxconfig]; //grand[i][j] came from grand[ prev[i][j] ][ j without i ]

char ans[maxlen * maxstrings];


void kmp_precalc(int str)
/*
 * builds kmp failure table for the given pattern.
 * O(len)
 */
{
	int i, matched;

	kmp_pattern = string[str];
	kmp_pattern_len = len[str];

	kmp_tab[0] = -1; //no prefix possible
	kmp_tab[1] = 0; //no prefix possible

	matched = 0;
	for(i = 2; i < len[str]; )
	{
		if(kmp_pattern[i - 1] == kmp_pattern[matched]) //the substring continues
		{
			kmp_tab[i] = matched + 1;
			matched++;
			i++;
		}
		else if(matched > 0)       //it doesn't continue, but we may fall back
		{
			matched = kmp_tab[matched];
		}
		else                       //we may not fall back
		{
			kmp_tab[i] = 0;
			i++;
		}
	}
}

int kmp_match(int str)
/*
 * if str is fully matched, returns the start position in the string where the match occurs.
 * otherwise, returns the first position in the string where a prefix of the string occurs.
 * O(len)
 */
{
	int i, matched;
	const char *s = string[str];

	matched = 0;
	for(i = 0; i < len[str]; )
	{
		if(i + matched >= len[str]) //found suffix of str which is prefix of pattern
			return i;

		if(s[i + matched] == kmp_pattern[matched]) //matched a char
		{
			matched++;
			if(matched == kmp_pattern_len) //found full match
				return i;
		}
		else //no match, go ahead using the failure table
		{
			i += matched - kmp_tab[matched];

			if(matched != 0) //don't make it -1
				matched = kmp_tab[matched];
		}
	}

	//found no prefix (i.e. length 0)
	return len[str]; //start position is post-end
}

void rm_dupes()
/*
 * removes words included in others.
 * O(N*N*len)
 */
{
	int i, j, pos, actual;
	bool bad[maxstrings];

	memset(bad, 0, sizeof(bad));

	for(i = 0; i < strings; i++)
	{
		kmp_precalc(i);

		for(j = 0; j < strings; j++)
		{
			if(j == i)
				continue;

			//match j in i
			pos = kmp_match(j);

			if(len[i] > len[j] - pos) //found a prefix, ignore it. MBO
				;
			else                      //found a full match! string j is useless.
				bad[i] = true;
		}
	}

	actual = 0;
	for(i = 0; i < strings; i++)
	{
		if(bad[i])
		{
			delete[] string[i];
			string[i] = NULL; //deleted
		}
		else
		{
			if(i != actual)
			{
				assert(string[actual] == NULL); //free
				string[actual] = string[i];
				string[i] = NULL; //moved
				len[actual] = len[i];
			}

			actual++;
		}
	}

	strings = actual;
}

void calc_cost()
/*
 * fills-up cost[][] array with overlapping data.
 * O(N*N*len)
 */
{
	int i, j, pos, overlap;

	for(i = 0; i < strings; i++)
	{
		kmp_precalc(i);

		for(j = 0; j < strings; j++)
		{
			if(j == i)
				continue;

			//match j in i
			pos = kmp_match(j);

			overlap = len[j] - pos;
			assert(overlap < len[i]); //found prefix not full match

			cost[j][i] = overlap;
		}
	}
}

void calc_grand()
/*
 * DP, fills-up grand[][].
 * O(N^2 * 2^N)
 */
{
	int config, cfg, i, j, tmp;

	for(config = 1; config < (1 << strings); config++)
	{
		for(i = 0; i < strings; i++)
		{
			if(config & (1 << i))
			{
				cfg = config & ~(1 << i);

				if(cfg == 0) //only one bit
				{
					grand[i][config] = 0;
					prev[i][config] = -1;
					break; //no other bits, early out
				}
				else //pick the maximum using every bit
				{
					grand[i][config] = -1;
					prev[i][config] = -1;

					for(j = 0; j < strings; j++)
						if(cfg & (1 << j))
						{
							tmp = cost[i][j] + grand[j][cfg]; //without i, right?

							if(tmp > grand[i][config])
							{
								grand[i][config] = tmp;
								prev[i][config] = j;
							}
						}
				}
			}
		}
	}
}

void build_ans()
{
	int i, best_len = -1, ans_pos = 0,
	    config = (1 << strings) - 1, start = -1, old_start;

	for(i = 0; i < strings; i++) //start pos
		if(grand[i][config] > best_len)
		{
			best_len = grand[i][config];
			start = i;
		}

	assert(start != -1);

	for(i = 0; i < strings; i++)
	{
		old_start = start;
		start = prev[start][config];
		assert(config & (1 << old_start));
		config &= ~(1 << old_start);

		memcpy(ans + ans_pos, string[old_start], len[old_start]);
		ans_pos += len[old_start] - cost[old_start][start];

		if(i == strings - 1)
			assert(start == -1); //end
	}
}

void go()
{
	rm_dupes();
	calc_cost();
	calc_grand();
	build_ans();
}

int main()
{
	int i;
	FILE *f = fopen("adn.in", "r");
	if(!f) return 1;

	fscanf(f, "%d", &strings);
	for(i = 0; i < strings; i++)
	{
		string[i] = new char[maxlen]; //using pointers for fast moving
		fscanf(f, " %s", string[i]);
		len[i] = strlen(string[i]);
	}
	
	fclose(f);
	f = fopen("adn.out", "w");
	if(!f) return 1;

	go();

	fprintf(f, "%s\n", ans);
	fclose(f);
	return 0;
}