The following is a demo question from a coding interview site called codility:
A prefix of a string S is any leading contiguous part of S. For example
Here's a O(n log n) version based on suffix arrays. There are O(n) construction algorithms for suffix arrays, I just don't have the patience to code them.
Example output (this output isn't O(n), but it's only to show that we can indeed compute all the scores):
4*1 a
3*3 aba
2*5 ababa
1*7 abababa
3*2 ab
2*4 abab
1*6 ababab
Basically you have to reverse the string, and compute the suffix array (SA) and the longest common prefix (LCP).
Then you have traverse the SA array backwards looking for LCPs that match the entire suffix (prefix in the original string). If there's a match, increment the counter, otherwise reset it to 1. Each suffix (prefix) receive a "score" (SCR) that corresponds to the number of times it appears in the original string.
#include <iostream>
#include <cstring>
#include <string>
#define MAX 10050
using namespace std;
int RA[MAX], tempRA[MAX];
int SA[MAX], tempSA[MAX];
int C[MAX];
int Phi[MAX], PLCP[MAX], LCP[MAX];
int SCR[MAX];
void suffix_sort(int n, int k) {
memset(C, 0, sizeof C);
for (int i = 0; i < n; i++)
C[i + k < n ? RA[i + k] : 0]++;
int sum = 0;
for (int i = 0; i < max(256, n); i++) {
int t = C[i];
C[i] = sum;
sum += t;
}
for (int i = 0; i < n; i++)
tempSA[C[SA[i] + k < n ? RA[SA[i] + k] : 0]++] = SA[i];
memcpy(SA, tempSA, n*sizeof(int));
}
void suffix_array(string &s) {
int n = s.size();
for (int i = 0; i < n; i++)
RA[i] = s[i] - 1;
for (int i = 0; i < n; i++)
SA[i] = i;
for (int k = 1; k < n; k *= 2) {
suffix_sort(n, k);
suffix_sort(n, 0);
int r = tempRA[SA[0]] = 0;
for (int i = 1; i < n; i++) {
int s1 = SA[i], s2 = SA[i-1];
bool equal = true;
equal &= RA[s1] == RA[s2];
equal &= RA[s1+k] == RA[s2+k];
tempRA[SA[i]] = equal ? r : ++r;
}
memcpy(RA, tempRA, n*sizeof(int));
}
}
void lcp(string &s) {
int n = s.size();
Phi[SA[0]] = -1;
for (int i = 1; i < n; i++)
Phi[SA[i]] = SA[i-1];
int L = 0;
for (int i = 0; i < n; i++) {
if (Phi[i] == -1) {
PLCP[i] = 0;
continue;
}
while (s[i + L] == s[Phi[i] + L])
L++;
PLCP[i] = L;
L = max(L-1, 0);
}
for (int i = 1; i < n; i++)
LCP[i] = PLCP[SA[i]];
}
void score(string &s) {
SCR[s.size()-1] = 1;
int sum = 1;
for (int i=s.size()-2; i>=0; i--) {
if (LCP[i+1] < s.size()-SA[i]-1) {
sum = 1;
} else {
sum++;
}
SCR[i] = sum;
}
}
int main() {
string s = "abababa";
s = string(s.rbegin(), s.rend()) +".";
suffix_array(s);
lcp(s);
score(s);
for(int i=0; i<s.size(); i++) {
string ns = s.substr(SA[i], s.size()-SA[i]-1);
ns = string(ns.rbegin(), ns.rend());
cout << SCR[i] << "*" << ns.size() << " " << ns << endl;
}
}
Most of this code (specially the suffix array and LCP implementations) I have been using for some years in contests. This version in special I adapted from this one I wrote some years ago.
I was working on this challenge for more than 4 days , reading a lot of documentation, I found a solution with O(N) .
I got 81%, the idea is simple using a window slide.
def solution(s: String): Int = {
var max = s.length // length of the string
var i, j = 1 // start with i=j=1 ( is the beginning of the slide and j the end of the slide )
val len = s.length // the length of the string
val count = Array.ofDim[Int](len) // to store intermediate results
while (i < len - 1 || j < len) {
if (i < len && s(0) != s(i)) {
while (i < len && s(0) != s(i)) { // if the begin of the slide is different from
// the first letter of the string skip it
i = i + 1
}
}
j = i + 1
var k = 1
while (j < len && s(j).equals(s(k))) { // check for equality and update the array count
if (count(k) == 0) {
count(k) = 1
}
count(k) = count(k) + 1
max = math.max((k + 1) * count(k), max)
k = k + 1
j = j + 1
}
i = i + 1
}
max // return the max
}
public class Main {
public static void main(String[] args) {
String input = "abababa";
String prefix;
int product;
int maxProduct = 0;
for (int i = 1; i <= input.length(); i++) {
prefix = input.substring(0, i);
String substr;
int occurs = 0;
for (int j = prefix.length(); j <= input.length(); j++) {
substr = input.substring(0, j);
if (substr.endsWith(prefix))
occurs++;
}
product = occurs*prefix.length();
System.out.println("product of " + prefix + " = " +
prefix.length() + " * " + occurs +" = " + product);
maxProduct = (product > maxProduct)?product:maxProduct;
}
System.out.println("maxProduct = " + maxProduct);
}
}