I searched online for a C++ Longest Common Substring implementation but failed to find a decent one. I need an LCS algorithm that returns the substring itself, so it's not j
Here is a C# version that finds the longest common substring(s) of two strings using dynamic programming (see http://codingworkout.blogspot.com/2014/07/longest-common-substring.html for more details):
/// <summary>
/// Running result of a longest-common-substring scan: the best length seen so far
/// and where each occurrence of that length ends.
/// </summary>
class LCSubstring
{
    /// <summary>Length of the longest common substring found so far; 0 when none has been found.</summary>
    public int Length = 0;

    /// <summary>
    /// One pair per occurrence of a common substring of <see cref="Length"/> characters.
    /// Item1 and Item2 are the 1-based positions in the first and second string at which
    /// that occurrence ends, so subtracting <see cref="Length"/> gives its 0-based start.
    /// </summary>
    public List<Tuple<int, int>> indices = new List<Tuple<int, int>>();
}
/// <summary>
/// Finds the longest common substring(s) of two strings using a longest-common-suffix
/// dynamic-programming table.
/// </summary>
/// <param name="A">First string.</param>
/// <param name="B">Second string.</param>
/// <returns>
/// One entry per (position in A, position in B) pair at which a common substring of maximal
/// length occurs, in the order the pairs are discovered (row by row over A, then over B).
/// The same text therefore appears more than once if it occurs at several positions.
/// Returns an empty array when the strings share no character.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="A"/> or <paramref name="B"/> is null.</exception>
/// <remarks>Uses (A.Length + 1) * (B.Length + 1) table cells, filling each cell once.</remarks>
public string[] LongestCommonSubStrings(string A, string B)
{
    if (A == null)
    {
        throw new ArgumentNullException(nameof(A));
    }
    if (B == null)
    {
        throw new ArgumentNullException(nameof(B));
    }

    // DP_LCSuffix_Cache[i][j] = length of the longest common suffix of the first i
    // characters of A and the first j characters of B. Row 0 and column 0 stand for
    // the empty prefix and stay 0.
    int[][] DP_LCSuffix_Cache = new int[A.Length + 1][];
    for (int i = 0; i <= A.Length; i++)
    {
        DP_LCSuffix_Cache[i] = new int[B.Length + 1];
    }

    LCSubstring lcsSubstring = new LCSubstring();
    for (int i = 1; i <= A.Length; i++)
    {
        for (int j = 1; j <= B.Length; j++)
        {
            // LCSuffix(Ai, Bj) = 0                        if A[i] != B[j]
            //                  = LCSuffix(Ai-1, Bj-1) + 1 if A[i] == B[j]
            if (A[i - 1] != B[j - 1])
            {
                // Mismatch: the cell keeps its default value of 0.
                continue;
            }

            int lcSuffix = 1 + DP_LCSuffix_Cache[i - 1][j - 1];
            DP_LCSuffix_Cache[i][j] = lcSuffix;

            if (lcSuffix > lcsSubstring.Length)
            {
                // Strictly longer match: every previously recorded occurrence is obsolete.
                lcsSubstring.Length = lcSuffix;
                lcsSubstring.indices.Clear();
                lcsSubstring.indices.Add(new Tuple<int, int>(i, j));
            }
            else if (lcSuffix == lcsSubstring.Length)
            {
                // There may be more than one longest common substring.
                lcsSubstring.indices.Add(new Tuple<int, int>(i, j));
            }
        }
    }

    if (lcsSubstring.Length == 0)
    {
        return new string[0];
    }

    List<string> substrings = new List<string>(lcsSubstring.indices.Count);
    foreach (Tuple<int, int> end in lcsSubstring.indices)
    {
        // The recorded pair is the 1-based end of the match, which makes this the
        // 0-based start of the match in A and B respectively.
        int startA = end.Item1 - lcsSubstring.Length;
        int startB = end.Item2 - lcsSubstring.Length;

        // The cell just before the match must be 0, otherwise the match would have
        // been longer than the recorded maximum.
        System.Diagnostics.Debug.Assert(DP_LCSuffix_Cache[startA][startB] == 0);

        substrings.Add(A.Substring(startA, lcsSubstring.Length));
    }
    return substrings.ToArray();
}
The unit tests are:
/// <summary>
/// Exercises LongestCommonSubStrings with a single longest match, with two distinct
/// longest matches, and with a shared suffix that grows on every iteration.
/// </summary>
[TestMethod]
public void LCSubstringTests()
{
    // Exactly one longest common substring.
    string A = "ABABC", B = "BABCA";
    string[] substrings = this.LongestCommonSubStrings(A, B);
    Assert.AreEqual(1, substrings.Length);
    Assert.AreEqual("BABC", substrings[0]);

    // Two different common substrings tie for the maximum length.
    A = "ABCXYZ"; B = "XYZABC";
    substrings = this.LongestCommonSubStrings(A, B);
    Assert.AreEqual(2, substrings.Length);
    CollectionAssert.Contains(substrings, "ABC");
    CollectionAssert.Contains(substrings, "XYZ");

    // The prefixes share no character, so the only common part is the digit
    // suffix appended to both strings on each pass.
    A = "ABC"; B = "UVWXYZ";
    string substring = "";
    for (int i = 1; i <= 10; i++)
    {
        A += i;
        B += i;
        substring += i;
        substrings = this.LongestCommonSubStrings(A, B);
        Assert.AreEqual(1, substrings.Length);
        Assert.AreEqual(substring, substrings[0]);
    }
}