What is the complexity of the algorithm that is used to find the smallest snippet that contains all the search keywords?
I think the solution proposed by AndreyT assumes that no duplicates exist in the keywords/search terms. Also, the current block can get as big as the text itself if the text contains a lot of duplicate keywords. For example: Text: 'ABBBBBBBBBB', Keyword text: 'AB', Current block: 'ABBBBBBBBBB'.
Anyway, I have implemented it in C# and did some basic testing; it would be nice to get some feedback on whether it works or not :)
/// <summary>
/// Finds the shortest substring of <paramref name="text"/> that contains every
/// distinct character of <paramref name="searchTerms"/> at least once.
/// </summary>
/// <param name="text">The text to search in.</param>
/// <param name="searchTerms">
/// The characters to look for. Each character is one search term; repeated
/// characters are treated as a single term. Matching is exact (case-sensitive).
/// </param>
/// <returns>
/// The shortest matching window (the earliest one when several share the minimum
/// length), or <see cref="String.Empty"/> when <paramref name="searchTerms"/> is
/// empty or not all of its characters occur in <paramref name="text"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="text"/> or <paramref name="searchTerms"/> is null.
/// </exception>
/// <remarks>
/// Runs in O(n) time for a text of length n and uses O(k) extra space for k
/// distinct search characters: the window is tracked with a left index and
/// per-character counts instead of a queue of every matched position.
/// </remarks>
static string FindMinWindow(string text, string searchTerms)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    if (searchTerms == null)
    {
        throw new ArgumentNullException("searchTerms");
    }

    // Occurrences of each distinct search character inside the current window.
    // Using the indexer (not Add) makes repeated search characters harmless.
    Dictionary<char, int> windowCounts = new Dictionary<char, int>();
    foreach (char term in searchTerms)
    {
        windowCounts[term] = 0;
    }

    int distinctTerms = windowCounts.Count;
    if (distinctTerms == 0)
    {
        // Nothing to look for; avoids Substring(0, Int32.MaxValue) below.
        return String.Empty;
    }

    int noOfMatches = 0;            // distinct search characters seen so far
    int minLength = Int32.MaxValue;
    int startIndex = 0;
    int left = 0;                   // left edge of the current window

    for (int i = 0; i < text.Length; i++)
    {
        char item = text[i];
        int count;
        if (!windowCounts.TryGetValue(item, out count))
        {
            continue;
        }

        if (count == 0)
        {
            noOfMatches++;
        }
        windowCounts[item] = count + 1;

        // Normalization step: advance the left edge past characters that are
        // not search terms and past search terms that occur again further
        // right in the window. The loop always stops at or before i, because
        // a window consisting only of text[i] holds that character once.
        while (true)
        {
            char head = text[left];
            int headCount;
            if (!windowCounts.TryGetValue(head, out headCount))
            {
                left++;
            }
            else if (headCount > 1)
            {
                windowCounts[head] = headCount - 1;
                left++;
            }
            else
            {
                break;
            }
        }

        // Figuring out minimum length once every term is inside the window.
        if (noOfMatches == distinctTerms)
        {
            int length = i - left + 1;
            if (length < minLength)
            {
                startIndex = left;
                minLength = length;
            }
        }
    }

    return noOfMatches == distinctTerms
        ? text.Substring(startIndex, minLength)
        : String.Empty;
}