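For example, let the string be the first 10 digits of pi, `3141592653`, and the subsequence be `123`. Note that the subsequence occurs twice: once starting from the first `1` and once starting from the second `1`, each followed by the `2` and the final `3`.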
A JavaScript answer based on the dynamic-programming approach from geeksforgeeks.org and on aioobe's answer:
/**
 * Counts how many times `subseq` occurs as a (not necessarily contiguous)
 * subsequence of `seq`, using a dynamic-programming table.
 *
 * `tbl[r][c]` holds the number of ways the first `r` characters of `subseq`
 * can be matched inside the first `c` characters of `seq`.
 */
class SubseqCounter {
  /**
   * @param {string} subseq - the pattern to look for
   * @param {string} seq - the text to search in
   */
  constructor(subseq, seq) {
    this.seq = seq;
    this.subseq = subseq;

    const rowCount = subseq.length + 1;
    const colCount = seq.length + 1;
    this.tbl = Array.from({ length: rowCount }, () => new Array(colCount));

    // An empty pattern matches exactly once, whatever the text prefix is.
    this.tbl[0].fill(1);
    // A non-empty pattern can never be matched inside an empty text.
    for (let r = 1; r < rowCount; r++) {
      this.tbl[r][0] = 0;
    }
  }

  /**
   * Fills the table row by row and returns the total number of matches.
   * @returns {number}
   */
  countMatches() {
    const rowCount = this.tbl.length;
    for (let r = 1; r < rowCount; r++) {
      const cells = this.tbl[r];
      for (let c = 1; c < cells.length; c++) {
        cells[c] = this.countMatchesFor(r, c);
      }
    }
    return this.tbl[this.subseq.length][this.seq.length];
  }

  /**
   * Computes one table cell from the cells to its left and upper-left,
   * which must already be filled in.
   * @param {number} subseqDigitsLeft - length of the pattern prefix
   * @param {number} seqDigitsLeft - length of the text prefix
   * @returns {number}
   */
  countMatchesFor(subseqDigitsLeft, seqDigitsLeft) {
    // Matches that do not use the last character of the text prefix.
    const withoutLast = this.tbl[subseqDigitsLeft][seqDigitsLeft - 1];
    const lastCharsAgree =
      this.subseq.charAt(subseqDigitsLeft - 1) === this.seq.charAt(seqDigitsLeft - 1);
    // When the last characters agree, also count the matches that pair them up.
    return lastCharsAgree
      ? withoutLast + this.tbl[subseqDigitsLeft - 1][seqDigitsLeft - 1]
      : withoutLast;
  }
}
Great answer, aioobe! To complement your answer, some possible implementations in Python:
1) straightforward, naïve solution; too slow!
def num_subsequences(seq, sub):
    """Count the occurrences of ``sub`` as a subsequence of ``seq``.

    Plain recursion on the first element of ``seq``: it is either skipped
    or, when it equals the first element of ``sub``, consumed together
    with it.  Overlapping sub-problems are solved again and again, which
    is why this version is slow on long inputs.
    """
    # An exhausted pattern has been matched exactly once.
    if not sub:
        return 1
    # Pattern left over but no text to match it against.
    if not seq:
        return 0

    head, tail = seq[0], seq[1:]
    skipped = num_subsequences(tail, sub)
    if head != sub[0]:
        return skipped
    return skipped + num_subsequences(tail, sub[1:])
2) top-down solution using explicit memoization
def num_subsequences(seq, sub):
    """Count the occurrences of ``sub`` as a subsequence of ``seq``.

    Top-down recursion over index pairs ``(i, j)`` with results memoized
    in a dictionary, so each pair is computed at most once.
    """
    seq_len = len(seq)
    sub_len = len(sub)
    memo = {}

    def count(i, j):
        """Ways to match ``sub[j:]`` inside ``seq[i:]``."""
        if j == sub_len:
            return 1
        if i == seq_len:
            return 0

        key = (i, j)
        try:
            return memo[key]
        except KeyError:
            pass

        # Either skip seq[i], or (when it fits) pair it with sub[j].
        total = count(i + 1, j)
        if seq[i] == sub[j]:
            total += count(i + 1, j + 1)
        memo[key] = total
        return total

    return count(0, 0)
3) top-down solution using the lru_cache decorator (available from functools in python >= 3.2)
from functools import lru_cache
def num_subsequences(seq, sub):
    """Count the occurrences of ``sub`` as a subsequence of ``seq``.

    Top-down recursion over index pairs, with memoization delegated to
    ``functools.lru_cache``.
    """
    seq_len = len(seq)
    sub_len = len(sub)

    @lru_cache(maxsize=None)
    def count(i, j):
        """Ways to match ``sub[j:]`` inside ``seq[i:]``."""
        if j == sub_len:
            return 1
        if i == seq_len:
            return 0
        # Skip seq[i] ...
        total = count(i + 1, j)
        # ... or pair it with sub[j] when the two are equal.
        if seq[i] == sub[j]:
            total += count(i + 1, j + 1)
        return total

    return count(0, 0)
4) bottom-up, dynamic programming solution using a lookup table
def num_subsequences(seq, sub):
    """Count the occurrences of ``sub`` as a subsequence of ``seq``.

    Bottom-up dynamic programming over a full lookup table.
    ``table[r][c]`` is the number of ways to match the last ``c`` items
    of ``sub`` inside the last ``r`` items of ``seq``.

    Returns 1 when ``sub`` is empty and 0 when only ``seq`` is empty.
    """
    rows, cols = len(seq) + 1, len(sub) + 1
    # ``range`` rather than ``xrange`` so the function runs on Python 3.
    table = [[0] * cols for _ in range(rows)]

    # An empty pattern suffix is matched exactly once by any text suffix.
    for row in range(rows):
        table[row][0] = 1

    # Row 0 (empty text suffix) keeps its zeros for every non-empty pattern.
    for row in range(1, rows):
        seq_item = seq[rows - row - 1]  # first item of the current text suffix
        above = table[row - 1]
        current = table[row]
        for col in range(1, cols):
            # Matches that skip seq_item ...
            ways = above[col]
            # ... plus matches that pair it with the first pattern item.
            if seq_item == sub[cols - col - 1]:
                ways += above[col - 1]
            current[col] = ways

    return table[rows - 1][cols - 1]
5) bottom-up, dynamic programming solution using a single array
def num_subsequences(seq, sub):
    """Count the occurrences of ``sub`` as a subsequence of ``seq``.

    Bottom-up dynamic programming that keeps a single array instead of a
    full table.  After processing a prefix of ``seq``, ``table[j]`` is the
    number of ways to match ``sub[:j + 1]`` inside that prefix.

    Returns 1 when ``sub`` is empty.
    """
    if not sub:
        return 1

    table = [0] * len(sub)
    for item in seq:
        # Ways to match the empty pattern prefix: always exactly one.
        previous = 1
        for j, wanted in enumerate(sub):
            # Remember the count from before this item was processed; the
            # next column must not see the value updated in this pass.
            current = table[j]
            if item == wanted:
                table[j] += previous
            previous = current
    return table[-1]
One way to do it would be with two lists. Call them `Ones` and `OneTwos`.
Go through the string, character by character.
- Whenever you see the digit `1`, make an entry in the `Ones` list.
- Whenever you see the digit `2`, go through the `Ones` list and add an entry to the `OneTwos` list.
- Whenever you see the digit `3`, go through the `OneTwos` list and output a `123`.

In the general case that algorithm will be very fast, since it's a single pass through the string and multiple passes through what will normally be much smaller lists. Pathological cases will kill it, though. Imagine a string like `111111222222333333`, but with each digit repeated hundreds of times.