Longest common subsequence of 3+ strings

前端 未结 5 774
深忆病人
深忆病人 2020-12-23 12:30

I am trying to find the longest common subsequence of 3 or more strings. The Wikipedia article has a great description of how to do this for 2 strings, but I'm a little uns

5条回答
  •  南笙
    南笙 (楼主)
    2020-12-23 13:26

    The code below finds the longest common subsequence of N strings. It uses itertools to generate the required index combinations and then uses these indexes to find the common subsequence.

    Example Execution:
    Input:
    Enter the number of sequences: 3
    Enter sequence 1 : 83217
    Enter sequence 2 : 8213897
    Enter sequence 3 : 683147

    Output:
    837

    from itertools import product
    import numpy as np
    import pdb
    
    def neighbors(index):
        """Yield the predecessor cells of ``index`` in an N-dimensional grid.

        Each coordinate is either kept or decremented by one; the combination
        that keeps every coordinate (i.e. ``index`` itself) is skipped.  The
        cells are produced in ``itertools.product`` order, so the last one
        yielded is the "diagonal" cell with every coordinate decremented.
        """
        dimensions = len(index)
        for offsets in product((0, -1), repeat=dimensions):
            # An all-zero offset would reproduce ``index`` itself.
            if any(offsets):
                yield tuple(coord + delta for coord, delta in zip(index, offsets))
    
    def longestCommonSubSequenceOfN(sqs):
        """Return a longest common subsequence of all sequences in ``sqs``.

        An N-dimensional dynamic-programming table is filled in which the cell
        ``(i1, ..., iN)`` holds the length of the longest common subsequence of
        the prefixes ``sqs[0][:i1], ..., sqs[N-1][:iN]``.  The subsequence
        itself is then recovered by walking back from the far corner of the
        table.

        Args:
            sqs: A list of indexable sequences (typically strings).

        Returns:
            The common subsequence as a string, built from ``str()`` of each
            selected element of ``sqs[0]``.  An empty string is returned when
            ``sqs`` is empty or when the sequences share no element.

        Note:
            The table has ``prod(len(s) + 1 for s in sqs)`` cells and every
            cell inspects ``2**N - 1`` predecessors, so time and memory grow
            exponentially with the number of sequences.
        """
        sequence_count = len(sqs)
        if sequence_count == 0:
            # Nothing to compare; avoids indexing into an empty predecessor list.
            return ""

        lengths = tuple(len(sequence) for sequence in sqs)

        # The relative offsets to the predecessor cells are identical for every
        # cell, so build them once.  The all-zero offset (the cell itself) is
        # dropped; the last entry is the all "-1" diagonal step.
        offsets = [
            relative
            for relative in product((0, -1), repeat=sequence_count)
            if any(relative)
        ]

        def predecessors(cell):
            # Absolute indexes of the predecessor cells, in ``offsets`` order.
            return [
                tuple(coord + delta for coord, delta in zip(cell, relative))
                for relative in offsets
            ]

        # Row/column 0 in every dimension stands for the empty prefix.
        table = np.zeros(tuple(length + 1 for length in lengths), dtype=int)

        # Iterate with the first dimension varying fastest; every predecessor
        # of a cell is filled in before the cell itself.
        ranges = [range(1, length + 1) for length in lengths[::-1]]
        for reversed_cell in product(*ranges):
            cell = reversed_cell[::-1]
            previous = predecessors(cell)
            diagonal = previous[-1]
            best = max(table[candidate] for candidate in previous)
            # ``diagonal`` holds the 0-based positions of the elements that
            # end the prefixes described by ``cell``.
            chars = [sequence[pos] for sequence, pos in zip(sqs, diagonal)]
            if all(elem == chars[0] for elem in chars):
                best = max(best, table[diagonal] + 1)
            table[cell] = best

        # Walk back from the far corner.  Moving to a predecessor with the same
        # value loses nothing; when no such predecessor exists, the current
        # element belongs to the subsequence and the walk steps diagonally.
        picked = []
        cell = lengths
        while all(position > 0 for position in cell):
            current = table[cell]
            previous = predecessors(cell)
            for candidate in previous:
                if table[candidate] == current:
                    cell = candidate
                    break
            else:
                picked.append(str(sqs[0][cell[0] - 1]))
                cell = previous[-1]

        # Elements were collected back to front.  Reverse the list of elements
        # (not the joined string) so multi-character elements stay intact.
        return "".join(reversed(picked))
    
    # Interactive driver: ask how many sequences to read, prompt for each one
    # in turn (numbered from 1), then print their longest common subsequence.
    numberOfSequences = int(input("Enter the number of sequences: "))
    sequences = list(
        map(
            input,
            (
                "Enter sequence {} : ".format(position)
                for position in range(1, numberOfSequences + 1)
            ),
        )
    )
    print(longestCommonSubSequenceOfN(sequences))
    

提交回复
热议问题