Ukkonen's suffix tree algorithm in plain English

后端 未结 7 1675
梦谈多话
梦谈多话 2020-11-22 05:00

I feel a bit thick at this point. I've spent days trying to fully wrap my head around suffix tree construction, but because I don't have a mathematical background, many of…

7条回答
  •  逝去的感伤
    2020-11-22 05:28

    Hi, I have tried to implement the approach explained above in Ruby; please check it out. It seems to work fine.

    The only difference in this implementation is that I have used Edge objects instead of just symbols.

    It is also available at https://gist.github.com/suchitpuri/9304856

        require 'pry'
    
    
    # A tree element that carries a string label (+data+), a list of child
    # elements (+edges+) and an optional +suffix_link+ to another element.
    # SuffixTrees uses one instance with nil +data+ as the root.
    class Edge
        attr_accessor :data , :edges , :suffix_link

        # data - the label String for this element (nil for the root).
        def initialize data
            @data = data
            @edges = []
            @suffix_link = nil
        end

        # Returns the first child whose label starts with +element+, or nil
        # when no child matches.
        #
        # A nil +element+ yields nil instead of raising TypeError from
        # String#start_with?, so callers that look up "the next character"
        # past the end of a string simply get "no edge".
        def find_edge element
            return nil if element.nil?

            edges.find { |edge| edge.data.start_with?(element) }
        end
    end
    
    # Incremental suffix-tree builder written after Ukkonen's algorithm, using
    # Edge objects (label + children + suffix link) for every tree element.
    #
    # State kept between characters:
    #   root             - Edge with nil data; its children are the top-level edges.
    #   active_point     - Hash with :active_node, :active_edge and :active_length.
    #   remainder        - counter of suffixes still waiting to be inserted.
    #   pending_prefixes - the strings to insert for the current character.
    #   last_split_edge  - the edge most recently split, used for suffix links.
    class SuffixTrees
        attr_accessor :root , :active_point , :remainder , :pending_prefixes , :last_split_edge

        def initialize
            @root = Edge.new nil
            @active_point = { active_node: @root , active_edge: nil , active_length: 0}
            @pending_prefixes = []
            @last_split_edge = nil
            @remainder = 1
        end

        # Feeds +string+ into the tree one character at a time.
        #
        # For every character: extend all childless edges with it, recompute
        # the pending prefixes, insert them, and finally step the active node
        # down when the active length has reached the end of the active edge.
        def build string
            string.each_char do |element|
                add_to_edges @root , element

                update_pending_prefix element
                add_pending_elements_to_tree element

                if(@active_point[:active_edge] && @active_point[:active_edge].data && @active_point[:active_edge].data.length == @active_point[:active_length]  )
                    # The whole active edge has been matched: continue from it.
                    @active_point[:active_node] =  @active_point[:active_edge]
                    @active_point[:active_edge] = @active_point[:active_node].find_edge(element[0])
                    @active_point[:active_length] = 0
                end
            end
        end

        # Inserts the pending prefixes for +element+.
        #
        # When the active node already has an edge starting with +element+,
        # nothing is inserted: the active length and remainder are bumped and
        # the method returns. Otherwise each pending prefix either adds a new
        # child to the active node or splits the active edge.
        def add_pending_elements_to_tree element
            if( @active_point[:active_node].find_edge(element[0]) != nil)
                @active_point[:active_length] = @active_point[:active_length] + 1
                @active_point[:active_edge] = @active_point[:active_node].find_edge(element[0]) if @active_point[:active_edge] == nil
                @remainder = @remainder + 1
                return
            end

            @pending_prefixes.each_with_index do |pending_prefix , index|
                if @active_point[:active_edge].nil? && @active_point[:active_node].find_edge(element[0]).nil?
                    @active_point[:active_node].edges << Edge.new(element)
                else
                    # Look the edge up on the active node (the original code
                    # referenced an undefined local here and raised NameError).
                    @active_point[:active_edge] = @active_point[:active_node].find_edge(element[0]) if @active_point[:active_edge] == nil

                    data = @active_point[:active_edge].data
                    data = data.split("")

                    location = @active_point[:active_length]

                    # No split when the edge already spells the pending prefix
                    # or the active node already has an edge for +element+.
                    skip_split = data[0..location].join == pending_prefix ||
                                 @active_point[:active_node].find_edge(element) != nil
                    split_edge data , index , element unless skip_split
                end
            end
        end

        # Rebuilds +pending_prefixes+ for +element+.
        #
        # Without an active edge the only pending prefix is +element+ itself.
        # With one, the list holds the last 1..remainder characters of the
        # active edge's label, longest first.
        def update_pending_prefix element
            if @active_point[:active_edge] == nil
                @pending_prefixes = [element]
                return
            end

            @pending_prefixes = []

            data = @active_point[:active_edge].data
            @remainder.times do |ctr|
                @pending_prefixes << data[-(ctr+1)..data.length-1]
            end

            @pending_prefixes.reverse!
        end

        # Splits the active edge at the active length.
        #
        # data    - Array of the active edge's label characters.
        # index   - position of the prefix being inserted in pending_prefixes.
        # element - the character currently being added.
        #
        # The active edge keeps the first +active_length+ characters; the rest
        # of the label moves to a new first child that inherits the previous
        # children, and a second child is added for the new character.
        def split_edge data , index , element
            location = @active_point[:active_length]
            old_edges = []
            # NOTE(review): Edge#initialize always sets edges to an Array, so
            # this is true unless a caller assigned nil to edges explicitly.
            internal_node = (@active_point[:active_edge].edges != nil)

            if (internal_node)
                old_edges = @active_point[:active_edge].edges
                @active_point[:active_edge].edges = []
            end

            @active_point[:active_edge].data = data[0..location-1].join
            @active_point[:active_edge].edges << Edge.new(data[location..data.size].join)

            if internal_node
                @active_point[:active_edge].edges << Edge.new(element)
            else
                @active_point[:active_edge].edges << Edge.new(data.last)
            end

            if internal_node
                @active_point[:active_edge].edges[0].edges = old_edges
            end

            # Set up the suffix link from the previously split edge when its
            # label ends with the label of the edge that was just split.
            if @last_split_edge != nil && @last_split_edge.data.end_with?(@active_point[:active_edge].data)
                @last_split_edge.suffix_link = @active_point[:active_edge]
            end

            @last_split_edge = @active_point[:active_edge]

            update_active_point index
        end

        # Moves the active point after a split.
        #
        # At the root: shorten the active length, decrement the remainder and
        # pick the edge for the next character of the first pending prefix.
        # Elsewhere: follow the active node's suffix link (or fall back to the
        # root), re-select the edge by the old active edge's first character
        # and decrement the remainder.
        def update_active_point index
            if(@active_point[:active_node] == @root)
                @active_point[:active_length] = @active_point[:active_length] - 1
                @remainder = @remainder - 1
                @active_point[:active_edge] = @active_point[:active_node].find_edge(@pending_prefixes.first[index+1])
            else
                if @active_point[:active_node].suffix_link != nil
                    @active_point[:active_node] = @active_point[:active_node].suffix_link
                else
                    @active_point[:active_node] = @root
                end
                @active_point[:active_edge] = @active_point[:active_node].find_edge(@active_point[:active_edge].data[0])
                @remainder = @remainder - 1
            end
        end

        # Recursively appends +element+ to the label of every element below
        # (and including) +root+ that has a label and no children.
        def add_to_edges root , element
            return if root == nil
            root.data = root.data + element if(root.data and root.edges.size == 0)
            root.edges.each do |edge|
                add_to_edges edge , element
            end
        end
    end
    
    # Demo: build the tree for a sample string, then stop in a pry session
    # (needs the pry gem required at the top of the listing) so that
    # `suffix_tree` can be inspected interactively.
    suffix_tree = SuffixTrees.new
    suffix_tree.build("abcabxabcd")
    binding.pry
    

提交回复
热议问题