Efficient data structure for storing a long sequence of (mostly consecutive) integers

前端 未结 5 446
礼貌的吻别
礼貌的吻别 2021-01-04 07:00

I'd like a data structure to efficiently store a long sequence of numbers. The numbers should always be whole integers, let's say Longs.

The feature of the inputs

5条回答
  •  滥情空心
    2021-01-04 07:38

    I'm surprised no one has suggested segment trees over the integer domain of stored values. (When used in geometric applications like graphics in 2d and 3d, they're called quadtrees and octrees resp.) Insert, delete, and lookup will have time and space complexity proportional to the number of bits in (maxval - minval), that is log_2 (maxval - minval), where maxval and minval are the max and min values of the integer data domain.

    In a nutshell, we are encoding a set of integers in [minval, maxval]. A node at topmost level 0 represents that entire range. Each successive level's nodes represent sub-ranges of approximate size (maxval - minval) / 2^k. When a node is included, some subset of its corresponding values are part of the represented set. When it's a leaf, all of its values are in the set. When it's absent, none are.

    E.g. if minval=0 and maxval=7, then the k=1 children of the k=0 node represent [0..3] and [4..7]. Their children at level k=2 are [0..1], [2..3], [4..5], and [6..7], and the k=3 nodes represent individual elements. The set {[1..3], [6..7]} would be the tree (levels left to right):

    [0..7] -- [0..3] -- [0..1]
           |         |        `-[1] 
           |         `- [2..3]
            ` [4..7]
                     `- [6..7]
    

    It's not hard to see that space for the tree will be O(m log (maxval - minval)) where m is the number of intervals stored in the tree.

    It's not common to use segment trees with dynamic insert and delete, but the algorithms turn out to be fairly simple. It takes some care to ensure the number of nodes is minimized.

    Here is some very lightly tested java code.

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    
    /**
     * A set of {@code long} values in {@code [MIN_VAL, MAX_VAL]}, stored as a binary tree that
     * recursively halves the domain.
     *
     * <p>Representation: a {@code null} subtree means no value of its range is in the set; a node
     * with no children (a "leaf") means every value of its range is in the set; any other node
     * holds some values, described by its children. The range a node covers is not stored in the
     * node; it is implied by the path from the root and recomputed during each traversal.
     */
    public class SegmentTree {
      // MAX_VAL - MIN_VAL must not exceed Long.MAX_VALUE, otherwise (hi - lo) overflows on split.
      static final long MIN_VAL = 0;
      static final long MAX_VAL = Long.MAX_VALUE;

      /** Root of the tree; {@code null} represents the empty set. */
      Node root;

      /** Tree node; see the class comment for how null / leaf / interior nodes are interpreted. */
      static class Node {
        Node left;
        Node right;

        Node(Node left, Node right) {
          this.left = left;
          this.right = right;
        }
      }

      /** Returns true if {@code node} is non-null and has no children (its whole range is set). */
      private static boolean isLeaf(Node node) {
        return node != null && node.left == null && node.right == null;
      }

      /**
       * Returns a node with the given children, reusing {@code node} when it is non-null and
       * allocating a new one otherwise.
       */
      private static Node reset(Node node, Node left, Node right) {
        if (node == null) {
          return new Node(left, right);
        }
        node.left = left;
        node.right = right;
        return node;
      }

      /**
       * Accepts a subtree representing a subset S of the range [lo,hi] and transforms it into a
       * subtree representing S + [a,b]. Callers must ensure a >= lo and b <= hi whenever [a,b] is
       * non-empty.
       *
       * @return the (possibly reused) root of the updated subtree, or null if it is empty
       */
      private static Node add(Node node, long lo, long hi, long a, long b) {
        // An empty range can never hold anything.
        if (lo > hi) {
          return null;
        }
        // Already full, or nothing to insert in this range: no change.
        if (isLeaf(node) || a > b || b < lo || a > hi) {
          return node;
        }
        // Insertion fills the range exactly: collapse to a leaf.
        if (a == lo && b == hi) {
          return reset(node, null, null);
        }
        // Partial cover: split the range and insert into each half.
        Node left = node == null ? null : node.left;
        Node right = node == null ? null : node.right;
        long mid = lo + (hi - lo) / 2;
        left = add(left, lo, mid, a, Math.min(b, mid));
        right = add(right, mid + 1, hi, Math.max(a, mid + 1), b);
        // Coalesce to a single leaf when both halves became full, keeping the tree minimal.
        return isLeaf(left) && isLeaf(right) ? reset(node, null, null) : reset(node, left, right);
      }

      /**
       * Accepts a subtree representing a subset S of the range [lo,hi] and transforms it into a
       * subtree representing S - [a,b]. Callers must ensure a >= lo and b <= hi whenever [a,b] is
       * non-empty.
       *
       * @return the root of the updated subtree, or null if it is empty
       */
      private static Node del(Node node, long lo, long hi, long a, long b) {
        // Nothing stored here, or the range itself is empty.
        if (node == null || lo > hi) {
          return null;
        }
        // Deletion misses this range entirely: no change.
        if (a > b || b < lo || a > hi) {
          return node;
        }
        // Deletion covers the whole range: the subtree becomes empty.
        if (a == lo && b == hi) {
          return null;
        }
        if (isLeaf(node)) {
          // A full range loses its middle: rebuild from the surviving pieces on either side.
          // The guards matter: b + 1 wraps around when b == Long.MAX_VALUE (and a - 1 when
          // a == Long.MIN_VALUE), which would hand add() an out-of-range interval and make it
          // recurse without end.
          Node rest = null;
          if (b < hi) {
            rest = add(rest, lo, hi, b + 1, hi);
          }
          if (a > lo) {
            rest = add(rest, lo, hi, lo, a - 1);
          }
          return rest;
        }
        // Interior node: delete from each half.
        long mid = lo + (hi - lo) / 2;
        Node left = del(node.left, lo, mid, a, Math.min(b, mid));
        Node right = del(node.right, mid + 1, hi, Math.max(a, mid + 1), b);
        // Coalesce to the empty tree when both halves became empty.
        return left == null && right == null ? null : reset(node, left, right);
      }

      /**
       * Returns true if every value of [a,b] is in the subset of [lo,hi] represented by
       * {@code node}. An empty query (a > b) is trivially contained.
       */
      private static boolean contains(Node node, long lo, long hi, long a, long b) {
        if (isLeaf(node) || a > b) {
          return true;
        }
        if (node == null || lo > hi) {
          return false;
        }
        long mid = lo + (hi - lo) / 2;
        return contains(node.left, lo, mid, a, Math.min(b, mid))
            && contains(node.right, mid + 1, hi, Math.max(a, mid + 1), b);
      }

      /**
       * Adds every value in [a,b] to the set. The interval is clamped to [MIN_VAL, MAX_VAL]; an
       * empty interval (a > b) is a no-op.
       *
       * @return this tree, for chaining
       */
      SegmentTree addRange(long a, long b) {
        root = add(root, MIN_VAL, MAX_VAL, Math.max(a, MIN_VAL), Math.min(b, MAX_VAL));
        return this;
      }

      /** Adds the single value {@code a}; equivalent to {@code addRange(a, a)}. */
      SegmentTree addVal(long a) {
        return addRange(a, a);
      }

      /**
       * Removes every value in [a,b] from the set. The interval is clamped to [MIN_VAL, MAX_VAL];
       * an empty interval (a > b) is a no-op.
       *
       * @return this tree, for chaining
       */
      SegmentTree delRange(long a, long b) {
        root = del(root, MIN_VAL, MAX_VAL, Math.max(a, MIN_VAL), Math.min(b, MAX_VAL));
        return this;
      }

      /** Removes the single value {@code a}; equivalent to {@code delRange(a, a)}. */
      SegmentTree delVal(long a) {
        return delRange(a, a);
      }

      /** Returns true if {@code a} is in the set; equivalent to {@code containsRange(a, a)}. */
      boolean containsVal(long a) {
        return containsRange(a, a);
      }

      /**
       * Returns true if every value in [a,b] is in the set. An empty interval (a > b) is
       * trivially contained. A non-empty interval that reaches outside [MIN_VAL, MAX_VAL] is
       * never contained, because values outside the domain cannot be stored.
       */
      boolean containsRange(long a, long b) {
        if (a > b) {
          return true;
        }
        // Clamping here would silently drop the out-of-domain part and could turn the query
        // into an empty one, wrongly reporting e.g. containsVal(-1) as true.
        if (a < MIN_VAL || b > MAX_VAL) {
          return false;
        }
        return contains(root, MIN_VAL, MAX_VAL, a, b);
      }

      /** When true, adjacent full ranges are merged into one segment in the printed output. */
      private static final boolean PRINT_SEGS_COALESCED = true;

      /**
       * Appends the stored segments of the subtree to {@code segs} in ascending order, as a flat
       * list of alternating (start, end) values.
       */
      private static void gatherSegs(List<Long> segs, Node node, long lo, long hi) {
        if (node == null) {
          return;
        }
        if (node.left == null && node.right == null) {
          int last = segs.size() - 1;
          if (PRINT_SEGS_COALESCED && last >= 0 && segs.get(last).longValue() == lo - 1) {
            // This leaf starts right after the previous segment ends: extend that segment by
            // dropping its end value and keeping its start.
            segs.remove(last);
          } else {
            segs.add(lo);
          }
          segs.add(hi);
        } else {
          long mid = lo + (hi - lo) / 2;
          gatherSegs(segs, node.left, lo, mid);
          gatherSegs(segs, node.right, mid + 1, hi);
        }
      }

      /** Returns the stored segments as {@code [a,b][c,d]...}; the empty set is the empty string. */
      @Override
      public String toString() {
        List<Long> segs = new ArrayList<>();
        gatherSegs(segs, root, MIN_VAL, MAX_VAL);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i + 1 < segs.size(); i += 2) {
          sb.append('[').append(segs.get(i)).append(',').append(segs.get(i + 1)).append(']');
        }
        return sb.toString();
      }

      /**
       * Prints the stored segments on one line to standard output.
       *
       * @return this tree, for chaining
       */
      SegmentTree print() {
        System.out.println(this);
        return this;
      }

      public static void main(String [] args) {
        SegmentTree tree = new SegmentTree()
            .addRange(0, 4).print()
            .addRange(6, 7).print()
            .delVal(2).print()
            .addVal(5).print()
            .addRange(0,1000).print()
            .addVal(5).print()
            .delRange(22, 777).print();
        System.out.println(tree.containsRange(3, 20));
      }
    }
    

提交回复
热议问题