Java On-Memory Efficient Key-Value Store

前端 未结 6 1744
不思量自难忘°
不思量自难忘° 2020-12-08 23:26

I have stored 111 million key-value pairs (one key can have multiple values – at most 2–3) whose keys are 50-bit integers and whose values are 32-bit (maximum) integers. Now, my req

6条回答
  •  执念已碎
    2020-12-09 00:00

    Based on @Tom Anderson's solution I removed the need to allocate objects, and added a performance test.

    import java.util.Arrays;
    import java.util.Random;
    
    /**
     * A memory-efficient multimap from {@code long} keys to {@code int} values,
     * backed by two parallel primitive arrays with open addressing and linear
     * probing (no per-entry objects, no boxing).
     *
     * <p>{@link Long#MIN_VALUE} is reserved as the empty-slot sentinel and is
     * therefore not a legal key. Capacity is fixed at construction; the map does
     * not resize and does not support removal. Not thread-safe.
     */
    public class LongIntParallelHashMultimap {
        /** Sentinel marking an empty slot in {@code keys}; hence an illegal key. */
        private static final long NULL = Long.MIN_VALUE;

        private final long[] keys;
        private final int[] values;
        private int size;

        /**
         * @param capacity fixed number of slots; must be positive. A prime
         *                 capacity keeps {@code key % capacity} well distributed.
         * @throws IllegalArgumentException if {@code capacity <= 0}
         */
        public LongIntParallelHashMultimap(int capacity) {
            if (capacity <= 0)
                throw new IllegalArgumentException("capacity must be positive: " + capacity);
            keys = new long[capacity];
            values = new int[capacity];
            Arrays.fill(keys, NULL);
        }

        /**
         * Adds a key/value pair. Duplicate keys (and even duplicate pairs) are
         * permitted; each call occupies one slot.
         *
         * @throws IllegalArgumentException if {@code key} is the reserved sentinel
         * @throws IllegalStateException if the map is full
         */
        public void put(long key, int value) {
            if (key == NULL) throw new IllegalArgumentException("key cannot be " + NULL);
            if (size == keys.length) throw new IllegalStateException("map is full");

            // Linear probe for the first free slot. Terminates because the size
            // check above guarantees at least one empty slot exists.
            int index = indexFor(key);
            while (keys[index] != NULL) {
                index = successor(index);
            }
            keys[index] = key;
            values[index] = value;
            ++size;
        }

        /**
         * Copies up to {@code hits.length} values stored under {@code key} into
         * {@code hits}, starting at index 0.
         *
         * @return the number of values written into {@code hits}
         * @throws IllegalArgumentException if {@code key} is the reserved sentinel
         */
        public int get(long key, int[] hits) {
            if (key == NULL) throw new IllegalArgumentException("key cannot be " + NULL);

            int index = indexFor(key);
            int hitIndex = 0;

            // Bound the probe count so a lookup of an absent key in a completely
            // full table terminates; an unbounded probe would spin forever since
            // a full table has no empty slot to stop on.
            for (int probes = 0; probes < keys.length && keys[index] != NULL; probes++) {
                if (keys[index] == key) {
                    hits[hitIndex] = values[index];
                    ++hitIndex;
                    if (hitIndex == hits.length)
                        break;
                }
                index = successor(index);
            }

            return hitIndex;
        }

        /** Home slot for a key. Math.abs is safe: |key % length| < 2^31 always. */
        private int indexFor(long key) {
            return Math.abs((int) (key % keys.length));
        }

        /** Next slot with wrap-around; subtraction is cheaper than a modulo. */
        private int successor(int index) {
            index++;
            return index >= keys.length ? index - keys.length : index;
        }

        /** @return number of key/value pairs stored so far. */
        public int size() {
            return size;
        }

        /** Micro-benchmark: key generation, insertion and lookup throughput. */
        public static class PerfTest {
            public static void main(String... args) {
                int values = 110 * 1000 * 1000;
                long start0 = System.nanoTime();
                long[] keysValues = generateKeys(values);

                // Prime capacity, ~0.5 load factor for the 110 M entries above.
                LongIntParallelHashMultimap map = new LongIntParallelHashMultimap(222222227);
                long start = System.nanoTime();
                addKeyValues(values, keysValues, map);
                long mid = System.nanoTime();
                int sum = lookUpKeyValues(values, keysValues, map);
                long time = System.nanoTime();
                // Rate in M/s = count * 1e9 / elapsedNanos / 1e6 = count * 1e3 / elapsedNanos.
                System.out.printf("Generated %.1f M keys/s, Added %.1f M/s and looked up %.1f M/s%n",
                        values * 1e3 / (start - start0), values * 1e3 / (mid - start), values * 1e3 / (time - mid));
                System.out.println("Expected " + values + " got " + sum);
            }

            /** Generates random keys, rejecting the reserved NULL sentinel. */
            private static long[] generateKeys(int values) {
                Random rand = new Random();
                long[] keysValues = new long[values];
                for (int i = 0; i < values; i++) {
                    long key;
                    do {
                        key = rand.nextLong();
                    } while (key == NULL); // sentinel is not a legal key
                    keysValues[i] = key;
                }
                return keysValues;
            }

            /** Inserts each key with its index as the value. */
            private static void addKeyValues(int values, long[] keysValues, LongIntParallelHashMultimap map) {
                for (int i = 0; i < values; i++) {
                    map.put(keysValues[i], i);
                }
                assert map.size() == values;
            }

            /** Looks every key back up; sums hit counts as a sanity check. */
            private static int lookUpKeyValues(int values, long[] keysValues, LongIntParallelHashMultimap map) {
                int[] found = new int[8];
                int sum = 0;
                for (int i = 0; i < values; i++) {
                    sum += map.get(keysValues[i], found);
                }
                return sum;
            }
        }
    }
    

    prints

    Generated 34.8 M keys/s, Added 11.1 M/s and looked up 7.6 M/s
    

    Run on an 3.8 GHz i7 with Java 7 update 3.

    This is much slower than the previous test because you are accessing main memory, rather than the cache at random. This is really a test of the speed of your memory. The writes are faster because they can be performed asynchronously to main memory.


    Using this collection

    final SetMultimap<Long, Integer> map = Multimaps.newSetMultimap(
            TDecorators.wrap(new TLongObjectHashMap<Set<Integer>>()),
            new Supplier<Set<Integer>>() {
                public Set<Integer> get() {
                    return TDecorators.wrap(new TIntHashSet());
                }
            });
    

    Running the same test with 50 million entries (which used about 16 GB) and -Xmx20g I got the following result.

     Generated 47.2 M keys/s, Added 0.5 M/s and looked up 0.7 M/s
    

    For 110 M entries you will need about 35 GB of memory and a machine 10 x faster than mine (3.8 GHz) to perform 5 million adds per second.

提交回复
热议问题