背景:海量数据处理
笔试题:给四十亿个不重复、未排序的无符号整数。再给一个无符号整数,如何快速判断这个数是否在这四十亿个数中?
在这里我们就可以用到我们的哈希思想。
位图:如果只需要表示一个数字存不存在,那么一个比特位就足够了。所以我们用一个数组来存放这些比特位:数组的每个元素是 uint64_t 类型【uint64_t 不论在多少位的机器上都是八字节,也就是 64 个比特位】,因此每个元素可以表示 64 个数字。数字 index 对应数组中第 index/64 个元素的第 index%64 个比特位。
这样就可以轻松解决问题
下面是代码具体实现
bitmap.h
#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <stdint.h>

/* Storage word of the bitmap; one word holds 64 membership bits. */
typedef uint64_t bitmaptype;

/* A set of bits addressed by index in the range [0, capacity). */
typedef struct bitmap {
    bitmaptype *data;  /* heap-allocated array of words holding the bits */
    uint64_t capacity; /* number of bits requested at initialisation */
} bitmap;

/*
 * Allocate zeroed storage for `capacity` bits and record the capacity in `bm`.
 * Does nothing when `bm` is NULL. Release the storage with bitmapdestroy().
 */
void bitmapinit(bitmap *bm, uint64_t capacity);

/* Free the storage owned by `bm` and reset its capacity to 0. */
void bitmapdestroy(bitmap *bm);

/* Set bit `index` to 1. `index` must be less than the bitmap's capacity. */
void bitmapset(bitmap *bm, uint64_t index);

/* Clear bit `index` to 0. `index` must be less than the bitmap's capacity. */
void bitmapunset(bitmap *bm, uint64_t index);

/*
 * Set to 1 every bit of the words that cover the first `capacity` bits.
 * `capacity` must not exceed the capacity given to bitmapinit().
 */
void bitmapfill(bitmap *bm, uint64_t capacity);

/*
 * Clear to 0 every bit of the words that cover the first `capacity` bits.
 * `capacity` must not exceed the capacity given to bitmapinit().
 */
void bitmapclear(bitmap *bm, uint64_t capacity);

/*
 * Return 1 when bit `index` is set and 0 when it is clear.
 * `index` must be less than the bitmap's capacity.
 */
uint64_t bitmaptest(bitmap *bm, uint64_t index);
bitmap.c
#include <stdio.h> #include "bitmap.h" void bitmapinit(bitmap *bm,uint64_t capacity) { if(bm == NULL) { return; } bm->capacity = capacity; uint64_t size = capacity/(sizeof(bitmaptype)*8)+1; bm->data = (bitmaptype *)malloc(sizeof(bitmaptype)*size); memset(bm->data,0,sizeof(bitmaptype)*size); } //uint64_t getsize(uint64_t capacity) //{ // uint64_t ret = capacity/(sizeof(bitmaptype)*8)+1; // return ret; //} void bitmapdestroy(bitmap *bm) { if(bm == NULL) { return; } bm->capacity = 0; free(bm->data); return; } uint64_t bitmaptest(bitmap *bm,uint64_t index) { if(bm == NULL || index >= bm->capacity) { return; } uint64_t n,offset; n = index/(sizeof(bitmaptype)*8); offset = index % (sizeof(bitmaptype)*8); u_int64_t ret = bm->data[n] & (0x1ul << offset); return ret > 0 ? 1 : 0; } //void getoffset(uint64_t index,uint64_t* n,uint64_t *offset) //{ // *n = index/(sizeof(bitmaptype)*8); // *offset = index %(sizeof(bitmaptype)*8); // //} void bitmapset(bitmap *bm,uint64_t index) { if(bm == NULL) { return; } uint64_t n,offset; n = index/(sizeof(bitmaptype)*8); offset = index % (sizeof(bitmaptype)*8); bm->data[n] |= 0x1ul << offset; return; } void bitmapunset(bitmap *bm,uint64_t index) { if(bm == NULL) { return; } uint64_t n,offset; n = index/(sizeof(bitmaptype)*8); offset = index % (sizeof(bitmaptype)*8); bm->data[n] &= ~(0x1ul << offset); return; } void bitmapfill(bitmap *bm,uint64_t capacity) { if(bm == NULL) { return; } uint64_t size = capacity/(sizeof(bitmaptype)*8)+1; memset(bm->data,0xff,sizeof(bitmaptype)*size); return; } void bitmapclear(bitmap *bm,uint64_t capacity) { if(bm == NULL) { return; } uint64_t size = capacity/(sizeof(bitmaptype)*8)+1; memset(bm->data,0x0,sizeof(bitmaptype)*size); return; } #define HEADER printf("\n============%s=============\n",__FUNCTION__) void testbitmaptest() { HEADER; bitmap bitmap; bitmapinit(&bitmap,100); int ret = bitmaptest(&bitmap,50); printf("excpted ret is 0,actul is %d\n",ret); } void testbitmapset() { HEADER; bitmap bitmap; 
bitmapinit(&bitmap,100); int ret1 = bitmaptest(&bitmap,50); printf("excpted ret1 is 0,actul is %d\n",ret1); bitmapset(&bitmap,50); int ret2 = bitmaptest(&bitmap,50); printf("excpted ret2 is 1,actul is %d\n",ret2); } void testbitmapunset() { HEADER; bitmap bitmap; bitmapinit(&bitmap,100); int ret1 = bitmaptest(&bitmap,50); printf("excpted ret1 is 0,actul is %d\n",ret1); bitmapset(&bitmap,50); int ret2 = bitmaptest(&bitmap,50); printf("excpted ret2 is 1,actul is %d\n",ret2); bitmapunset(&bitmap,50); int ret3 = bitmaptest(&bitmap,50); printf("excpted ret3 is 0,actul is %d\n",ret3); } void testbitmapfill() { HEADER; bitmap bitmap; bitmapinit(&bitmap,100); int ret1 = bitmaptest(&bitmap,50); printf("excpted ret1 is 0,actul is %d\n",ret1); bitmapfill(&bitmap,100); int ret2 = bitmaptest(&bitmap,50); printf("excpted ret2 is 1,actul is %d\n",ret2); } void testbitmapclear() { HEADER; bitmap bitmap; bitmapinit(&bitmap,100); int ret1 = bitmaptest(&bitmap,50); printf("excpted ret1 is 0,actul is %d\n",ret1); bitmapfill(&bitmap,100); int ret2 = bitmaptest(&bitmap,50); printf("excpted ret2 is 1,actul is %d\n",ret2); bitmapclear(&bitmap,100); int ret3 = bitmaptest(&bitmap,50); printf("excpted ret3 is 0,actul is %d\n",ret3); } int main() { testbitmaptest(); testbitmapset(); testbitmapunset(); testbitmapfill(); testbitmapclear(); return; }
下面就是我们的实验结果
不过位图也有缺点,就是它只能表示这个数字存不存在,而不能表示其他东西。
文章来源: 数据结构之哈希变形——位图