How can I use Bit-Fields to save memory?

This is about ANSI-C (C90). This is what I know:

I can directly tell the compiler how many bits I want for a specific variable.
If I want 1 bit which can have the values zero or one.
or 2 bits for the values 0,1,2,3, and so on...;

I'm familiar with the syntax.

I have problem concerning bitfields:

I want to define a SET structure.
It can have maximum 1024 elements (it can have less, but the maximum is 1024 elements).
The domain of the set is from 1 to 1024. So an element could have any value 1-1024.

I'm trying to create a structure for a SET, and it must be efficient as possible for the memory part.

I tried:

typedef struct set
{
    unsigned int var: 1;
} SET;
//now define an array of SETS
SET array_of_sets[MAX_SIZE]  //didn't define MAX_SIZE, but no more than 1024 elements in each set.

I know this isn't efficient; maybe it's even not good for what I want. That's why I'm looking for help.

As noted in extensive comments, using a bit field is not the way to go. You can use just 128 bytes of storage for your set containing values 1..1024. You will need to map the value N to bit N-1 (so you have bits 0..1023 to work with). You also need to decide on the operations you need for your set. This code supports 'create', 'destroy', 'insert', 'delete' and 'in_set'. It does not support iteration over the elements in the set; that can be added if you want it.

sets.h

#ifndef SETS_H_INCLUDED
#define SETS_H_INCLUDED

typedef struct Set Set;
enum { MAX_ELEMENTS = 1024 };

extern Set *create(void);
extern void destroy(Set *set);
extern void insert(Set *set, int value);
extern void delete(Set *set, int value);
extern int in_set(Set *set, int value);

#endif /* SETS_H_INCLUDED */

sets.c

#include "sets.h"
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned long Bits;
#define BITS_C(n)  ((Bits)(n))
enum { ARRAY_SIZE = MAX_ELEMENTS / (sizeof(Bits) * CHAR_BIT) };

struct Set
{
     Bits set[ARRAY_SIZE];
};

Set *create(void)
{
    Set *set = malloc(sizeof(*set));
    if (set != 0)
        memset(set, 0, sizeof(*set));
    return set;
}

void destroy(Set *set)
{
    free(set);
}

void insert(Set *set, int value)
{
    assert(value >= 1 && value <= MAX_ELEMENTS);
    value--;  /* 0..1023 */
    int index = value / (sizeof(Bits) * CHAR_BIT);
    int bitnum = value % (sizeof(Bits) * CHAR_BIT);
    Bits mask = BITS_C(1) << bitnum;
    /* printf("I: %d (%d:%d:0x%.2lX)\n", value+1, index, bitnum, mask); */
    set->set[index] |= mask;
}

void delete(Set *set, int value)
{
    assert(value >= 1 && value <= MAX_ELEMENTS);
    value--;  /* 0..1023 */
    int index = value / (sizeof(Bits) * CHAR_BIT);
    int bitnum = value % (sizeof(Bits) * CHAR_BIT);
    Bits mask = BITS_C(1) << bitnum;
    /* printf("D: %d (%d:%d:0x%.2lX)\n", value+1, index, bitnum, mask); */
    set->set[index] &= ~mask;
}

/* C90 does not support <stdbool.h> */
int in_set(Set *set, int value)
{
    assert(value >= 1 && value <= MAX_ELEMENTS);
    value--;  /* 0..1023 */
    int index = value / (sizeof(Bits) * CHAR_BIT);
    int bitnum = value % (sizeof(Bits) * CHAR_BIT);
    Bits mask = BITS_C(1) << bitnum;
    /* printf("T: %d (%d:%d:0x%.2lX) = %d\n", value+1, index, bitnum, mask,
              (set->set[index] & mask) != 0); */
    return (set->set[index] & mask) != 0;
}

#include <stdio.h>

enum { NUMBERS_PER_LINE = 15 };

int main(void)
{
    Set *set = create();
    if (set != 0)
    {
        int i;
        int n = 0;
        for (i = 1; i <= MAX_ELEMENTS; i += 4)
             insert(set, i);
        for (i = 3; i <= MAX_ELEMENTS; i += 6)
             delete(set, i);

        for (i = 1; i <= MAX_ELEMENTS; i++)
        {
             if (in_set(set, i))
             {
                 printf(" %4d", i);
                 if (++n % NUMBERS_PER_LINE == 0)
                 {
                     putchar('\n');
                     n = 0;
                 }
             }
        }
        if (n % NUMBERS_PER_LINE != 0)
            putchar('\n');
        destroy(set);
    }
    return 0;
}

The functions should really be given a systematic prefix, such as set_. The BITS_C macro is based on the INT64_C macro (and the other related macros) defined in <stdint.h> in C99 and later, which is also not a part of C90.

As per my previous comments, here is an example of how you can pack eight 1-bit elements into one char physical element. I have only implemented the function to get the value of a 1-bit element, I leave the function to set it to you (it's easy to do).

Note: you can easily change the type of the array element (unsigned char) and experiment with types which can hold more bits (e.g unsigned int) and test if they perform better in terms of speed. You can also modify the code to make it handle elements bigger than one bit.

#include <stdio.h>
#include <limits.h>

unsigned int get_el(unsigned char* array, unsigned int index)
{
    unsigned int bits_per_arr_el = sizeof(unsigned char)*CHAR_BIT;
    unsigned int arr_index = index / bits_per_arr_el;
    unsigned int bit_offset = index % bits_per_arr_el;
    unsigned int bitmask = 1 << bit_offset;
    unsigned int retval;

    // printf("index=%u\n", index);
    // printf("bits_per_arr_el=%u\n", bits_per_arr_el);
    // printf("arr_index=%u\n", arr_index);
    // printf("bit_offset=%u\n", bit_offset);

    retval = array[arr_index] & bitmask ? 1 : 0; // can be simpler if only True/False is needed
    return(retval);
}

#define MAX_SIZE 10
unsigned char bitarray[MAX_SIZE];

int main()
{
    bitarray[1] = 3; // 00000011
    printf("array[7]=%u, array[8]=%u, array[9]=%u, array[10]=%u\n",
            get_el(bitarray, 7),
            get_el(bitarray, 8),
            get_el(bitarray, 9),
            get_el(bitarray,10));

    return 0;
}

outputs

array[7]=0, array[8]=1, array[9]=1, array[10]=0

typedef struct set
{
    unsigned short var:10; // uint var:1 will be padded to 32 bits
} SET;                     // ushort var:10 (which is max<=1024) padded to 16 bits

As was commented by @Jonathan Leffler use array(unsigned short[]) and define bitmasks

#define bitZer 0x00  //(unsigned)(0 == 0)? true:true;
#define bitOne 0x10  // so from (both inclusive)0-1023 = 1024
...                  // added for clarification  
#define bitTen 0x0A

to look into the bits of each element. http://www.catb.org/esr/structure-packing/ detailed

To store a value from 0 to 1023 (or from 1 to 1024, which is essentially the same and only involves adding/subtracting 1) you need a minimum of 10 bits.

This means that for 32-bit (unsigned) integers, you can pack 3 values into 30 bits, which gives 2 bits of useless padding.

Example:

%define ELEMENTS 100

uint32_t myArray[ (ELEMENTS + 2) / 3 ];

void setValue(int n, int value) {
    uint32_t temp;
    uint32_t mask = (1 << 10) - 1;

    if(n >= ELEMENTS) return;
    value--;                        // Convert "1 to 1024" into "0 to 1023"
    temp = myArray[n / 3];
    mask = mask << (n % 3)*10;
    temp = (temp & ~mask) | (value << (n % 3)*10);
    myArray[n / 3] = temp; 
}

int getValue(int n) {
    uint32_t temp;
    uint32_t mask = (1 << 10) - 1;

    if(n >= ELEMENTS) return 0;
    temp = myArray[n / 3];
    temp >>= (n % 3)*10;
    return (temp & ~mask) + 1;
}

You can do this with bitfields instead, but the code to get/set individual values will end up using branches (e.g. switch( n%3 )) which will be slower in practice.

Removing those 2 bits of padding will cost a little more complexity and a little more overhead. For example:

%define ELEMENTS 100

uint32_t myArray[ (ELEMENTS*10 + 31) / 32 ];

int getValue(int n) {
    uint64_t temp;
    uint64_t mask = (1 << 10) - 1;

    if(n >= ELEMENTS) return 0;

    temp = myArray[n*10/32 + 1];
    temp = (temp << 32) | myArray[n*10/32];

    temp >>= (n*10 % 32);

    return (temp & ~mask) + 1;
}

This can't be done with bitfields. This is the most space efficient way to store an array of values that range from 1 to 1024.

If you are storing an "array of booleans" or setting flags, it can be useful. For instance, you can initialize or compare up to 64 values at a time.

These macros will work for unsigned char, short, int, long long ... but simplifies significantly if you just pick a type (so you can use a safer static inline function)

#define getbit(x,n) x[n/(sizeof(*x)*8)]  &  (typeof(*x))1 << (n&((sizeof(*x)*8)-1)) 
#define setbit(x,n) x[n/(sizeof(*x)*8)] |=  (typeof(*x))1 << (n&((sizeof(*x)*8)-1)) 
#define flpbit(x,n) x[n/(sizeof(*x)*8)] ^=  (typeof(*x))1 << (n&((sizeof(*x)*8)-1)) 
#define clrbit(x,n) x[n/(sizeof(*x)*8)] &= ~( (typeof(*x))1 << (n&((sizeof(*x)*8)-1)) )

to initialize a large array of booleans all you need to do is: char cbits[]={0,0xF,0,0xFF};

or for all zeroes char cbits[4]={0};

or an int example: int ibits[]={0xF0F0F0F0,~0};

//1111000011110000111100001111000011111111111111111111111111111111

If you will only be accessing 1 type of array, it may be better to make the macros into proper functions like:

static inline unsigned char getbit(unsigned char *x, unsigned n){ 
  return x[n>>3]  &  1 << (n&7); 
}
//etc... similar for other types and functions from macros above

You can also compare multiple flags at a time by '|'ing the flags together and using '&'ed masks; however, it does get a bit more complex when you exceed the native types

For your particular instance you can initialize to all zeroes by:

unsigned char flags[128]={0};

or all 1's by:

uint64_t flags[128] = {~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0,~0};

You can even use enums to name your flags

enum{
  WHITE, //0
  RED, //1
  BLUE, //2
  GREEN, //3
  ...
  BLACK //1023
}

if (getbit(flags,WHITE) && getbit(flags,RED) && getbit(flags,BLUE))
  printf("red, white and blue\n");

Eric Tsui

1) The proper solution for this question is to use Bit Array

The question provided the solution with Bit Fields with Struct. There are two typical ways to save memory space for bits related problem, another is to use Bit Array. For this specific case in the question, the better way is to use Bit Array (demoed as follows).

If it is the case like purely independent bit flags here, go for the Bit Array
If there is a group of relevant bits , such as the IP address or Control Word definition, then it's better to combine them with a struct, that is to use Bit Fields with Sturct

2) Sample code just for demo Bit Array

#include<limits.h>
#define BITS_OF_INT (sizeof(int)*CHAR_BIT)  
void SetBit(int A[], int k)
     {
       //Set the bit at the k-th position
       A[k/BITS_OF_INT] |= 1 <<(k%BITS_OF_INT);
     } 
void ClearBit(int A[], int k)
     {
       //RESET the bit at the k-th position
       A[k/BITS_OF_INT] &= ~(1 <<(k%BITS_OF_INT)) ;
     }  
int TestBit(int A[], int k)
     {
       // Return TRUE if bit set    
       return ((A[k/BITS_OF_INT] & (1 <<(k%BITS_OF_INT)))!= 0) ;
     }

#define MAX_SIZE 1024
int main()
{
    int A[MAX_SIZE/BITS_OF_INT];
    int i;
    int pos = 100; // position

    for (i = 0; i < MAX_SIZE/BITS_OF_INT; i++)
        A[i] = 0; 

    SetBit(A, pos);
    if (TestBit(A, pos)){//do something}
    ClearBit(A, pos); 
}

3) Furthermore, a worthwhile discussing point from this question is,

How to choose a proper solution between "Bit Array" and "Bit fields with struct"?

Here are some references about this topic.

来源：https://stackoverflow.com/questions/31096894/how-can-i-use-bit-fields-to-save-memory

标签

bit-fields