Uniformly distributed bit sequence

喜欢而已 提交于 2020-03-25 16:07:45

问题


Suppose you have a regular number generator which is able to produce uniformly distributed random 32-bit numbers. And suppose you are looking for a way to generate a pseudo-random sequence of bits in which ones (i.e., the set bits) appear with a predefined probability.

A naive way of producing such a sequence would be to run the number generator once per bit, but that is terribly inefficient for small probabilities such as 0.01: when only 1% of the bits in the sequence are set, most of the bits will be zero, and on average just one bit in a hundred would be set. On the other hand, even with such a low probability there is a chance of encountering a long sub-sequence of consecutive ones that extends beyond 8, 16, 32 or 64 bits.

The question is how to produce such a sequence efficiently using a regular PRNG.


Edit

A toy implementation of rational Bernoulli variable sampling in javascript suggested by Peter O.:

// Based on
// https://arxiv.org/abs/1304.1916
// https://arxiv.org/pdf/1304.1916.pdf (page 21, figure 6)

/**
 * Xorshift-style pseudo-random generator with four 32-bit words of state
 * (x, y, z, w). The generator can be stepped forwards with next() and
 * backwards with prev(); curr() peeks at the most recent output.
 *
 * Outputs are the results of JavaScript bitwise operators, i.e. signed
 * 32-bit integers (they may be negative).
 */
class Xor128 {
    /**
     * @param {number} x first state word
     * @param {number} y second state word
     * @param {number} z third state word
     * @param {number} w fourth state word (also the current output)
     */
    constructor(x, y, z, w) {
        Object.assign(this, { x, y, z, w });
    }

    /**
     * Steps the generator one position backwards, undoing one next() call.
     * @returns {number} the output the generator had before that next() call
     */
    prev() {
        // w ^ z ^ (z >>> 19) strips the "w" contribution from the last step,
        // leaving u = t ^ (t >>> 8), where t was next()'s temporary.
        let mixed = this.w ^ this.z ^ (this.z >>> 19);

        // Undo u = t ^ (t >>> 8): t = u ^ (u >>> 8) ^ (u >>> 16) ^ (u >>> 24).
        mixed ^= mixed >>> 8;
        mixed ^= mixed >>> 16;

        // Rotate the state words back by one slot.
        [this.w, this.z, this.y] = [this.z, this.y, this.x];

        // Undo t = x ^ (x << 11): x = t ^ (t << 11) ^ (t << 22).
        mixed ^= mixed << 11;
        mixed ^= mixed << 22;
        this.x = mixed;

        return this.w;
    }

    /**
     * @returns {number} the current output word, without advancing the state
     */
    curr() {
        return this.w;
    }

    /**
     * Steps the generator one position forwards.
     * @returns {number} the newly generated output word
     */
    next() {
        const seed = this.x ^ (this.x << 11);
        const last = this.w;

        // Shift the state words down by one slot; the new w is computed below.
        [this.x, this.y, this.z] = [this.y, this.z, last];

        this.w = last ^ (last >>> 19) ^ (seed ^ (seed >>> 8));
        return this.w;
    }
}

/**
 * Infinite stream of fair coin flips (0 or 1) drawn from a word generator.
 *
 * Each call to xor128.next() supplies one 32-bit word, whose bits are
 * yielded one at a time starting from the least significant bit.
 *
 * @param {{next: function(): number}} xor128 source of 32-bit words
 * @yields {number} 0 or 1
 */
function* flip(xor128) {
    const BITS_PER_WORD = 32;

    for (;;) {
        const word = xor128.next();

        for (let position = 0; position < BITS_PER_WORD; position++) {
            // Unsigned shift so that negative (sign-bit-set) words are handled too.
            yield (word >>> position) & 1;
        }
    }
}

/**
 * Turns a stream of fair coin flips into a stream of bits that are 1 with
 * probability k / n (rational Bernoulli sampling, see
 * https://arxiv.org/abs/1304.1916).
 *
 * For every incoming flip the working value v is doubled and reduced modulo
 * n, producing a candidate bit b (whether the doubling crossed n). The
 * candidate is emitted only when the flip is 1; v then restarts at k.
 *
 * Intended for integers with 0 <= k <= n and n > 0; inputs are not validated.
 *
 * @param {Iterable<number>} flip source of fair bits (0 or 1)
 * @param {number} k numerator of the probability
 * @param {number} n denominator of the probability
 * @yields {number} 0 or 1; the stream ends when `flip` ends
 */
function* bernoulli(flip, k, n) {
    let v = k;

    for (const bit of flip) {
        // Double arithmetically rather than with `<<= 1`: the shift operator
        // truncates to a signed 32-bit integer, so for n above 2^30 the
        // doubled value could wrap and make the comparison below wrong.
        v *= 2;

        let b;

        if (v >= n) {
            v -= n;
            b = 1;
        } else {
            b = 0;
        }

        if (bit === 1) {
            yield b;

            v = k;
        }
    }
}

// Benchmark driver: samples Bernoulli(5 / 1000) bits for about one second,
// then prints how many zeros and ones were produced.
var xor128 = new Xor128(1, 2, 3, 4);

var z = 0, o = 0;

var then = Date.now();

for (const value of bernoulli(flip(xor128), 5, 1000)) {
    if (value === 0) {
        z++;
    } else {
        o++;
    }

    if (Date.now() - then > 1000) {
        console.log(`${z} ${o}`);

        // The bit stream is infinite: without leaving the loop here the
        // counts would be logged again on every later iteration, forever.
        break;
    }
}


// Pieces of code to test out xor128:
//
// for (let index = 0; index < 100; index++) {
//     console.log(xor128.curr())

//     xor128.next();
// }

// console.log('-----------------------------------')
// for (let index = 0; index < 100; index++) {
//     xor128.prev();

//     console.log(xor128.curr())
// }

Another edit

The code below, implemented in C#, produces 91.2 million bits per second packed into the UInt32 data type (MacBook Pro 2019, Core i9, 2.4 GHz). I think in C it would be possible to get over 100 million bits per second. It also feels like it should be possible to further utilize binary arithmetic to generate all 32 bits of the random number in parallel, perhaps with some loop unrolling or SIMD, though I'm not sure. Anyway, here's the code:

/// <summary>
/// Produces pseudo-random bits that are 1 with probability k / n, 32 at a time.
/// Fair coin flips come from a xorshift-style generator whose state is the four
/// words <see cref="X"/>, <see cref="Y"/>, <see cref="Z"/> and <see cref="W"/>;
/// they are converted with the rational Bernoulli sampling scheme described in
/// https://arxiv.org/abs/1304.1916.
/// </summary>
public class Bernoulli
{
    /// <summary>First word of the generator state.</summary>
    public UInt32 X { get; set; }

    /// <summary>Second word of the generator state.</summary>
    public UInt32 Y { get; set; }

    /// <summary>Third word of the generator state.</summary>
    public UInt32 Z { get; set; }

    /// <summary>Fourth word of the generator state; also the latest generated word.</summary>
    public UInt32 W { get; set; }

    /// <summary>Creates a generator seeded from a freshly created <see cref="Guid"/>.</summary>
    public Bernoulli()
        : this(Guid.NewGuid())
    {

    }

    /// <summary>
    /// Creates a generator whose four state words are taken from the 16 bytes of
    /// <paramref name="guid"/>, four bytes per word, first byte most significant.
    /// </summary>
    /// <param name="guid">Seed material.</param>
    public Bernoulli(Guid guid)
    {
        var bytes = guid.ToByteArray();

        X = ReadUInt32(bytes, 0);
        Y = ReadUInt32(bytes, 4);
        Z = ReadUInt32(bytes, 8);
        W = ReadUInt32(bytes, 12);
    }

    /// <summary>Creates a generator with an explicit state.</summary>
    /// <param name="x">Initial value of <see cref="X"/>.</param>
    /// <param name="y">Initial value of <see cref="Y"/>.</param>
    /// <param name="z">Initial value of <see cref="Z"/>.</param>
    /// <param name="w">Initial value of <see cref="W"/>.</param>
    public Bernoulli(UInt32 x, UInt32 y, UInt32 z, UInt32 w)
    {
        X = x;
        Y = y;
        Z = z;
        W = w;
    }

    // Packs four consecutive bytes into one word, first byte most significant.
    // Each byte is widened to UInt32 before shifting so that no intermediate
    // value is a negative Int32 (which would throw in a checked build).
    private static UInt32 ReadUInt32(byte[] bytes, int offset)
    {
        return ((UInt32)bytes[offset] << 24)
             | ((UInt32)bytes[offset + 1] << 16)
             | ((UInt32)bytes[offset + 2] << 8)
             | bytes[offset + 3];
    }

    // Buffer of not-yet-consumed fair coin flips; flips are consumed from the
    // least significant bit. bitsCount is the number of valid flips in the buffer.
    UInt64 bits = 0;
    UInt32 bitsCount = 0;

    /// <summary>
    /// Returns 32 bits, each of which is 1 with probability
    /// <paramref name="k"/> / <paramref name="n"/>. The first sampled bit ends up
    /// in the most significant position, the last one in bit 0.
    /// </summary>
    /// <param name="k">Numerator of the probability.</param>
    /// <param name="n">Denominator of the probability.</param>
    /// <returns>The 32 sampled bits packed into one word.</returns>
    /// <remarks>
    /// Intended for 0 &lt;= k &lt;= n and n &gt; 0; these are not validated.
    /// </remarks>
    /// <exception cref="InvalidOperationException">
    /// The generator state is all zero. Such a state only ever produces zero
    /// flips, so sampling would never complete.
    /// </exception>
    public UInt32 Next(UInt32 k, UInt32 n)
    {
        if ((X | Y | Z | W) == 0)
        {
            throw new InvalidOperationException(
                "The generator state (X, Y, Z, W) must not be all zero.");
        }

        UInt32 b;
        var c = 0;      // number of result bits produced so far
        var r = 0u;     // result accumulator

        // 64-bit working value: doubling a 32-bit value could overflow once
        // n exceeds 2^31.
        UInt64 v = k;

        do
        {
            // Refill the flip buffer. At most 32 flips are buffered on entry,
            // so after the loop the buffer holds at most 64.
            while (bitsCount <= 32)
            {
                b = X ^ (X << 11);

                X = Y;
                Y = Z;
                Z = W;
                W = W ^ (W >> 19) ^ (b ^ (b >> 8));

                bits = (bits << 32) | W;
                bitsCount += 32;
            }

            while (c < 32 && 0 < bitsCount)
            {
                v <<= 1;

                // Branch-free form of
                //
                //     if (v >= n) { v -= n; b = 1; } else { b = 0; }
                //
                // b is an all-ones mask when v >= n and zero otherwise, so
                // "b & n" is either n or 0 and no multiplication is needed.
                b = v < n ? 0u : 0xFFFFFFFFu;
                v -= b & n;

                // The candidate bit is emitted only when the fair flip is 1.
                if ((bits & 1) == 1)
                {
                    // Shift first, then insert: all 32 sampled bits are kept
                    // and bit 0 carries the most recent sample.
                    r = (r << 1) | (b & 1);
                    v = k;
                    c++;
                }

                bits >>= 1;
                bitsCount--;
            }

        } while (c < 32);

        return r;
    }
}

回答1:


This problem can be restated as:

  • Generate a random integer in the interval [0, N).
  • Output 1 if the integer is 0, or 0 otherwise.

There are various ways to generate random integers in a range from a random bit stream. Of these, J. Lumbroso showed an optimal way to solve this problem given a random bit stream ("Optimal Discrete Uniform Generation from Coin Flips, and Applications", 2013). (However, Appendix B of that paper also points out a solution to your direct problem: generating 0 or 1 with a given probability.) Other ways include those mentioned in "Efficiently Generating a Random Number in a Range" as well as a brand-new algorithm, the "Fast Loaded Dice Roller".




回答2:


Another possible approach. Say you wanted 5% 1-bits and 95% 0-bits:

bitArray = [1, 1, 1, 1, 1, 0, ... 0, 0];  // 95 0s
bitArray.shuffle();

Pull the bits you want from the shuffled bitArray. You can use your 32-bit RNG to create a shuffle() method if needed.



来源:https://stackoverflow.com/questions/60777414/uniformly-distributed-bit-sequence

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!