First and last occurrence for binary search in C

前端未结

关注

 5  941

旧时难觅i 2021-01-06 00:46

I'm trying to understand how to modify binary search so that it works for finding the first and last occurrences. Surely I can find some code on the web, but I'm trying to reach a deeper understanding.

5条回答

予麋鹿 (楼主)

2021-01-06 01:01

Binary search on an array which contains a sorted set of values, where values may occur more than once, does not necessarily yield the first or last element.

It yields the first matching element it finds.

Since this element could be surrounded by more matching elements, a second step is required in order to find the first and last matching element. This can be done with linear search, as suggested by other posts, or it can also be done in logarithmic time.

Let i be the index of the first found match, as reported by binary search.

Then, the start of the "sequence of equals" is in [0..i]. And the end of the "sequence of equals" is in [i..N-1] where N is the length of the sequence. Recursively bisecting those intervals until the border is found eventually yields the first and last match.

The following (F#) program shows the idea in a few lines. It should be a trivial matter to write an equivalent C function.

/// Given an array `a` and an index `i` into it, returns the pair
/// (first, last) of indices delimiting the run of elements equal to a.[i].
/// The array is expected to be sorted, so that equal values are adjacent.
let equal_range (a : int[]) i =
    // Bisects [lo..hi] towards the leftmost index whose value equals a.[hi].
    // `hi` always sits on an element of the run being searched.
    let rec leftEdge lo hi =
        match a.[lo] = a.[hi], hi - lo < 2 with
        | true, _ -> lo
        | false, true -> hi
        | false, false ->
            let probe = lo + (hi - lo) / 2
            if a.[probe] = a.[hi] then leftEdge lo probe else leftEdge probe hi
    // Mirror image of leftEdge: `lo` always sits on an element of the run
    // and the interval is bisected towards the rightmost equal element.
    let rec rightEdge lo hi =
        match a.[lo] = a.[hi], hi - lo < 2 with
        | true, _ -> hi
        | false, true -> lo
        | false, false ->
            let probe = lo + (hi - lo) / 2
            if a.[probe] = a.[lo] then rightEdge probe hi else rightEdge lo probe
    leftEdge 0 i, rightEdge i (Array.length a - 1)

/// Sample inputs for equal_range: a sorted array with a run of five 5s in
/// the middle, a single-element array, and an array of identical values.
let test_arrays =
    let withRunInMiddle = Array.concat [ [|1..4|]; Array.create 5 5; [|6..10|] ]
    let singleton = Array.create 1 1
    let allEqual = Array.create 5 1
    [ withRunInMiddle; singleton; allEqual ]

// For each sample array: print the array, then print the equal range
// reported for every index in it.
for a in test_arrays do
    printfn "%A" a
    a |> Array.iteri (fun i x ->
        printfn "%d(a.[%d] = %d): %A" i i x (equal_range a i))

Here is the equivalent, non-recursive C code:

#include <stdio.h>
#include <stdbool.h>
#include <assert.h>

/*
 * Pair of array indices delimiting a run of equal values (both inclusive).
 */
typedef struct IndexPair_tag
{
    size_t a; /* index of the first element of the run */
    size_t b; /* index of the last element of the run */
} IndexPair_t;

/*
 * Finds the run of elements equal to a[i] by bisection.
 *
 * a      - array of n ints; expected to be sorted so equal values are adjacent
 * n      - number of elements in a
 * i      - index of one element of the run (e.g. as found by binary search)
 * result - receives the first (a) and last (b) index of the run
 *
 * Returns false, leaving *result untouched, when a or result is NULL or
 * when i is not a valid index (i >= n); returns true otherwise.
 */
bool equal_range(const int * a, size_t n, size_t i, IndexPair_t * result)
{
    size_t lo, hi;

    if (a == NULL || result == NULL || i >= n)
        return false;

    /* Left edge: hi always rests on an element equal to a[i]; lo is pulled
       towards it until the values match or the two are adjacent. */
    lo = 0;
    hi = i;
    while ((hi - lo) > 1 && a[lo] != a[hi])
    {
        const size_t probe = lo + (hi - lo) / 2;
        if (a[probe] == a[hi])
            hi = probe;
        else
            lo = probe;
    }
    result->a = (a[lo] == a[hi]) ? lo : hi;

    /* Right edge: mirror image, lo always rests on an element equal to a[i]. */
    lo = i;
    hi = n - 1;
    while ((hi - lo) > 1 && a[lo] != a[hi])
    {
        const size_t probe = lo + (hi - lo) / 2;
        if (a[probe] == a[lo])
            lo = probe;
        else
            hi = probe;
    }
    result->b = (a[lo] == a[hi]) ? hi : lo;

    return true;
}

/*
 * Prints the N ints of `a` to stdout as "[x0, x1, ...]" followed by a
 * newline; an empty array is printed as "[]".
 */
static void ShowArray(int *a, size_t N)
{
    const int *cursor;
    const int *end;

    if (N == 0)
    {
        printf("[]\n");
        return;
    }

    /* First element carries the opening bracket; the rest are comma-prefixed. */
    printf("[%d", a[0]);
    end = a + N;
    for (cursor = a + 1; cursor != end; ++cursor)
    {
        printf(", %d", *cursor);
    }
    printf("]\n");
}

/*
 * Prints the n-element array `a`, then for every index i prints the range
 * reported by equal_range(a, n, i, ...) and asserts that both ends of that
 * range hold the same value. A false return from equal_range() is reported
 * and treated as a test failure.
 */
static void CheckAllIndices(int *a, size_t n)
{
    IndexPair_t result;

    ShowArray(a, n);
    for (size_t i = 0; i < n; i++)
    {
        if (equal_range(a, n, i, &result))
        {
            /* size_t values are printed with %zu; passing them to %d is
               undefined behaviour where size_t and int differ in width. */
            printf("%zu(a[%zu] = %d): (%zu,%zu)\n", i, i, a[i], result.a, result.b);
            assert(a[result.a] == a[result.b]);
        }
        else
        {
            printf("For i = %zu, equal_range() returned false.\n", i);
            assert(false);
        }
    }
}

/*
 * Test driver: runs equal_range() over every index of three sample arrays
 * (a run in the middle, a single element, all elements equal).
 */
int main(void)
{
    /* Sizes are derived with sizeof instead of `const size_t N; int a[N] = {...}`:
       in C that form declares a variable-length array, which may not have
       an initializer. */
    int mixed[] = { 1,2,3,4,5,5,5,5,5,6,7,8,9,10 };
    int single[] = { 1 };
    int uniform[] = { 1,1,1,1,1 };

    CheckAllIndices(mixed, sizeof mixed / sizeof mixed[0]);
    CheckAllIndices(single, sizeof single / sizeof single[0]);
    CheckAllIndices(uniform, sizeof uniform / sizeof uniform[0]);

    return 0;
}

Update: Jonathan was right, the design of the function was sloppy and had some corner case issues.

Fixed the fact that the function cannot report argument errors.
Added defensive argument tests to equal_range().
Fixed the fact that wrong results were produced for edge cases.
Changed test driver (main) so all edge cases are covered.

The fact that the function takes an index, not a value, is okay, IMHO, as it is supposed to be the second step, after a first step which produces the index of the element looked for.

0 讨论(0)

查看其它5个回答