First and last occurrence for binary search in C

前端 未结 5 941
旧时难觅i
旧时难觅i 2021-01-06 00:46

I'm trying to understand how to modify binary search so that it works for finding the first and last occurrences of a value. Surely I could find some code on the web, but I'm trying to reach a deeper understanding.

5条回答
  •  予麋鹿
    予麋鹿 (楼主)
    2021-01-06 01:01

    Binary search on an array which contains a sorted set of values, where values may occur more than once, does not necessarily yield the first or last matching element.

    It yields the first matching element it finds.

    Since this element could be surrounded by more matching elements, a second step is required in order to find the first and last matching element. This can be done with linear search, as suggested by other posts, or it can also be done in logarithmic time.

    Let i be the index of the first found match, as reported by binary search.

    Then, the start of the "sequence of equals" is in [0..i]. And the end of the "sequence of equals" is in [i..N-1] where N is the length of the sequence. Recursively bisecting those intervals until the border is found eventually yields the first and last match.

    The following F# program shows the idea in a few lines. It should be a trivial matter to write an equivalent C function.

    /// Given the index i of one element of the sorted array a, returns the pair
    /// (first, last) of indices delimiting the run of elements equal to a.[i].
    let equal_range (a : int[]) i =
        // Left border inside [lo..hi]; a.[hi] always holds the searched value.
        let rec first lo hi =
            if a.[lo] = a.[hi] then lo
            elif hi - lo < 2 then hi
            else
                let mid = lo + (hi - lo) / 2
                if a.[mid] = a.[hi] then first lo mid else first mid hi
        // Right border inside [lo..hi]; a.[lo] always holds the searched value.
        let rec last lo hi =
            if a.[lo] = a.[hi] then hi
            elif hi - lo < 2 then lo
            else
                let mid = lo + (hi - lo) / 2
                if a.[mid] = a.[lo] then last mid hi else last lo mid
        first 0 i, last i (Array.length a - 1)
    
    // Sample inputs: a run of equal values in the middle of distinct values,
    // a single-element array, and an array consisting of one value only.
    let test_arrays =
        [ Array.concat [ [|1..4|]; Array.create 5 5; [|6..10|] ]
          [|1|]
          Array.create 5 1 ]
    
    // For every sample array: print the array itself, then one line per index
    // showing the element at that index and the equal range found around it.
    for arr in test_arrays do
        printfn "%A" arr
        arr |> Array.iteri (fun idx value ->
            printfn "%d(a.[%d] = %d): %A" idx idx value (equal_range arr idx))
    

    Here is the equivalent, non-recursive C code:

    #include <stdio.h>
    #include <stdbool.h>
    #include <assert.h>
    
    /* Inclusive pair of indices filled in by equal_range(). */
    typedef struct IndexPair_tag
    {
        size_t a;   /* index of the first element of the run */
        size_t b;   /* index of the last element of the run */
    } IndexPair_t;

    /*
     * Finds the borders of the run of elements equal to a[i] that surrounds
     * index i, by bisecting [0..i] for the left border and [i..n-1] for the
     * right border. The bisection is only meaningful for a sorted array, with
     * i typically coming from a preceding binary search.
     *
     * a      - array of n elements
     * n      - number of elements in a
     * i      - index of one element of the run looked for
     * result - receives the first index in result->a and the last in result->b
     *
     * Returns false (leaving *result untouched) when a or result is NULL or
     * when i is not a valid index (i >= n, which also covers n == 0);
     * returns true otherwise.
     */
    bool equal_range(const int * a, size_t n, size_t i, IndexPair_t * result)
    {
        if (a == NULL || result == NULL || i >= n)
        {
            return false;
        }

        const int key = a[i];
        size_t lo;
        size_t hi;

        /* Left border: a[hi] equals key at all times; a[lo] differs from key
           for as long as the loop keeps running. */
        lo = 0;
        hi = i;
        while (a[lo] != key && hi - lo >= 2)
        {
            const size_t probe = lo + (hi - lo) / 2;
            if (a[probe] == key)
                hi = probe;
            else
                lo = probe;
        }
        /* Either the whole prefix already matches (lo) or hi is the border. */
        result->a = (a[lo] == key) ? lo : hi;

        /* Right border: mirror image, a[lo] equals key at all times. */
        lo = i;
        hi = n - 1;
        while (a[hi] != key && hi - lo >= 2)
        {
            const size_t probe = lo + (hi - lo) / 2;
            if (a[probe] == key)
                lo = probe;
            else
                hi = probe;
        }
        result->b = (a[hi] == key) ? hi : lo;

        return true;
    }
    
    /* Prints the N elements of a on one line as "[x, y, z]"; an empty array
       is printed as "[]". */
    static void ShowArray(int *a, size_t N)
    {
        if (N == 0)
        {
            printf("[]\n");
            return;
        }

        /* First element carries the opening bracket, the rest a separator. */
        printf("[%d", a[0]);
        size_t k = 1;
        while (k < N)
        {
            printf(", %d", a[k]);
            ++k;
        }
        printf("]\n");
    }
    
    /*
     * Prints the array a (n elements), then calls equal_range() for every
     * index and prints the index pair it reports. Asserts that the call
     * succeeds and that both reported borders hold the same value.
     */
    static void RunEqualRangeChecks(int *a, size_t n)
    {
        ShowArray(a, n);
        IndexPair_t result;
        for (size_t i = 0; i < n; i++)
        {
            if (equal_range(a, n, i, &result))
            {
                /* size_t values need %zu; passing them to %d is undefined
                   behaviour wherever size_t is wider than int. */
                printf("%zu(a[%zu] = %d): (%zu,%zu)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %zu, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }

    /*
     * Test driver: exercises equal_range() on a run of equal values embedded
     * in distinct values, on a single-element array, and on an array that
     * holds one value only.
     */
    int main()
    {
        /* Array lengths are derived with sizeof instead of a separate
           "const size_t N": an array sized by a const variable is a VLA in C
           and may not have an initializer. It also removes the hard-coded
           element count from the call. */
        int mixed[] = { 1,2,3,4,5,5,5,5,5,6,7,8,9,10 };
        int single[] = { 1 };
        int all_equal[] = { 1,1,1,1,1 };

        RunEqualRangeChecks(mixed, sizeof mixed / sizeof mixed[0]);
        RunEqualRangeChecks(single, sizeof single / sizeof single[0]);
        RunEqualRangeChecks(all_equal, sizeof all_equal / sizeof all_equal[0]);

        return 0;
    }
    

    Update: Jonathan was right, the design of the function was sloppy and had some corner case issues.

    • Fixed the fact that the function cannot report argument errors.
    • Added defensive argument tests to equal_range().
    • Fixed the wrong results that were produced for edge cases.
    • Changed test driver (main) so all edge cases are covered.

    The fact that the function takes an index, not a value, is okay, IMHO, as it is supposed to be the second step, after a first step which produces the index of the element being looked for.

提交回复
热议问题