I'm trying to understand how to modify binary search so that it works for first and last occurrences. Surely I can find some code on the web, but I'm trying to reach a deeper understanding.
Binary search on an array which contains a sorted set of values, where values may occur more than once does not necessarily yield the first or last element.
It yields the first matching element it finds.
Since this element could be surrounded by more matching elements, a second step is required in order to find the first and last matching elements. This can be done with a linear search, as suggested by other posts, or it can be done in logarithmic time.
Let i be the index of the first found match, as reported by binary search.
Then, the start of the "sequence of equals" is in [0..i]. And the end of the "sequence of equals" is in [i..N-1] where N is the length of the sequence. Recursively bisecting those intervals until the border is found eventually yields the first and last match.
The following F# program shows the idea in a few lines. It should be a trivial matter to write an equivalent C function.
/// Given the index `i` of one element of the array `a`, returns the pair
/// (first, last) of indices delimiting the run of elements equal to `a.[i]`.
/// `a` is expected to be sorted so that equal values are adjacent.
/// Both borders are located by repeatedly halving an index interval.
let equal_range (a : int[]) i =
    // Left border, searched in [lo..hi]; a.[hi] always equals the target value.
    let rec first lo hi =
        match a.[lo] = a.[hi], hi - lo < 2 with
        | true, _ -> lo          // the whole interval is one run of equals
        | false, true -> hi      // adjacent and different: the run starts at hi
        | false, false ->
            let mid = lo + (hi - lo) / 2
            if a.[mid] = a.[hi] then first lo mid else first mid hi
    // Right border, searched in [lo..hi]; a.[lo] always equals the target value.
    let rec last lo hi =
        match a.[lo] = a.[hi], hi - lo < 2 with
        | true, _ -> hi          // the whole interval is one run of equals
        | false, true -> lo      // adjacent and different: the run ends at lo
        | false, false ->
            let mid = lo + (hi - lo) / 2
            if a.[mid] = a.[lo] then last mid hi else last lo mid
    let first_index = first 0 i
    let last_index = last i (Array.length a - 1)
    first_index, last_index
// Sample inputs: a run of equal values in the middle, a single element,
// and an array consisting of one value only.
let test_arrays =
    [
        [| 1; 2; 3; 4; 5; 5; 5; 5; 5; 6; 7; 8; 9; 10 |]
        [| 1 |]
        [| 1; 1; 1; 1; 1 |]
    ]

// Print each array, then the equal range reported for every index of it.
for a in test_arrays do
    printfn "%A" a
    for i in 0 .. a.Length - 1 do
        let range = equal_range a i
        printfn "%d(a.[%d] = %d): %A" i i (a.[i]) range
Here is the equivalent, non-recursive C code:
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
/* A pair of array indices; equal_range() uses it to report its result. */
typedef struct IndexPair_tag
{
size_t a; /* equal_range() stores the index of the first equal element here */
size_t b; /* equal_range() stores the index of the last equal element here */
} IndexPair_t;
/*
 * Given the index i of one element of the array a (length n), finds the first
 * and last index of the run of elements equal to a[i].
 *
 * a is expected to be sorted so that equal values are adjacent. Each border is
 * located by repeatedly halving an index interval.
 *
 * On success, stores the first index in result->a and the last index in
 * result->b and returns true. Returns false, leaving *result untouched, when
 * a or result is NULL or when i is not a valid index (i >= n).
 */
bool equal_range(const int * a, size_t n, size_t i, IndexPair_t * result)
{
    if (a == NULL || result == NULL || i >= n)
    {
        return false;
    }

    const int target = a[i];
    size_t lo;
    size_t hi;

    /* Left border: bisect [0, i]. a[hi] equals target throughout. */
    lo = 0;
    hi = i;
    for (;;)
    {
        if (a[lo] == target || hi - lo <= 1)
        {
            break;
        }
        const size_t mid = lo + (hi - lo) / 2;
        if (a[mid] == target)
        {
            hi = mid;
        }
        else
        {
            lo = mid;
        }
    }
    /* If a[lo] already matches, the run starts at lo; otherwise at hi. */
    result->a = (a[lo] == target) ? lo : hi;

    /* Right border: bisect [i, n - 1]. a[lo] equals target throughout. */
    lo = i;
    hi = n - 1;
    for (;;)
    {
        if (a[hi] == target || hi - lo <= 1)
        {
            break;
        }
        const size_t mid = lo + (hi - lo) / 2;
        if (a[mid] == target)
        {
            lo = mid;
        }
        else
        {
            hi = mid;
        }
    }
    /* If a[hi] already matches, the run ends at hi; otherwise at lo. */
    result->b = (a[hi] == target) ? hi : lo;

    return true;
}
/*
 * Prints the N elements of a to stdout as "[x0, x1, ...]" followed by a
 * newline; an empty array is printed as "[]".
 */
static void ShowArray(int *a, size_t N)
{
    if (N == 0)
    {
        printf("[]\n");
        return;
    }

    /* The first element carries the opening bracket; the rest carry the separator. */
    printf("[%d", a[0]);
    size_t k = 1;
    while (k < N)
    {
        printf(", %d", a[k]);
        ++k;
    }
    printf("]\n");
}
/*
 * Prints the array a (length N), then calls equal_range() for every index and
 * prints the reported range. Asserts that both ends of the range hold the same
 * value and that equal_range() never fails for a valid index.
 */
static void CheckEveryIndex(int *a, size_t N)
{
    ShowArray(a, N);
    IndexPair_t result;
    for (size_t i = 0; i < N; i++)
    {
        if (equal_range(a, N, i, &result))
        {
            /* size_t values need %zu; passing them to %d is undefined behavior. */
            printf("%zu(a[%zu] = %d): (%zu,%zu)\n", i, i, a[i], result.a, result.b);
            assert(a[result.a] == a[result.b]);
        }
        else
        {
            printf("For i = %zu, equal_range() returned false.\n", i);
            assert(false);
        }
    }
}

/*
 * Test driver: exercises equal_range() on an array with a run of equal values
 * in the middle, a single-element array, and an array of identical values.
 */
int main()
{
    /*
     * The arrays are sized by their initializers and the length is derived
     * with sizeof: in C an array whose size is a const variable is a
     * variable-length array and may not have an initializer.
     */
    {
        int a[] = { 1,2,3,4,5,5,5,5,5,6,7,8,9,10 };
        CheckEveryIndex(a, sizeof a / sizeof a[0]);
    }
    {
        int a[] = { 1 };
        CheckEveryIndex(a, sizeof a / sizeof a[0]);
    }
    {
        int a[] = { 1,1,1,1,1 };
        CheckEveryIndex(a, sizeof a / sizeof a[0]);
    }
    return 0;
}
Update: Jonathan was right, the design of the function was sloppy and had some corner case issues.
The fact that the function takes an index rather than a value is okay, IMHO, since it is meant to be the second step, following a first step that produces the index of the element being looked for.