问题
I'm trying to continuously find a byte array (byte[]) within a byte array and I found a code that only finds the first occurrence.
This is where I found the code: Find an array (byte[]) inside another array?
Question: How can I continuously find a byte array with this code below?
public int SearchBytes(byte[] haystack, byte[] needle)
{
int len = needle.Length;
int limit = haystack.Length - len;
for (int i = 0; i <= limit; i++)
{
int k = 0;
for (; k < len; k++)
{
if (needle[k] != haystack[i + k]) break;
}
if (k == len) return i;
}
return -1;
}
回答1:
You can change the method to accept a start index like this:
public int SearchBytes(byte[] haystack, byte[] needle, int start_index)
{
int len = needle.Length;
int limit = haystack.Length - len;
for (int i = start_index; i <= limit; i++)
{
int k = 0;
for (; k < len; k++)
{
if (needle[k] != haystack[i + k]) break;
}
if (k == len) return i;
}
return -1;
}
The difference is simply that this method accepts a start_index
and starts the search at this specific index.
Now, you can use it like this:
byte[] haystack = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3 };
byte[] needle = new byte[] {1,2,3};
int index = 0;
while (true)
{
index = SearchBytes(haystack, needle, index);
if (index == -1)
break;
Console.WriteLine("Found at " + index);
index += needle.Length;
}
This loop starts on index 0, then it uses the result of the previous search to set a new index to start the next search.
It adds needle.Length
to the index so that we start searching immediately after the end of the previously found result.
UPDATE:
Here is how this code can be used to create a method that returns the indexes as an array:
public int[] SearchBytesMultiple(byte[] haystack, byte[] needle)
{
int index = 0;
List<int> results = new List<int>();
while (true)
{
index = SearchBytes(haystack, needle, index);
if (index == -1)
break;
results.Add(index);
index += needle.Length;
}
return results.ToArray();
}
And it can be used like this:
byte[] haystack = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3 };
byte[] needle = new byte[] {1,2,3};
int[] indexes = SearchBytesMultiple(haystack, needle);
回答2:
As an alternative, you could consider using the Boyer-Moore algorithm, which is extremely performant if the size of needle[]
or haystack[]
is large.
However, I wouldn't recommend this for very short needle[]
or haystack[]
because the overhead of setting up the offset tables will be higher than just doing a simple linear search.
Here's an implementation that I converted from the Java one on the Wiki page that I linked:
using System;
using System.Collections.Generic;
namespace ConsoleApplication1
{
public sealed class BoyerMoore
{
readonly byte[] needle;
readonly int[] charTable;
readonly int[] offsetTable;
public BoyerMoore(byte[] needle)
{
this.needle = needle;
this.charTable = makeByteTable(needle);
this.offsetTable = makeOffsetTable(needle);
}
public IEnumerable<int> Search(byte[] haystack)
{
if (needle.Length == 0)
yield break;
for (int i = needle.Length - 1; i < haystack.Length;)
{
int j;
for (j = needle.Length - 1; needle[j] == haystack[i]; --i, --j)
{
if (j != 0)
continue;
yield return i;
i += needle.Length - 1;
break;
}
i += Math.Max(offsetTable[needle.Length - 1 - j], charTable[haystack[i]]);
}
}
static int[] makeByteTable(byte[] needle)
{
const int ALPHABET_SIZE = 256;
int[] table = new int[ALPHABET_SIZE];
for (int i = 0; i < table.Length; ++i)
table[i] = needle.Length;
for (int i = 0; i < needle.Length - 1; ++i)
table[needle[i]] = needle.Length - 1 - i;
return table;
}
static int[] makeOffsetTable(byte[] needle)
{
int[] table = new int[needle.Length];
int lastPrefixPosition = needle.Length;
for (int i = needle.Length - 1; i >= 0; --i)
{
if (isPrefix(needle, i + 1))
lastPrefixPosition = i + 1;
table[needle.Length - 1 - i] = lastPrefixPosition - i + needle.Length - 1;
}
for (int i = 0; i < needle.Length - 1; ++i)
{
int slen = suffixLength(needle, i);
table[slen] = needle.Length - 1 - i + slen;
}
return table;
}
static bool isPrefix(byte[] needle, int p)
{
for (int i = p, j = 0; i < needle.Length; ++i, ++j)
if (needle[i] != needle[j])
return false;
return true;
}
static int suffixLength(byte[] needle, int p)
{
int len = 0;
for (int i = p, j = needle.Length - 1; i >= 0 && needle[i] == needle[j]; --i, --j)
++len;
return len;
}
}
}
Here's some test code:
byte[] haystack = new byte[1000];
byte[] needle = {1, 2, 3, 4, 5, 6, 7, 8, 9};
for (int i = 100; i <= 900; i += 100)
Array.Copy(needle, 0, haystack, i, needle.Length);
var searcher = new BoyerMoore(needle);
foreach (int index in searcher.Search(haystack))
Console.WriteLine(index);
来源:https://stackoverflow.com/questions/34924626/c-sharp-how-to-continuously-find-a-byte-array-inside-a-byte-array