问题
I have a simple method in C#:
// Intended to return the part of ActualStr that lies between StrFirst and StrLast.
// The slice starts just past the first occurrence of StrFirst.
// The length argument is the offset of StrLast measured from the START of StrFirst,
// plus StrLast.Length.
// NOTE(review): when StrLast follows StrFirst, that length looks StrFirst.Length + StrLast.Length
// longer than the text between the delimiters, so the result can include StrLast and trailing
// characters, or Substring can throw when start + length runs past the end of ActualStr - confirm.
public static string BetweenOf(string ActualStr, string StrFirst, string StrLast)
{
// The inner Substring call creates a temporary string only to locate StrLast,
// and IndexOf(StrFirst) is evaluated twice.
return ActualStr.Substring(ActualStr.IndexOf(StrFirst) + StrFirst.Length, (ActualStr.Substring(ActualStr.IndexOf(StrFirst))).IndexOf(StrLast) + StrLast.Length);
}
How can I optimise this?
回答1:
Here's how the code from @Chris here stacks up against a regular expression test:
// Benchmark driver: times the IndexOf-based implementation against three regex-based
// variants (a Regex built on every call, a cached Regex, and a cached compiled Regex)
// and writes the elapsed milliseconds of each to the debug output.
void Main()
{
    string input = "abcdefghijklmnopq";
    string first = "de";
    string last = "op";

    // Pass 1 calls each variant once so that JIT compilation is not included in the
    // measurement; only pass 2 runs the full iteration count and reports timings.
    for (int pass = 1; pass <= 2; pass++)
    {
        bool report = pass == 2;
        int iterations = report ? 1000000 : 1;

        Stopwatch sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfFixed(input, first, last);
        }
        sw.Stop();
        if (report)
        {
            Debug.WriteLine("IndexOf: " +
                sw.ElapsedMilliseconds + "ms");
        }

        sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfRegexAdhoc(input, first, last);
        }
        sw.Stop();
        if (report)
        {
            Debug.WriteLine("Regex adhoc: " +
                sw.ElapsedMilliseconds + "ms");
        }

        sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfRegexCached(input, first, last);
        }
        sw.Stop();
        if (report)
        {
            Debug.WriteLine("Regex uncompiled: " +
                sw.ElapsedMilliseconds + "ms");
        }

        sw = Stopwatch.StartNew();
        for (int index = 0; index < iterations; index++)
        {
            BetweenOfRegexCompiled(input, first, last);
        }
        sw.Stop();
        if (report)
        {
            Debug.WriteLine("Regex compiled: " +
                sw.ElapsedMilliseconds + "ms");
        }
    }
}
/// <summary>
/// Returns the text of <paramref name="ActualStr"/> located between the first occurrence of
/// <paramref name="StrFirst"/> and the first occurrence of <paramref name="StrLast"/> that
/// follows it.
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">The opening delimiter.</param>
/// <param name="StrLast">The closing delimiter.</param>
/// <returns>
/// The text between the two delimiters, or an empty string when either delimiter is not
/// found (the same result the regex-based variants give for a failed match).
/// </returns>
public static string BetweenOfFixed(string ActualStr, string StrFirst,
string StrLast)
{
    // Ordinal comparison: a culture-sensitive match may cover a span whose length differs
    // from StrFirst.Length, which would make the start offset below incorrect.
    int firstIndex = ActualStr.IndexOf(StrFirst, System.StringComparison.Ordinal);
    if (firstIndex < 0)
    {
        return string.Empty;
    }

    int startIndex = firstIndex + StrFirst.Length;

    // Search for the closing delimiter only after the opening one; no temporary string needed.
    int endIndex = ActualStr.IndexOf(StrLast, startIndex, System.StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return string.Empty;
    }

    return ActualStr.Substring(startIndex, endIndex - startIndex);
}
/// <summary>
/// Returns the text of <paramref name="ActualStr"/> between <paramref name="StrFirst"/> and
/// <paramref name="StrLast"/>, using a regular expression that is constructed on every call.
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">The opening delimiter, matched literally.</param>
/// <param name="StrLast">The closing delimiter, matched literally.</param>
/// <returns>The captured text, or an empty string when there is no match.</returns>
public static string BetweenOfRegexAdhoc(string ActualStr, string StrFirst,
string StrLast)
{
    // Escape the delimiters so characters such as '.', '[' or '(' are matched literally.
    // For the delimiters "de" and "op" this yields the pattern "de(.*)op".
    // The capture is greedy, so it extends to the LAST occurrence of StrLast on the line.
    string pattern = Regex.Escape(StrFirst) + "(.*)" + Regex.Escape(StrLast);
    Regex re = new Regex(pattern, RegexOptions.None);

    // Groups[1].Value is an empty string when the match fails.
    return re.Match(ActualStr).Groups[1].Value;
}
// Interpreted (non-compiled) regex built once and reused by every call.
// readonly: the shared instance must never be swapped out after initialization.
private static readonly Regex _BetweenOfRegexCached =
new Regex("de(.*)op", RegexOptions.None);

/// <summary>
/// Returns the text of <paramref name="ActualStr"/> captured by the cached pattern
/// "de(.*)op".
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">Not consulted; the opening delimiter is fixed in the cached pattern.</param>
/// <param name="StrLast">Not consulted; the closing delimiter is fixed in the cached pattern.</param>
/// <returns>The captured text, or an empty string when there is no match.</returns>
public static string BetweenOfRegexCached(string ActualStr, string StrFirst,
string StrLast)
{
    Match match = _BetweenOfRegexCached.Match(ActualStr);

    // Groups[1].Value is an empty string when the match fails.
    return match.Groups[1].Value;
}
// Regex built once with RegexOptions.Compiled and reused by every call.
// readonly: the shared instance must never be swapped out after initialization.
private static readonly Regex _BetweenOfRegexCompiled =
new Regex("de(.*)op", RegexOptions.Compiled);

/// <summary>
/// Returns the text of <paramref name="ActualStr"/> captured by the cached, compiled pattern
/// "de(.*)op".
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">Not consulted; the opening delimiter is fixed in the cached pattern.</param>
/// <param name="StrLast">Not consulted; the closing delimiter is fixed in the cached pattern.</param>
/// <returns>The captured text, or an empty string when there is no match.</returns>
public static string BetweenOfRegexCompiled(string ActualStr, string StrFirst,
string StrLast)
{
    Match match = _BetweenOfRegexCompiled.Match(ActualStr);

    // Groups[1].Value is an empty string when the match fails.
    return match.Groups[1].Value;
}
Output:
IndexOf: 1419ms Regex adhoc: 7788ms Regex uncompiled: 1074ms Regex compiled: 682ms
回答2:
If I have understood what you want to do, I think your implementation is possibly not correct.
Here is an implementation that I believe will perform better, at least in terms of GC pressure, because it avoids the multiple calls to Substring, each of which creates a new string on the heap that is used only temporarily.
/// <summary>
/// Returns the text of <paramref name="ActualStr"/> located between the first occurrence of
/// <paramref name="StrFirst"/> and the first occurrence of <paramref name="StrLast"/> that
/// follows it.
/// </summary>
/// <param name="ActualStr">The string to search.</param>
/// <param name="StrFirst">The opening delimiter.</param>
/// <param name="StrLast">The closing delimiter.</param>
/// <returns>
/// The text between the two delimiters, or an empty string when either delimiter is not found.
/// </returns>
public static string BetweenOfFixed(string ActualStr, string StrFirst, string StrLast)
{
    // Ordinal comparison: a culture-sensitive match may cover a span whose length differs
    // from StrFirst.Length, which would make the start offset below incorrect.
    int firstIndex = ActualStr.IndexOf(StrFirst, System.StringComparison.Ordinal);
    if (firstIndex < 0)
    {
        return string.Empty;
    }

    int startIndex = firstIndex + StrFirst.Length;

    // Search for the closing delimiter only after the opening one; no temporary string needed.
    int endIndex = ActualStr.IndexOf(StrLast, startIndex, System.StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return string.Empty;
    }

    return ActualStr.Substring(startIndex, endIndex - startIndex);
}
It would be interesting to compare the performance of this vs. the regex solution.
回答3:
You could construct a regex:
// Escape the delimiters so any regex metacharacters in them are matched literally.
var regex = new Regex(Regex.Escape(strFirst) + "(.*)" + Regex.Escape(strLast));
Your between text will be the first (and only) capture for the match.
回答4:
What about using a regular expression? This would probably be faster than building temporary strings. It would also make it easy to gracefully handle the case where no such string can be found.
来源:https://stackoverflow.com/questions/4912155/between-of-two-strings