I'd like to write an extension method for the .NET String class. I'd like it to be a special variation on the Split method - one that takes an escape character to prevent s
How about:
/// <summary>
/// Splits <paramref name="input"/> on every occurrence of <paramref name="separator"/>
/// that is not immediately preceded by <paramref name="escapeCharacter"/>.
/// </summary>
/// <remarks>
/// Escape characters are NOT removed from the returned segments. A separator counts as
/// escaped whenever the character directly before it equals the escape character; an
/// escaped escape character is not given any special treatment. The separator is matched
/// ordinally, so the result does not depend on the current culture.
/// </remarks>
/// <param name="input">The string to split.</param>
/// <param name="separator">The non-empty string that delimits segments.</param>
/// <param name="escapeCharacter">Character that, placed directly before a separator, suppresses the split.</param>
/// <returns>The segments in order of appearance, produced lazily. There is always at least one segment.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> or <paramref name="separator"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="separator"/> is empty.</exception>
public static IEnumerable<string> Split(this string input,
                                        string separator,
                                        char escapeCharacter)
{
    // Validate here rather than inside the iterator so that bad arguments fail at the
    // call site instead of on first enumeration.
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }
    if (separator == null)
    {
        throw new ArgumentNullException("separator");
    }
    if (separator.Length == 0)
    {
        // An empty separator matches at every index without advancing, which would
        // make the loop below yield empty strings forever.
        throw new ArgumentException("Separator must not be empty.", "separator");
    }

    return SplitOnUnescapedSeparator(input, separator, escapeCharacter);
}

// Lazy worker for Split; assumes its arguments have already been validated.
private static IEnumerable<string> SplitOnUnescapedSeparator(string input,
                                                             string separator,
                                                             char escapeCharacter)
{
    int startOfSegment = 0;
    int index = 0;
    while (index < input.Length)
    {
        index = input.IndexOf(separator, index, StringComparison.Ordinal);
        if (index == -1)
        {
            break;
        }
        if (index > 0 && input[index - 1] == escapeCharacter)
        {
            // Escaped separator: keep it inside the current segment and search on.
            index += separator.Length;
            continue;
        }
        yield return input.Substring(startOfSegment, index - startOfSegment);
        index += separator.Length;
        startOfSegment = index;
    }
    // Whatever follows the last unescaped separator (possibly empty) is the final segment.
    yield return input.Substring(startOfSegment);
}
That seems to work (with a few quick test strings), but it doesn't remove the escape character - that will depend on your exact situation, I suspect.
My first observation is that the separator ought to be a char not a string since escaping a string using a single character may be hard -- how much of the following string does the escape character cover? Other than that, @James Curran's answer is pretty much how I would handle it - though, as he says it needs some clean up. Initializing j to 0 in the loop initializer, for instance. Figuring out how to handle null inputs, etc.
You probably also want to support StringSplitOptions and specify whether empty strings should be returned in the collection.
/// <summary>
/// Leaves the first '&gt;' in the line untouched and replaces every later '&gt;' with '*',
/// so that the line contains at most one '&gt;' delimiter.
/// </summary>
/// <param name="sSingleLine">The line to normalise.</param>
/// <returns>
/// The line with all but the first '&gt;' replaced by '*'. A line with fewer than two
/// '&gt;' characters is returned unchanged.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sSingleLine"/> is null.</exception>
public string RemoveMultipleDelimiters(string sSingleLine)
{
    if (sSingleLine == null)
    {
        throw new ArgumentNullException("sSingleLine");
    }

    int firstDelimiter = sSingleLine.IndexOf('>');
    if (firstDelimiter < 0)
    {
        return sSingleLine;
    }

    int secondDelimiter = sSingleLine.IndexOf('>', firstDelimiter + 1);
    if (secondDelimiter < 0)
    {
        return sSingleLine;
    }

    // Cut directly after the first delimiter. Cutting one character before the second
    // delimiter would drag the first delimiter into the replaced tail whenever the two
    // are adjacent (">>").
    string head = sSingleLine.Substring(0, firstDelimiter + 1);
    string tail = sSingleLine.Substring(firstDelimiter + 1).Replace('>', '*');
    return head + tail;
}
You can try something like this, although I would suggest implementing it with unsafe code for performance-critical tasks.
/// <summary>
/// Escape-aware counterparts of <c>String.Split</c>: a separator character that directly
/// follows the escape character is kept as literal text instead of splitting the string.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Splits <paramref name="text"/> on unescaped separator characters, with no limit on
    /// the number of segments and with empty segments included.
    /// </summary>
    /// <param name="text">The string to split.</param>
    /// <param name="escapeChar">Character that makes the following separator character literal.</param>
    /// <param name="seperator">The characters that delimit segments.</param>
    /// <returns>The segments, in order.</returns>
    public static string[] Split(this string text, char escapeChar, params char[] seperator)
    {
        return Split(text, escapeChar, seperator, int.MaxValue, StringSplitOptions.None);
    }

    /// <summary>
    /// Splits <paramref name="text"/> on unescaped separator characters into at most
    /// <paramref name="count"/> segments, with empty segments included.
    /// </summary>
    /// <param name="text">The string to split.</param>
    /// <param name="escapeChar">Character that makes the following separator character literal.</param>
    /// <param name="seperator">The characters that delimit segments.</param>
    /// <param name="count">The maximum number of segments to return.</param>
    /// <returns>The segments, in order.</returns>
    public static string[] Split(this string text, char escapeChar, char[] seperator, int count)
    {
        return Split(text, escapeChar, seperator, count, StringSplitOptions.None);
    }

    /// <summary>
    /// Splits <paramref name="text"/> on unescaped separator characters, with no limit on
    /// the number of segments.
    /// </summary>
    /// <param name="text">The string to split.</param>
    /// <param name="escapeChar">Character that makes the following separator character literal.</param>
    /// <param name="seperator">The characters that delimit segments.</param>
    /// <param name="options">Controls whether empty segments are returned.</param>
    /// <returns>The segments, in order.</returns>
    public static string[] Split(this string text, char escapeChar, char[] seperator, StringSplitOptions options)
    {
        return Split(text, escapeChar, seperator, int.MaxValue, options);
    }

    /// <summary>
    /// Splits <paramref name="text"/> on unescaped separator characters.
    /// </summary>
    /// <remarks>
    /// An escape character is dropped when it precedes a separator character and retained
    /// when it precedes any other character or ends the text. Once
    /// <paramref name="count"/> - 1 segments have been produced, the rest of the text is
    /// returned verbatim as the final element, so that element may still contain
    /// separators and escape characters. Only the
    /// <see cref="StringSplitOptions.RemoveEmptyEntries"/> flag of
    /// <paramref name="options"/> is honoured.
    /// </remarks>
    /// <param name="text">The string to split.</param>
    /// <param name="escapeChar">Character that makes the following separator character literal.</param>
    /// <param name="seperator">The characters that delimit segments. An empty array never splits.</param>
    /// <param name="count">The maximum number of segments to return.</param>
    /// <param name="options">Controls whether empty segments are returned.</param>
    /// <returns>
    /// The segments, in order. An empty array when <paramref name="text"/> is empty or
    /// <paramref name="count"/> is zero.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="seperator"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    public static string[] Split(this string text, char escapeChar, char[] seperator, int count, StringSplitOptions options)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }
        if (seperator == null)
        {
            throw new ArgumentNullException("seperator");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count", "Count cannot be less than zero.");
        }
        if (count == 0 || text.Length == 0)
        {
            return new string[0];
        }
        if (count == 1)
        {
            // A single segment is by definition the whole, unprocessed text.
            return new[] { text };
        }

        // Test the flag rather than the whole value so that combined options still work.
        bool removeEmptyEntries = (options & StringSplitOptions.RemoveEmptyEntries) != 0;

        var segments = new List<string>();
        bool previousCharIsEscape = false;
        var segment = new StringBuilder();
        for (int i = 0; i < text.Length; i++)
        {
            char current = text[i];

            if (previousCharIsEscape)
            {
                previousCharIsEscape = false;
                if (!IsSeparator(seperator, current))
                {
                    // Retain the escape character when it escapes any non-separator character.
                    segment.Append(escapeChar);
                }
                // For an escaped separator only the separator itself is kept.
                segment.Append(current);
                continue;
            }

            if (current == escapeChar)
            {
                previousCharIsEscape = true;
                continue;
            }

            if (IsSeparator(seperator, current))
            {
                if (!removeEmptyEntries || segment.Length != 0)
                {
                    // Only add empty segments when options allow.
                    segments.Add(segment.ToString());
                }
                segment.Clear();

                if (segments.Count == count - 1)
                {
                    // Limit reached: hand back the remainder untouched as the last element.
                    string remainder = text.Substring(i + 1);
                    if (!removeEmptyEntries || remainder.Length != 0)
                    {
                        segments.Add(remainder);
                    }
                    return segments.ToArray();
                }
                continue;
            }

            segment.Append(current);
        }

        if (previousCharIsEscape)
        {
            // A dangling escape character at the end of the text escapes nothing; keep it
            // rather than silently losing input.
            segment.Append(escapeChar);
        }

        if (!removeEmptyEntries || segment.Length != 0)
        {
            // Only add empty segments when options allow.
            segments.Add(segment.ToString());
        }
        return segments.ToArray();
    }

    // Returns true when candidate is one of the separator characters.
    private static bool IsSeparator(char[] separators, char candidate)
    {
        return Array.IndexOf(separators, candidate) >= 0;
    }
}
This will need to be cleaned up a bit, but this is essentially it....
// Splits input on every separator character that is not directly preceded by escapeChar.
// Escape characters are left in the resulting segments.
List<string> output = new List<string>();
int j = 0; // start index of the segment currently being scanned
for (int i = 0; i < input.Length; ++i)
{
    if (input[i] == separator && (i == 0 || input[i - 1] != escapeChar))
    {
        output.Add(input.Substring(j, i - j));
        j = i + 1; // the next segment starts after the separator, not on it
    }
}
// Everything after the last unescaped separator (possibly empty) is the final segment.
output.Add(input.Substring(j));
return output.ToArray();
I had this problem as well and didn't find a solution. So I wrote such a method myself:
/// <summary>
/// Lazily splits <paramref name="text"/> at each <paramref name="separator"/> that is not
/// escaped by a preceding <paramref name="escapeCharacter"/>.
/// </summary>
/// <remarks>
/// Separators that split the text are dropped; escape characters are kept in the
/// segments. An escape character that is itself escaped does not escape what follows it.
/// </remarks>
/// <param name="text">The string to split.</param>
/// <param name="separator">The character that delimits segments.</param>
/// <param name="escapeCharacter">The character that suppresses a split at the following separator.</param>
/// <returns>The segments in order; there is always at least one.</returns>
public static IEnumerable<string> Split(
    this string text,
    char separator,
    char escapeCharacter)
{
    var current = new StringBuilder(text.Length);
    bool previousWasEscape = false;

    for (int position = 0; position < text.Length; position++)
    {
        char symbol = text[position];
        bool isSplitPoint = !previousWasEscape && symbol == separator;

        if (!isSplitPoint)
        {
            // Ordinary text, escape characters and escaped separators all stay in the segment.
            current.Append(symbol);
        }
        else
        {
            yield return current.ToString();
            current.Clear();
        }

        // Only an escape character that was not itself escaped affects the next character.
        previousWasEscape = !previousWasEscape && symbol == escapeCharacter;
    }

    yield return current.ToString();
}
It goes in combination with Escape and Unescape: Escape escapes the separator and escape characters, and Unescape removes the escape characters again:
/// <summary>
/// Returns a copy of <paramref name="text"/> in which every character that occurs in
/// <paramref name="controlChars"/> is prefixed with <paramref name="escapeCharacter"/>.
/// </summary>
/// <param name="text">The string to escape.</param>
/// <param name="controlChars">The set of characters that must be escaped.</param>
/// <param name="escapeCharacter">The character inserted in front of each control character.</param>
/// <returns>The escaped string.</returns>
public static string Escape(this string text, string controlChars, char escapeCharacter)
{
    var escapedText = new StringBuilder(text.Length + 3);

    for (int position = 0; position < text.Length; position++)
    {
        char current = text[position];
        bool needsEscape = controlChars.Contains(current);

        if (needsEscape)
        {
            escapedText.Append(escapeCharacter);
        }

        escapedText.Append(current);
    }

    return escapedText.ToString();
}
/// <summary>
/// Returns a copy of <paramref name="text"/> with escaping removed: each unescaped
/// <paramref name="escapeCharacter"/> is dropped and the character after it is kept
/// as-is.
/// </summary>
/// <param name="text">The escaped string.</param>
/// <param name="escapeCharacter">The character that was used for escaping.</param>
/// <returns>The unescaped string.</returns>
public static string Unescape(string text, char escapeCharacter)
{
    var result = new StringBuilder(text.Length);
    bool previousWasDropped = false;

    for (int position = 0; position < text.Length; position++)
    {
        char current = text[position];

        if (current == escapeCharacter && !previousWasDropped)
        {
            // An active escape character is swallowed; it protects the next character.
            previousWasDropped = true;
            continue;
        }

        previousWasDropped = false;
        result.Append(current);
    }

    return result.ToString();
}
Examples for escape / unescape
separator = ','
escapeCharacter = '\\'
//controlCharacters is always separator + escapeCharacter
@"AB,CD\EF\," <=> @"AB\,CD\\EF\\\,"
Split:
@"AB,CD\,EF\\,GH\\\,IJ" => [@"AB", @"CD\,EF\\", @"GH\\\,IJ"]
So to use it, Escape before Join, and Unescape after Split.