I need to replace accented characters in a string with their English equivalents,
for example:
ä = ae
ö = oe
Ö = Oe
ü = ue
I know how to strip the accents.
This class removes diacritic characters (é, ì, è, etc.) and replaces umlauts and the German "ß" with their equivalents "ae (ä)", "oe (ö)", "ue (ü)" and "ss (ß)".
/// <summary>
/// Transliterates German umlauts and "ß" to their two-letter ASCII spellings
/// and strips the combining accent marks from all other characters.
/// </summary>
/// <remarks>
/// Upper-case umlauts map to fully upper-case digraphs ("Ü" becomes "UE"),
/// so "Übermut" is converted to "UEbermut".
/// </remarks>
public sealed class UmlautConverter
{
    // Shared, never mutated: one table for all instances instead of one per object.
    private static readonly Dictionary<char, string> converter = new Dictionary<char, string>
    {
        { 'ä', "ae" },
        { 'Ä', "AE" },
        { 'ö', "oe" },
        { 'Ö', "OE" },
        { 'ü', "ue" },
        { 'Ü', "UE" },
        { 'ß', "ss" }
    };

    // Stays null when the constructor receives null, empty or whitespace-only text.
    private readonly string value;

    /// <summary>
    /// Creates a converter for the given text.
    /// </summary>
    /// <param name="value">
    /// The text to convert. Null, empty or whitespace-only input is not stored.
    /// </param>
    public UmlautConverter(string value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            this.value = value;
        }
    }

    /// <summary>
    /// Replaces umlauts and "ß" with their ASCII spellings and removes the
    /// accent marks from every other character.
    /// </summary>
    /// <returns>
    /// The converted text, or <c>null</c> when the converter was created with
    /// null, empty or whitespace-only input.
    /// </returns>
    public string RemoveDiacritics()
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        // Compose first so that an umlaut written as "base letter + combining
        // diaeresis" becomes the single character used as a key in the table.
        string composed = value.Normalize(NormalizationForm.FormC);

        // Single pass over the text. The replacements are plain ASCII letters,
        // so they can never match another key in the table.
        StringBuilder transliterated = new StringBuilder(composed.Length);
        foreach (char current in composed)
        {
            string replacement;
            if (converter.TryGetValue(current, out replacement))
            {
                transliterated.Append(replacement);
            }
            else
            {
                transliterated.Append(current);
            }
        }

        // Decompose once (not once per character) so that accents become
        // separate non-spacing marks that can be dropped.
        string decomposed = transliterated.ToString().Normalize(NormalizationForm.FormD);

        StringBuilder stripped = new StringBuilder(decomposed.Length);
        foreach (char current in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(current) != UnicodeCategory.NonSpacingMark)
            {
                stripped.Append(current);
            }
        }

        // Recompose so that characters which merely decompose (for example
        // Hangul syllables) are returned in their original composed form.
        return stripped.ToString().Normalize(NormalizationForm.FormC);
    }

    /// <summary>
    /// Tells whether the text contains at least one character from the
    /// replacement table (ä, Ä, ö, Ö, ü, Ü or ß).
    /// </summary>
    /// <returns>
    /// <c>true</c> if such a character is present; otherwise <c>false</c>,
    /// including when the converter holds no text.
    /// </returns>
    public bool HasUmlaut()
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return false;
        }

        // Compose first so this check agrees with RemoveDiacritics on umlauts
        // written as "base letter + combining diaeresis".
        foreach (char current in value.Normalize(NormalizationForm.FormC))
        {
            if (converter.ContainsKey(current))
            {
                return true;
            }
        }

        return false;
    }
}
Usage:
// Each call converts one sample word and prints the result; the trailing
// comment on each line shows the output stated for that word.
Console.WriteLine(new UmlautConverter("Nürnberger Straße").RemoveDiacritics()); // Nuernberger Strasse
Console.WriteLine(new UmlautConverter("Größenwahn").RemoveDiacritics()); // Groessenwahn
Console.WriteLine(new UmlautConverter("Übermut").RemoveDiacritics()); // UEbermut
Console.WriteLine(new UmlautConverter("Università").RemoveDiacritics()); // Universita
Console.WriteLine(new UmlautConverter("Perché").RemoveDiacritics());// Perche
Console.WriteLine(new UmlautConverter("être").RemoveDiacritics()); // etre
There is a minor bug in the "Übermut" case: "Ü" is replaced with "UE" instead of "Ue". But this can be easily fixed. Enjoy :)