I know this is old and might already be answered somewhere I couldn't find, but since I couldn't find anything that works for me, this is what I came up with; I think it works a lot like Google Contacts and Microsoft Outlook. It doesn't handle edge cases well, but in a good CRM-type app the user can always be asked to resolve those (in my app I actually keep separate fields all the time, but I need this for importing data from another app that only has one field):
// Honorifics recognised at the start of a name. Entries are lower-case with
// no punctuation, matching the output of NormalizeNameToken. Extend as needed.
private static readonly string[] NamePrefixes =
{
    "mr", "mrs", "ms", "dr", "miss", "sir", "madam", "mayor", "president"
};

// Generational suffixes recognised at the end of a name (same normalised form).
// Extend as needed, or replace with roman-numeral detection.
private static readonly string[] NameSuffixes =
{
    "jr", "sr", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x",
    "xi", "xii", "xiii", "xiv", "xv"
};

/// <summary>
/// Splits a single-field personal name into prefix, first, middle, last and
/// suffix components using simple positional heuristics.
/// </summary>
/// <remarks>
/// Supported layouts are "[Prefix] First [Middle...] Last [Suffix]" and
/// "[Prefix] Last, First [More first...] [Suffix]". Periods are kept in the
/// output (e.g. "Jr.", "Q."); commas are removed. Ambiguous input is resolved
/// by position only, so callers should let users correct the result.
/// </remarks>
/// <param name="s">The full name. Null or blank input yields all-empty outputs.</param>
/// <param name="prefix">Receives the honorific (e.g. "Dr."), or "" if none was recognised.</param>
/// <param name="first">Receives the first name(s), or "".</param>
/// <param name="middle">Receives the middle name(s) joined by single spaces, or "".</param>
/// <param name="last">Receives the last name, or "".</param>
/// <param name="suffix">Receives the suffix (e.g. "Jr.", "III"), or "" if none was recognised.</param>
public static void ParseName(this string s, out string prefix, out string first, out string middle, out string last, out string suffix)
{
    prefix = "";
    first = "";
    middle = "";
    last = "";
    suffix = "";

    // Regex.Split throws on null; treat null/blank as "no name" instead.
    if (string.IsNullOrWhiteSpace(s))
    {
        return;
    }

    // Split *after* each period, comma or space. The lookbehind keeps the
    // delimiter attached to the preceding token, which is what lets the
    // "Last, First" check below see the trailing comma.
    List<string> parts = Regex.Split(s, @"(?<=[., ])")
        .Where(p => p.Trim().Length > 0)
        .ToList();

    // Leading honorific.
    if (parts.Count > 0 && NamePrefixes.Contains(NormalizeNameToken(parts[0])))
    {
        prefix = CleanNameToken(parts[0]);
        parts.RemoveAt(0);
    }

    // Trailing suffix. Require at least one other remaining part so that a
    // lone token such as "Xi" or "V" is kept as a name rather than being
    // swallowed as a suffix.
    if (parts.Count > 1 && NameSuffixes.Contains(NormalizeNameToken(parts[parts.Count - 1])))
    {
        suffix = CleanNameToken(parts[parts.Count - 1]);
        parts.RemoveAt(parts.Count - 1);
    }

    if (parts.Count == 0)
    {
        return;
    }

    if (parts.Count == 1)
    {
        // With a prefix ("Dr Jones", "Ms Jones") the single name is most
        // likely a surname; without one, assume it is a first name.
        string only = CleanNameToken(parts[0]);
        if (prefix.Length == 0)
        {
            first = only;
        }
        else
        {
            last = only;
        }
    }
    else if (parts[0].EndsWith(",", System.StringComparison.Ordinal))
    {
        // "Last, First [...First...]"
        last = CleanNameToken(parts[0]);
        first = JoinNameTokens(parts.Skip(1));
    }
    else
    {
        // "First [...Middle...] Last"
        first = CleanNameToken(parts[0]);
        last = CleanNameToken(parts[parts.Count - 1]);
        middle = JoinNameTokens(parts.Skip(1).Take(parts.Count - 2));
    }
}

// Removes commas and surrounding whitespace from a token; periods are kept.
private static string CleanNameToken(string token)
{
    return token.Replace(",", "").Trim();
}

// Produces the lookup form of a token: no periods/commas, trimmed, and
// lower-cased with the invariant culture so matching does not depend on the
// current culture (e.g. Turkish dotless i).
private static string NormalizeNameToken(string token)
{
    return token.Replace(".", "").Replace(",", "").Trim().ToLowerInvariant();
}

// Cleans each token and joins the non-empty ones with single spaces.
private static string JoinNameTokens(IEnumerable<string> tokens)
{
    return string.Join(" ", tokens.Select(CleanNameToken).Where(t => t.Length > 0));
}
Sorry that the code is long and ugly; I haven't gotten around to cleaning it up. It is a C# extension method, so you would use it like this:
// ParseName is an extension method, so it is called directly on the string to split.
string prefix;
string first;
string middle;
string last;
string suffix;
string name = "Miss Jessica Dark-Angel Alba";
name.ParseName(out prefix, out first, out middle, out last, out suffix);
// Result: prefix == "Miss", first == "Jessica", middle == "Dark-Angel", last == "Alba", suffix == ""