What is a good complete regular expression or some other process that would take the title:
How do you change a title to be part of the URL like Stack
Here is my version of Jeff's code. I've made the following changes:
The case conversion is now also optional.
public static class Slug
{
public static string Create(bool toLower, params string[] values)
{
return Create(toLower, String.Join("-", values));
}
///
/// Creates a slug.
/// References:
/// http://www.unicode.org/reports/tr15/tr15-34.html
/// https://meta.stackexchange.com/questions/7435/non-us-ascii-characters-dropped-from-full-profile-url/7696#7696
/// https://stackoverflow.com/questions/25259/how-do-you-include-a-webpage-title-as-part-of-a-webpage-url/25486#25486
/// https://stackoverflow.com/questions/3769457/how-can-i-remove-accents-on-a-string
///
///
///
///
public static string Create(bool toLower, string value)
{
if (value == null)
return "";
var normalised = value.Normalize(NormalizationForm.FormKD);
const int maxlen = 80;
int len = normalised.Length;
bool prevDash = false;
var sb = new StringBuilder(len);
char c;
for (int i = 0; i < len; i++)
{
c = normalised[i];
if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
{
if (prevDash)
{
sb.Append('-');
prevDash = false;
}
sb.Append(c);
}
else if (c >= 'A' && c <= 'Z')
{
if (prevDash)
{
sb.Append('-');
prevDash = false;
}
// Tricky way to convert to lowercase
if (toLower)
sb.Append((char)(c | 32));
else
sb.Append(c);
}
else if (c == ' ' || c == ',' || c == '.' || c == '/' || c == '\\' || c == '-' || c == '_' || c == '=')
{
if (!prevDash && sb.Length > 0)
{
prevDash = true;
}
}
else
{
string swap = ConvertEdgeCases(c, toLower);
if (swap != null)
{
if (prevDash)
{
sb.Append('-');
prevDash = false;
}
sb.Append(swap);
}
}
if (sb.Length == maxlen)
break;
}
return sb.ToString();
}
static string ConvertEdgeCases(char c, bool toLower)
{
string swap = null;
switch (c)
{
case 'ı':
swap = "i";
break;
case 'ł':
swap = "l";
break;
case 'Ł':
swap = toLower ? "l" : "L";
break;
case 'đ':
swap = "d";
break;
case 'ß':
swap = "ss";
break;
case 'ø':
swap = "o";
break;
case 'Þ':
swap = "th";
break;
}
return swap;
}
}
For more details, the unit tests, and an explanation of why Facebook's URL scheme is a little smarter than Stack Overflows, I've got an expanded version of this on my blog.