Definition
From Wikipedia:
A slug is the part of a URL which identifies a page using human-readable keywords.
To make the URL easier for users to type, special characters are often removed or replaced as well. For instance, accented characters are usually replaced by letters from the English alphabet; punctuation marks are generally removed; and spaces (which have to be encoded as %20 or +) are replaced by dashes (-) or underscores (_), which are more aesthetically pleasing.
Context
I developed a photo-sharing website on which users can upload, share and view photos.
All pages are generated automatically without my grip on the title. Because the title of a photo or the name of a user may contain accented characters or spaces, I needed a function to automatically create slugs and keep readable URLs.
I created the following function which replaces accented characters (âèêëçî), removes punctuation and bad characters (#@&~^!) and transforms spaces in dashes.
Questions
- What do you think about this function?
- Do you know any other functions to create slugs?
Code
php:
function sluggable($str) {
$before = array(
'àáâãäåòóôõöøèéêëðçìíîïùúûüñšž',
'/[^a-z0-9\s]/',
array('/\s/', '/--+/', '/---+/')
);
$after = array(
'aaaaaaooooooeeeeeciiiiuuuunsz',
'',
'-'
);
$str = strtolower($str);
$str = strtr($str, $before[0], $after[0]);
$str = preg_replace($before[1], $after[1], $str);
$str = trim($str);
$str = preg_replace($before[2], $after[2], $str);
return $str;
}
It seems ok, maybe it's incomplete. Check http://code.google.com/p/php-slugs/ for a code example.
I like the php-slugs code at google code solution. But if you want a simpler one that works with UTF-8:
function format_uri( $string, $separator = '-' )
{
$accents_regex = '~&([a-z]{1,2})(?:acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i';
$special_cases = array( '&' => 'and', "'" => '');
$string = mb_strtolower( trim( $string ), 'UTF-8' );
$string = str_replace( array_keys($special_cases), array_values( $special_cases), $string );
$string = preg_replace( $accents_regex, '$1', htmlentities( $string, ENT_QUOTES, 'UTF-8' ) );
$string = preg_replace("/[^a-z0-9]/u", "$separator", $string);
$string = preg_replace("/[$separator]+/u", "$separator", $string);
return $string;
}
So
echo format_uri("#@&~^!âèêëçî");
outputs
-and-aeeeci
Please, comment if you find some errors
A few people have linked to "php-slugs" on google.com, but it looks like their page is a little screwy now, so here it is if anyone needs it:
// source: https://code.google.com/archive/p/php-slugs/
function my_str_split($string)
{
$slen=strlen($string);
for($i=0; $i<$slen; $i++)
{
$sArray[$i]=$string{$i};
}
return $sArray;
}
function noDiacritics($string)
{
//cyrylic transcription
$cyrylicFrom = array('А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ё', 'Ж', 'З', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П', 'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я', 'а', 'б', 'в', 'г', 'д', 'е', 'ё', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', 'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я');
$cyrylicTo = array('A', 'B', 'W', 'G', 'D', 'Ie', 'Io', 'Z', 'Z', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'F', 'Ch', 'C', 'Tch', 'Sh', 'Shtch', '', 'Y', '', 'E', 'Iu', 'Ia', 'a', 'b', 'w', 'g', 'd', 'ie', 'io', 'z', 'z', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'f', 'ch', 'c', 'tch', 'sh', 'shtch', '', 'y', '', 'e', 'iu', 'ia');
$from = array("Á", "À", "Â", "Ä", "Ă", "Ā", "Ã", "Å", "Ą", "Æ", "Ć", "Ċ", "Ĉ", "Č", "Ç", "Ď", "Đ", "Ð", "É", "È", "Ė", "Ê", "Ë", "Ě", "Ē", "Ę", "Ə", "Ġ", "Ĝ", "Ğ", "Ģ", "á", "à", "â", "ä", "ă", "ā", "ã", "å", "ą", "æ", "ć", "ċ", "ĉ", "č", "ç", "ď", "đ", "ð", "é", "è", "ė", "ê", "ë", "ě", "ē", "ę", "ə", "ġ", "ĝ", "ğ", "ģ", "Ĥ", "Ħ", "I", "Í", "Ì", "İ", "Î", "Ï", "Ī", "Į", "IJ", "Ĵ", "Ķ", "Ļ", "Ł", "Ń", "Ň", "Ñ", "Ņ", "Ó", "Ò", "Ô", "Ö", "Õ", "Ő", "Ø", "Ơ", "Œ", "ĥ", "ħ", "ı", "í", "ì", "i", "î", "ï", "ī", "į", "ij", "ĵ", "ķ", "ļ", "ł", "ń", "ň", "ñ", "ņ", "ó", "ò", "ô", "ö", "õ", "ő", "ø", "ơ", "œ", "Ŕ", "Ř", "Ś", "Ŝ", "Š", "Ş", "Ť", "Ţ", "Þ", "Ú", "Ù", "Û", "Ü", "Ŭ", "Ū", "Ů", "Ų", "Ű", "Ư", "Ŵ", "Ý", "Ŷ", "Ÿ", "Ź", "Ż", "Ž", "ŕ", "ř", "ś", "ŝ", "š", "ş", "ß", "ť", "ţ", "þ", "ú", "ù", "û", "ü", "ŭ", "ū", "ů", "ų", "ű", "ư", "ŵ", "ý", "ŷ", "ÿ", "ź", "ż", "ž");
$to = array("A", "A", "A", "AE", "A", "A", "A", "A", "A", "AE", "C", "C", "C", "C", "C", "D", "D", "D", "E", "E", "E", "E", "E", "E", "E", "E", "G", "G", "G", "G", "G", "a", "a", "a", "ae", "ae", "a", "a", "a", "a", "ae", "c", "c", "c", "c", "c", "d", "d", "d", "e", "e", "e", "e", "e", "e", "e", "e", "g", "g", "g", "g", "g", "H", "H", "I", "I", "I", "I", "I", "I", "I", "I", "IJ", "J", "K", "L", "L", "N", "N", "N", "N", "O", "O", "O", "OE", "O", "O", "O", "O", "CE", "h", "h", "i", "i", "i", "i", "i", "i", "i", "i", "ij", "j", "k", "l", "l", "n", "n", "n", "n", "o", "o", "o", "oe", "o", "o", "o", "o", "o", "R", "R", "S", "S", "S", "S", "T", "T", "T", "U", "U", "U", "UE", "U", "U", "U", "U", "U", "U", "W", "Y", "Y", "Y", "Z", "Z", "Z", "r", "r", "s", "s", "s", "s", "ss", "t", "t", "b", "u", "u", "u", "ue", "u", "u", "u", "u", "u", "u", "w", "y", "y", "y", "z", "z", "z");
$from = array_merge($from, $cyrylicFrom);
$to = array_merge($to, $cyrylicTo);
$newstring=str_replace($from, $to, $string);
return $newstring;
}
function makeSlugs($string, $maxlen=0)
{
$newStringTab=array();
$string=strtolower(noDiacritics($string));
if(function_exists('str_split'))
{
$stringTab=str_split($string);
}
else
{
$stringTab=my_str_split($string);
}
$numbers=array("0","1","2","3","4","5","6","7","8","9","-");
//$numbers=array("0","1","2","3","4","5","6","7","8","9");
foreach($stringTab as $letter)
{
if(in_array($letter, range("a", "z")) || in_array($letter, $numbers))
{
$newStringTab[]=$letter;
}
elseif($letter==" ")
{
$newStringTab[]="-";
}
}
if(count($newStringTab))
{
$newString=implode($newStringTab);
if($maxlen>0)
{
$newString=substr($newString, 0, $maxlen);
}
$newString = removeDuplicates('--', '-', $newString);
}
else
{
$newString='';
}
return $newString;
}
function checkSlug($sSlug)
{
if(preg_match("/^[a-zA-Z0-9]+[a-zA-Z0-9\-]*$/", $sSlug) == 1)
{
return true;
}
return false;
}
function removeDuplicates($sSearch, $sReplace, $sSubject)
{
$i=0;
do{
$sSubject=str_replace($sSearch, $sReplace, $sSubject);
$pos=strpos($sSubject, $sSearch);
$i++;
if($i>100)
{
die('removeDuplicates() loop error');
}
}while($pos!==false);
return $sSubject;
}
setlocale(LC_ALL, 'en_US.UTF8');
function slugify($text)
{
// replace non letter or digits by -
$text = preg_replace('~[^\\pL\d]+~u', '-', $text);
// trim
$text = trim($text, '-');
// transliterate
$text = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
// lowercase
$text = strtolower($text);
// remove unwanted characters
$text = preg_replace('~[^-\w]+~', '', $text);
if (empty($text))
{
return 'n-a';
}
return $text;
}
$slug = slugify($var);
I found this on the net, does exactly as you want, but keeps the case.
function sluggable($p) {
$ts = array("/[À-Å]/","/Æ/","/Ç/","/[È-Ë]/","/[Ì-Ï]/","/Ð/","/Ñ/","/[Ò-ÖØ]/","/×/","/[Ù-Ü]/","/[Ý-ß]/","/[à-å]/","/æ/","/ç/","/[è-ë]/","/[ì-ï]/","/ð/","/ñ/","/[ò-öø]/","/÷/","/[ù-ü]/","/[ý-ÿ]/");
$tn = array("A","AE","C","E","I","D","N","O","X","U","Y","a","ae","c","e","i","d","n","o","x","u","y");
return preg_replace($ts,$tn, $p);
}
This really works fine. Returns correct clean url slug.
$string = '(1234) S*m@#ith S)&+*t `E}{xam)ple?>land - - 1!_2)#3)(*4""5';
// remove all non alphanumeric characters except spaces
$clean = preg_replace('/[^a-zA-Z0-9\s]/', '', strtolower($string));
// replace one or multiple spaces into single dash (-)
$clean = preg_replace('!\s+!', '-', $clean);
echo $clean; // 1234-smith-st-exampleland-12345
function remove_accents($string)
{
$a = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûýýþÿŔŕ';
$b = 'aaaaaaaceeeeiiiidnoooooouuuuybsaaaaaaaceeeeiiiidnoooooouuuyybyRr';
$string = strtr(utf8_decode($string), utf8_decode($a), $b);
return utf8_encode($string);
}
function format_slug($title)
{
$title = remove_accents($title);
$title = trim(strtolower($title));
$title = preg_replace('#[^a-z0-9\\-/]#i', '_', $title);
return trim(preg_replace('/-+/', '-', $title), '-/');
}
use : echo format_slug($var);
This group of functions converts special characters to only include a-z
, 0-9
, and -
. It also does a couple extra things such as convert ampersands (&
) to the word and
.
<?php
/* Call this function to create a slug from $string */
function create_slug($string){
$string = remove_accents($string);
$string = symbols_to_words($string);
$string = strtolower($string); // Force lowercase
$space_chars = array(
" ", // space
"…", // ellipsis
"–", // en dash
"—", // em dash
"/", // slash
"\\", // backslash
":", // colon
";", // semi-colon
".", // period
"+", // plus sign
"#", // pound sign
"~", // tilde
"_", // underscore
"|", // pipe
);
foreach($space_chars as $char){
$string = str_replace($char, '-', $string); // Change spaces to dashes
}
// Only allow letters, numbers, and dashes
$string = preg_replace('/([^a-zA-Z0-9\-]+)/', '', $string);
$string = preg_replace('/-+/', '-', $string); // Clean up extra dashes
if(substr($string, -1)==='-'){ // Remove - from end
$string = substr($string, 0, -1);
}
if(substr($string, 0, 1)==='-'){ // Remove - from start
$string = substr($string, 1);
}
return $string;
}
/**
* Borrowed from WordPress
* Converts all accent characters to ASCII characters.
*
* If there are no accent characters, then the string given is just returned.
*
* @since 1.2.1
*
* @param string $string Text that might have accent characters
* @return string Filtered string with replaced "nice" characters.
*/
function remove_accents($string) {
if(!preg_match('/[\x80-\xff]/', $string)){
return $string;
}
if($this->seems_utf8($string)){
$chars = array(
// Decompositions for Latin-1 Supplement
chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
chr(195).chr(191) => 'y',
// Decompositions for Latin Extended-A
chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
// Euro Sign
chr(226).chr(130).chr(172) => 'E',
// GBP (Pound) Sign
chr(194).chr(163) => '');
$string = strtr($string, $chars);
} else {
// Assume ISO-8859-1 if not UTF-8
$chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
.chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
.chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
.chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
.chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
.chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
.chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
.chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
.chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
.chr(252).chr(253).chr(255);
$chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
$string = strtr($string, $chars['in'], $chars['out']);
$double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
$double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
$string = str_replace($double_chars['in'], $double_chars['out'], $string);
}
return $string;
}
function symbols_to_words($output){
$output = str_replace('@', ' at ', $output);
$output = str_replace('%', ' percent ', $output);
$output = str_replace('&', ' and ', $output);
return $output;
}
it means be SEO friendly permalink. Keep your URL small and SEO friendly using keywords. You can ignore prepositions in post permalinks. Don’t repeat any keyword here
function seourl($phrase, $maxLength = 100000000000000) {
$result = strtolower($phrase);
$result = preg_replace("~[^A-Za-z0-9-\s]~", "", $result);
$result = trim(preg_replace("~[\s-]+~", " ", $result));
$result = trim(substr($result, 0, $maxLength));
$result = preg_replace("~\s~", "-", $result);
return $result;
}
来源:https://stackoverflow.com/questions/5305879/generate-seo-friendly-urls-slugs