问题
I found this slugify function over here: PHP function to make slug (URL string)
And I tried to rewrite it to MySQL, that what I've done is:
- borrowed regex_replace function from: https://techras.wordpress.com/2011/06/02/regex-replace-for-mysql/
- and transliterate function from: http://igstan.ro/posts/2009-02-13-mysql-transliteration-function.html
- rewrote mentioned at the top as correct answer function from symfony fw to the last bit of mysql create function, function.
, but at the output I'm getting only lowered text, no dashes and letters seem to be transliterated as well, so only thing left is dashes.
My query:
UPDATE `ad_kategorija` SET `slug_lt`=slugify(`kat_pavlt`), `slug_ru`=slugify(`kat_pavru`), `slug_en`=slugify(`kat_paven`)
Functions:
# Regex Replace function
DELIMITER $$
CREATE DEFINER=`root`@`localhost` FUNCTION `regex_replace`(pattern VARCHAR(1000),replacement VARCHAR(1000),original VARCHAR(1000)) RETURNS varchar(1000) CHARSET utf8
DETERMINISTIC
BEGIN
DECLARE temp VARCHAR(1000);
DECLARE ch VARCHAR(1);
DECLARE i INT;
SET i = 1;
SET temp = '';
IF original REGEXP pattern THEN
loop_label: LOOP
IF i>CHAR_LENGTH(original) THEN
LEAVE loop_label;
END IF;
SET ch = SUBSTRING(original,i,1);
IF NOT ch REGEXP pattern THEN
SET temp = CONCAT(temp,ch);
ELSE
SET temp = CONCAT(temp,replacement);
END IF;
SET i=i+1;
END LOOP;
ELSE
SET temp = original;
END IF;
RETURN temp;
END$$
# Transliteration function
CREATE DEFINER=`root`@`localhost` FUNCTION `transliterate`(original VARCHAR(512)) RETURNS varchar(512) CHARSET utf8
BEGIN
DECLARE translit VARCHAR(512) DEFAULT '';
DECLARE len INT(3) DEFAULT 0;
DECLARE pos INT(3) DEFAULT 1;
DECLARE letter CHAR(1);
DECLARE is_lower BIT;
SET len = CHAR_LENGTH(original);
WHILE (pos <= len) DO
SET letter = SUBSTRING(original, pos, 1);
SET is_lower = IF(LCASE(letter) COLLATE utf8_bin = letter COLLATE utf8_bin, 1, 0);
CASE TRUE
WHEN letter = 'a' THEN SET letter = IF(is_lower, 'a', 'A');
WHEN letter = 'b' THEN SET letter = IF(is_lower, 'b', 'B');
WHEN letter = 'c' THEN SET letter = IF(is_lower, 'c', 'C');
WHEN letter = 'd' THEN SET letter = IF(is_lower, 'd', 'D');
WHEN letter = 'e' THEN SET letter = IF(is_lower, 'e', 'E');
WHEN letter = 'f' THEN SET letter = IF(is_lower, 'f', 'F');
WHEN letter = 'g' THEN SET letter = IF(is_lower, 'g', 'G');
WHEN letter = 'h' THEN SET letter = IF(is_lower, 'h', 'H');
WHEN letter = 'i' THEN SET letter = IF(is_lower, 'i', 'I');
WHEN letter = 'j' THEN SET letter = IF(is_lower, 'j', 'J');
WHEN letter = 'k' THEN SET letter = IF(is_lower, 'k', 'K');
WHEN letter = 'l' THEN SET letter = IF(is_lower, 'l', 'L');
WHEN letter = 'ł' THEN SET letter = IF(is_lower, 'l', 'L');
WHEN letter = 'm' THEN SET letter = IF(is_lower, 'm', 'M');
WHEN letter = 'n' THEN SET letter = IF(is_lower, 'n', 'N');
WHEN letter = 'o' THEN SET letter = IF(is_lower, 'o', 'O');
WHEN letter = 'p' THEN SET letter = IF(is_lower, 'p', 'P');
WHEN letter = 'q' THEN SET letter = IF(is_lower, 'q', 'Q');
WHEN letter = 'r' THEN SET letter = IF(is_lower, 'r', 'R');
WHEN letter = 's' THEN SET letter = IF(is_lower, 's', 'S');
WHEN letter = 't' THEN SET letter = IF(is_lower, 't', 'T');
WHEN letter = 'u' THEN SET letter = IF(is_lower, 'u', 'U');
WHEN letter = 'v' THEN SET letter = IF(is_lower, 'v', 'V');
WHEN letter = 'w' THEN SET letter = IF(is_lower, 'w', 'W');
WHEN letter = 'x' THEN SET letter = IF(is_lower, 'x', 'X');
WHEN letter = 'y' THEN SET letter = IF(is_lower, 'y', 'Y');
WHEN letter = 'z' THEN SET letter = IF(is_lower, 'z', 'Z');
ELSE
SET letter = letter;
END CASE;
-- CONCAT seems to ignore the whitespace character. As a workaround we use
-- CONCAT_WS with a whitespace separator when the letter is a whitespace.
SET translit = CONCAT_WS(IF(letter = ' ', ' ', ''), translit, letter);
SET pos = pos + 1;
END WHILE;
RETURN translit;
END$$
# slug create function
CREATE DEFINER=`root`@`localhost` FUNCTION `slugify`(`dirty_string` VARCHAR(255) CHARSET utf8) RETURNS varchar(255) CHARSET utf8
DETERMINISTIC
BEGIN
DECLARE temp_string VarChar(255) DEFAULT '';
DECLARE output VarChar(255);
SET temp_string = regex_replace('~[^\\pL\\d]+~u', '-', dirty_string);
SET temp_string = TRIM(BOTH '-' FROM temp_string);
SET temp_string = transliterate(temp_string);
SET temp_string = LOWER(temp_string);
SET temp_string = regex_replace('~[^-\\w]+~', '', temp_string);
If temp_string = '' Then
SET temp_string = '';
End If;
SET output = temp_string;
Return output;
END$$
DELIMITER ;
So can someone help me on finishing it, main problem is I'm not getting dashes replaced on spaces, maybe regex is wrong. Or regex replace function needs some addons to do it. Please help.
回答1:
Fixed my function, removed regex_replace usage and instead used function from here: mySQL Stored Function to create a slug
Added transliteration tweak, final code is here. transliteration function doesn't change:
DELIMITER $$
--
-- Functions
--
CREATE DEFINER=`root`@`localhost` FUNCTION `slugify`(`dirty_string` VARCHAR(255)) RETURNS varchar(255) CHARSET utf8
DETERMINISTIC
BEGIN
DECLARE x, y , z Int;
Declare temp_string, allowed_chars, new_string VarChar(255);
Declare is_allowed Bool;
Declare c, check_char VarChar(1);
set allowed_chars = "abcdefghijklmnopqrstuvwxyz0123456789-";
set temp_string = transliterate(dirty_string);
set temp_string = lower(temp_string);
Select temp_string Regexp('&') Into x;
If x = 1 Then
Set temp_string = replace(temp_string, '&', ' and ');
End If;
Select temp_string Regexp('[^a-z0-9]+') into x;
If x = 1 then
set z = 1;
While z <= Char_length(temp_string) Do
Set c = Substring(temp_string, z, 1);
Set is_allowed = False;
Set y = 1;
Inner_Check: While y <= Char_length(allowed_chars) Do
If (strCmp(ascii(Substring(allowed_chars,y,1)), Ascii(c)) = 0) Then
Set is_allowed = True;
Leave Inner_Check;
End If;
Set y = y + 1;
End While;
If is_allowed = False Then
Set temp_string = Replace(temp_string, c, '-');
End If;
set z = z + 1;
End While;
End If;
Select temp_string Regexp("^-|-$|'") into x;
If x = 1 Then
Set temp_string = Replace(temp_string, "'", '');
Set z = Char_length(temp_string);
Set y = Char_length(temp_string);
Dash_check: While z > 1 Do
If Strcmp(SubString(temp_string, -1, 1), '-') = 0 Then
Set temp_string = Substring(temp_string,1, y-1);
Set y = y - 1;
Else
Leave Dash_check;
End If;
Set z = z - 1;
End While;
End If;
Repeat
Select temp_string Regexp("--") into x;
If x = 1 Then
Set temp_string = Replace(temp_string, "--", "-");
End If;
Until x <> 1 End Repeat;
If LOCATE('-', temp_string) = 1 Then
Set temp_string = SUBSTRING(temp_string, 2);
End If;
Return temp_string;
END$$
CREATE DEFINER=`root`@`localhost` FUNCTION `transliterate`(original VARCHAR(512)) RETURNS varchar(512) CHARSET utf8
BEGIN
DECLARE translit VARCHAR(512) DEFAULT '';
DECLARE len INT(3) DEFAULT 0;
DECLARE pos INT(3) DEFAULT 1;
DECLARE letter CHAR(1);
DECLARE is_lower BIT;
SET len = CHAR_LENGTH(original);
WHILE (pos <= len) DO
SET letter = SUBSTRING(original, pos, 1);
SET is_lower = IF(LCASE(letter) COLLATE utf8_bin = letter COLLATE utf8_bin, 1, 0);
CASE TRUE
WHEN letter = 'a' THEN SET letter = IF(is_lower, 'a', 'A');
WHEN letter = 'b' THEN SET letter = IF(is_lower, 'b', 'B');
WHEN letter = 'c' THEN SET letter = IF(is_lower, 'c', 'C');
WHEN letter = 'd' THEN SET letter = IF(is_lower, 'd', 'D');
WHEN letter = 'e' THEN SET letter = IF(is_lower, 'e', 'E');
WHEN letter = 'f' THEN SET letter = IF(is_lower, 'f', 'F');
WHEN letter = 'g' THEN SET letter = IF(is_lower, 'g', 'G');
WHEN letter = 'h' THEN SET letter = IF(is_lower, 'h', 'H');
WHEN letter = 'i' THEN SET letter = IF(is_lower, 'i', 'I');
WHEN letter = 'j' THEN SET letter = IF(is_lower, 'j', 'J');
WHEN letter = 'k' THEN SET letter = IF(is_lower, 'k', 'K');
WHEN letter = 'l' THEN SET letter = IF(is_lower, 'l', 'L');
WHEN letter = 'ł' THEN SET letter = IF(is_lower, 'l', 'L');
WHEN letter = 'm' THEN SET letter = IF(is_lower, 'm', 'M');
WHEN letter = 'n' THEN SET letter = IF(is_lower, 'n', 'N');
WHEN letter = 'o' THEN SET letter = IF(is_lower, 'o', 'O');
WHEN letter = 'p' THEN SET letter = IF(is_lower, 'p', 'P');
WHEN letter = 'q' THEN SET letter = IF(is_lower, 'q', 'Q');
WHEN letter = 'r' THEN SET letter = IF(is_lower, 'r', 'R');
WHEN letter = 's' THEN SET letter = IF(is_lower, 's', 'S');
WHEN letter = 't' THEN SET letter = IF(is_lower, 't', 'T');
WHEN letter = 'u' THEN SET letter = IF(is_lower, 'u', 'U');
WHEN letter = 'v' THEN SET letter = IF(is_lower, 'v', 'V');
WHEN letter = 'w' THEN SET letter = IF(is_lower, 'w', 'W');
WHEN letter = 'x' THEN SET letter = IF(is_lower, 'x', 'X');
WHEN letter = 'y' THEN SET letter = IF(is_lower, 'y', 'Y');
WHEN letter = 'z' THEN SET letter = IF(is_lower, 'z', 'Z');
ELSE
SET letter = letter;
END CASE;
-- CONCAT seems to ignore the whitespace character. As a workaround we use
-- CONCAT_WS with a whitespace separator when the letter is a whitespace.
SET translit = CONCAT_WS(IF(letter = ' ', ' ', ''), translit, letter);
SET pos = pos + 1;
END WHILE;
RETURN translit;
END$$
DELIMITER ;
P.S. Someone was asking on chat for test scenario. You need at least one table with 2x varchars(255) columns. And sentence from any book or text, with commas, dots, brackets, other identificational signs and etc. As the results only numbers, words, letters and single dashes has to remain. But when I started it was as result lower case word with spaces.
来源:https://stackoverflow.com/questions/30570865/how-to-rewrite-this-php-slugify-function-to-mysql