I have a database with several thousand records, and I need to strip down one of the fields to ensure that it only contains certain characters (Alphanumeric, spaces, and sin
My version of MySQL doesn't have REGEXP_REPLACE(). I used the following two workarounds: 1. Remove specified characters (if you know what characters you want to remove)
-- fn_remove_selected_characters
--
-- Returns v_input_string with every character that appears in
-- v_unacceptable_characters removed. Characters not in that list
-- (including spaces, unless a space is listed) are kept in order.
--
-- Parameters:
--   v_input_string             text to clean
--   v_unacceptable_characters  the characters to strip, written as one string
-- Returns:
--   the cleaned text; '' when v_input_string is NULL or empty.
--   A NULL exclusion list is treated as an empty list (nothing is removed).
-- Notes:
--   * Membership is tested with LOCATE(), so case sensitivity follows the
--     collation of the arguments.
--   * When creating the function from the mysql command-line client, change
--     the statement delimiter first (e.g. DELIMITER //) so the semicolons
--     inside the body do not end the CREATE statement early.
create function fn_remove_selected_characters
(v_input_string varchar(255),
v_unacceptable_characters varchar(255))
RETURNS varchar(255)
DETERMINISTIC
NO SQL
BEGIN
    -- SUBSTRING() positions are 1-based, so the scan runs from 1 to the
    -- length inclusive; starting at 0 would read '' and skip the last character.
    declare i int default 1;
    declare input_length int default coalesce(char_length(v_input_string), 0);
    declare unacceptable_values varchar(255) default coalesce(v_unacceptable_characters, '');
    -- VARCHAR rather than CHAR: trailing spaces are stripped from CHAR values
    -- on retrieval, which would turn a single space into ''.
    declare this_character varchar(1);
    declare output_string varchar(255) default '';

    while i <= input_length do
        set this_character = SUBSTRING(v_input_string, i, 1);
        -- keep the character only when it is absent from the exclusion list
        if LOCATE(this_character, unacceptable_values) = 0 then
            set output_string = concat(output_string, this_character);
        end if;
        set i = i + 1;
    end while;

    RETURN output_string;
END;
-- fn_preserve_selected_characters
--
-- Returns v_input_string reduced to the characters that appear in
-- v_acceptable_characters; everything else is dropped. Spaces are kept
-- only when a space is part of the acceptable list.
--
-- Parameters:
--   v_input_string           text to clean
--   v_acceptable_characters  the characters to keep, written as one string
-- Returns:
--   the filtered text; '' when v_input_string is NULL or empty, or when
--   the acceptable list is NULL or empty.
-- Notes:
--   * Membership is tested with LOCATE(), so case sensitivity follows the
--     collation of the arguments.
--   * When creating the function from the mysql command-line client, change
--     the statement delimiter first (e.g. DELIMITER //) so the semicolons
--     inside the body do not end the CREATE statement early.
create function fn_preserve_selected_characters
(v_input_string varchar(255),
v_acceptable_characters varchar(255))
returns varchar(255)
DETERMINISTIC
NO SQL
begin
    -- SUBSTRING() positions are 1-based, so the scan runs from 1 to the
    -- length inclusive; starting at 0 would read '' and skip the last character.
    declare i int default 1;
    declare input_length int default coalesce(char_length(v_input_string), 0);
    declare acceptable_values varchar(255) default coalesce(v_acceptable_characters, '');
    -- VARCHAR rather than CHAR: trailing spaces are stripped from CHAR values
    -- on retrieval, which would turn a single space into ''.
    declare this_character varchar(1);
    declare output_string varchar(255) default '';

    while i <= input_length do
        set this_character = SUBSTRING(v_input_string, i, 1);
        -- append the character only when it is in the acceptable list
        if LOCATE(this_character, acceptable_values) > 0 then
            set output_string = concat(output_string, this_character);
        end if;
        set i = i + 1;
    end while;

    RETURN output_string;
end;
There is no regular expression replacement. Use the following code to replace all special characters with '-'.
-- Replaces each listed punctuation character in <column> with '-' for every row of <table>
-- (no WHERE clause: the whole table is rewritten). <table> and <column> are placeholders.
-- NOTE(review): the original also contained REPLACE(..., '', '-'); an empty search string
-- should be a no-op in MySQL, so that layer was dropped. If a character was lost from that
-- literal (e.g. '_'), add a REPLACE for it.
UPDATE <table> SET <column> = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(<column>, '/', '-'), ',', '-'), '.', '-'), '<', '-'), '>', '-'), '?', '-'), ';', '-'), ':', '-'), '"', '-'), '''', '-'), '|', '-'), '\\', '-'), '=', '-'), '+', '-'), '*', '-'), '&', '-'), '^', '-'), '%', '-'), '$', '-'), '#', '-'), '@', '-'), '!', '-'), '~', '-'), '`', '-'), '{', '-'), '}', '-'), '[', '-'), ']', '-'), '(', '-'), ')', '-');
Code formatted
-- Replaces each listed punctuation character in <column> with '-' for every
-- row of <table> (no WHERE clause: the whole table is rewritten).
-- <table> and <column> are placeholders for real identifiers.
--
-- Layout: the 30 nested REPLACE( calls are opened first; each following line
-- then supplies the (search, replacement) pair that closes one call, innermost
-- first.
--
-- NOTE(review): the original also contained REPLACE(..., '', '-'); an empty
-- search string should be a no-op in MySQL, so that layer was dropped. If a
-- character was lost from that literal (e.g. '_'), add a pair for it below.
UPDATE
    <table>
SET
    <column> =
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
            <column>,
            '/',  '-'),
            ',',  '-'),
            '.',  '-'),
            '<',  '-'),
            '>',  '-'),
            '?',  '-'),
            ';',  '-'),
            ':',  '-'),
            '"',  '-'),
            '''', '-'),  -- single quote, doubled so it works with or without ANSI_QUOTES
            '|',  '-'),
            '\\', '-'),  -- a single backslash (escaped)
            '=',  '-'),
            '+',  '-'),
            '*',  '-'),
            '&',  '-'),
            '^',  '-'),
            '%',  '-'),
            '$',  '-'),
            '#',  '-'),
            '@',  '-'),
            '!',  '-'),
            '~',  '-'),
            '`',  '-'),
            '{',  '-'),
            '}',  '-'),
            '[',  '-'),
            ']',  '-'),
            '(',  '-'),
            ')',  '-');
Adeel's answer is by far the best and simplest.
The OP needed to update the database, which is what I needed too. So I figured I'd put the steps here so that the next poor soul, like me, doesn't have to redo what I did.
Double check first, select it and scan them to make sure you're getting the right rows, before you update.
-- Preview: shows each columnName value with every character outside the
-- printable ASCII range (0x20-0x7E) removed. Nothing is modified.
SELECT
    REGEXP_REPLACE(columnName, '[^\\x20-\\x7E]', '')
FROM
    tableName;
Count to do a safety check ...
-- Safety check: number of rows whose columnName contains at least one
-- character outside the printable ASCII range (0x20-0x7E).
SELECT
    COUNT(*)
FROM
    tableName
WHERE
    columnName REGEXP '[^\\x20-\\x7E]';
For some names I had to do a separate mapping first so as not to lose their meaning. For example, Ramón would otherwise become Ramn, because the accented o (with an umlaut, grave, acute or circumflex) is outside the ASCII range and gets stripped. I used this table to build the mapping: https://theasciicode.com.ar
Then update. This update is a catch-all to run after the mapping update. Change the LIMIT value to the count returned by the safety check above.
-- Catch-all clean-up: strips every character outside the printable ASCII
-- range (0x20-0x7E) from columnName, touching only rows that contain one.
-- The table is spelled tableName, matching the preview and count queries;
-- table names are case-sensitive on many installations.
-- LIMIT caps how many rows are changed; raise it to the row count found by
-- the safety-check query before running this for real.
UPDATE tableName
SET columnName = REGEXP_REPLACE(columnName, '[^\\x20-\\x7E]', '')
WHERE columnName REGEXP '[^\\x20-\\x7E]'
LIMIT 1;
Elaborating on Vinnie's answer: you can use the following statements (note the escaping in the last two statements).
-- Removes punctuation from `column` one character at a time: each statement
-- rewrites every row of `table`, deleting a single character.
-- `table` and `column` stand in for real identifiers (both are reserved
-- words in MySQL, so substitute your own names before running).
-- The string literals are double-quoted, which requires the ANSI_QUOTES
-- SQL mode to be off. The last two statements need a backslash escape:
-- "\\" is a single backslash and "\"" is a double quote.
UPDATE table SET column = REPLACE(column, "`", "");
UPDATE table SET column = REPLACE(column, "~", "");
UPDATE table SET column = REPLACE(column, "!", "");
UPDATE table SET column = REPLACE(column, "@", "");
UPDATE table SET column = REPLACE(column, "#", "");
UPDATE table SET column = REPLACE(column, "$", "");
UPDATE table SET column = REPLACE(column, "%", "");
UPDATE table SET column = REPLACE(column, "^", "");
UPDATE table SET column = REPLACE(column, "&", "");
UPDATE table SET column = REPLACE(column, "*", "");
UPDATE table SET column = REPLACE(column, "(", "");
UPDATE table SET column = REPLACE(column, ")", "");
UPDATE table SET column = REPLACE(column, "-", "");
UPDATE table SET column = REPLACE(column, "_", "");
UPDATE table SET column = REPLACE(column, "=", "");
UPDATE table SET column = REPLACE(column, "+", "");
UPDATE table SET column = REPLACE(column, "{", "");
UPDATE table SET column = REPLACE(column, "}", "");
UPDATE table SET column = REPLACE(column, "[", "");
UPDATE table SET column = REPLACE(column, "]", "");
UPDATE table SET column = REPLACE(column, "|", "");
UPDATE table SET column = REPLACE(column, ";", "");
UPDATE table SET column = REPLACE(column, ":", "");
UPDATE table SET column = REPLACE(column, "'", "");
UPDATE table SET column = REPLACE(column, "<", "");
UPDATE table SET column = REPLACE(column, ",", "");
UPDATE table SET column = REPLACE(column, ">", "");
UPDATE table SET column = REPLACE(column, ".", "");
UPDATE table SET column = REPLACE(column, "/", "");
UPDATE table SET column = REPLACE(column, "?", "");
UPDATE table SET column = REPLACE(column, "\\", "");
UPDATE table SET column = REPLACE(column, "\"", "");