Remove special characters from a database field

前端 未结 10 1674
梦如初夏
梦如初夏 2020-12-06 01:00

I have a database with several thousand records, and I need to strip down one of the fields to ensure that it only contains certain characters (Alphanumeric, spaces, and sin

相关标签:
10条回答
  • 2020-12-06 01:57

    My version of MySQL doesn't have REGEXP_REPLACE(). I used the following two workarounds: 1. Remove specified characters (if you know what characters you want to remove)

        -- fn_remove_selected_characters
        -- Returns v_input_string with every character that occurs in
        -- v_unacceptable_characters removed; all other characters (including
        -- spaces, unless a space is itself listed) are kept in order.
        --
        -- Parameters:
        --   v_input_string            text to clean (NULL is treated as '')
        --   v_unacceptable_characters characters to strip (NULL is treated as '')
        -- Returns:
        --   the cleaned string, '' when the input is NULL or empty
        --
        -- Notes:
        --   * INSTR() is case-sensitive only if an argument is a binary string,
        --     so with a case-insensitive collation listing 'a' also strips 'A'.
        --   * When creating this from the mysql command-line client, switch the
        --     statement delimiter first (e.g. DELIMITER $$) so the semicolons
        --     inside the body do not end the CREATE FUNCTION statement early.
        create function fn_remove_selected_characters
            (v_input_string varchar(255),
             v_unacceptable_characters varchar(255))
        RETURNS varchar(255)
        DETERMINISTIC
        BEGIN

        -- declare variables
        declare i int;
        declare unacceptable_values varchar(255);
        -- VARCHAR(1), not CHAR(1): a CHAR value is read back with trailing
        -- spaces stripped, so a lone space would turn into ''.
        declare this_character varchar(1);
        declare output_string varchar(255);
        declare input_length int;

        -- Set variable values
        set input_length = coalesce(char_length(v_input_string), 0);
        set unacceptable_values = coalesce(v_unacceptable_characters, '');
        set output_string = '';
        -- SUBSTRING() positions are 1-based: walk 1..input_length inclusive
        set i = 1;

        while i <= input_length do
            set this_character = substring(v_input_string, i, 1);
            -- keep the character only when it is not in the exclude list
            if instr(unacceptable_values, this_character) = 0 then
                set output_string = concat(output_string, this_character);
            end if;
            set i = i + 1;
        end while;

        RETURN output_string;
        END
    
    2. Keep only the characters you want:
        -- fn_preserve_selected_characters
        -- Returns v_input_string reduced to only those characters that occur
        -- in v_acceptable_characters; everything else is dropped. A space is
        -- kept only when a space is part of v_acceptable_characters.
        --
        -- Parameters:
        --   v_input_string          text to clean (NULL is treated as '')
        --   v_acceptable_characters characters allowed to remain
        --                           (NULL is treated as '', i.e. nothing is kept)
        -- Returns:
        --   the filtered string, '' when nothing qualifies
        --
        -- Notes:
        --   * LOCATE() is case-sensitive only if an argument is a binary string,
        --     so with a case-insensitive collation listing 'a' also keeps 'A'.
        --   * When creating this from the mysql command-line client, switch the
        --     statement delimiter first (e.g. DELIMITER $$) so the semicolons
        --     inside the body do not end the CREATE FUNCTION statement early.
        create function fn_preserve_selected_characters
            (v_input_string varchar(255),
             v_acceptable_characters varchar(255))
        returns varchar(255)
        DETERMINISTIC

        begin
        declare i int;
        declare acceptable_values varchar(255);
        -- VARCHAR(1), not CHAR(1): a CHAR value is read back with trailing
        -- spaces stripped, so a lone space would turn into ''.
        declare this_character varchar(1);
        declare output_string varchar(255);
        declare input_length int;

        set input_length = coalesce(char_length(v_input_string), 0);
        set acceptable_values = coalesce(v_acceptable_characters, '');
        set output_string = '';
        -- SUBSTRING() positions are 1-based: walk 1..input_length inclusive
        set i = 1;

        while i <= input_length do
            set this_character = substring(v_input_string, i, 1);
            -- keep the character only when it is in the allow list
            if locate(this_character, acceptable_values) > 0 then
                set output_string = concat(output_string, this_character);
            end if;
            set i = i + 1;
        end while;

        RETURN output_string;
        end
    
    0 讨论(0)
  • 2020-12-06 01:59

    There is no regular expression replacement. Use the following code to replace all special characters with '-'.

    -- Replaces each listed punctuation character in <column> with '-'.
    -- <table> and <column> are placeholders: substitute real identifiers.
    -- There is deliberately no WHERE clause: every row of the table is rewritten.
    -- The single quote is written as '''' (a doubled quote inside a single-quoted
    -- literal) so the statement also parses when ANSI_QUOTES is enabled.
    -- The original chain also had one REPLACE whose search string was empty ('');
    -- REPLACE() with an empty search string returns its input unchanged, so that
    -- step is omitted. Add another REPLACE if a further character must be handled.
    UPDATE <table> SET <column> = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(<column>, '/', '-'), ',', '-'), '.', '-'), '<', '-'), '>', '-'), '?', '-'), ';', '-'), ':', '-'), '"', '-'), '''', '-'), '|', '-'), '\\', '-'), '=', '-'), '+', '-'), '*', '-'), '&', '-'), '^', '-'), '%', '-'), '$', '-'), '#', '-'), '@', '-'), '!', '-'), '~', '-'), '`', '-'), '{', '-'), '}', '-'), '[', '-'), ']', '-'), '(', '-'), ')', '-');
    

    Code formatted

    -- Replaces each listed punctuation character in <column> with '-'.
    -- <table> and <column> are placeholders: substitute real identifiers.
    -- There is deliberately no WHERE clause: every row of the table is rewritten.
    --
    -- Layout: the 30 nested REPLACE( openers come first, then the innermost
    -- argument, then one ", 'char', '-')" line per call, innermost to outermost.
    -- The single quote is written as '''' so the statement also parses when
    -- ANSI_QUOTES is enabled. The original chain also had one REPLACE whose search
    -- string was empty (''); REPLACE() with an empty search string returns its
    -- input unchanged, so that step is omitted.
    UPDATE
        <table>
    SET
        <column> =
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
            <column>
            , '/', '-')
            , ',', '-')
            , '.', '-')
            , '<', '-')
            , '>', '-')
            , '?', '-')
            , ';', '-')
            , ':', '-')
            , '"', '-')
            , '''', '-')
            , '|', '-')
            , '\\', '-')
            , '=', '-')
            , '+', '-')
            , '*', '-')
            , '&', '-')
            , '^', '-')
            , '%', '-')
            , '$', '-')
            , '#', '-')
            , '@', '-')
            , '!', '-')
            , '~', '-')
            , '`', '-')
            , '{', '-')
            , '}', '-')
            , '[', '-')
            , ']', '-')
            , '(', '-')
            , ')', '-');
    
    0 讨论(0)
  • 2020-12-06 01:59

    Adeel's answer is by far the best and simplest.

    The OP needed to update the db, which is what I need too. So I figured I'd put that here so the next poor soul, like me, doesn't have to redo what I did.

    Double check first, select it and scan them to make sure you're getting the right rows, before you update.

    -- Preview only (nothing is modified): shows columnName with every character
    -- outside the range \x20-\x7E removed, for all rows of tableName.
    SELECT
        REGEXP_REPLACE(columnName, '[^\\x20-\\x7E]', '')
    FROM tableName;
    

    Count to do a safety check ...

    -- Number of rows whose columnName contains at least one character
    -- outside the range \x20-\x7E.
    SELECT
        count(*)
    FROM tableName
    WHERE columnName REGEXP '[^\\x20-\\x7E]';
    

    For some names I had to do another mapping so as not to lose their meaning like Ramon to Ramn because the o has a umlaut or grave or circumflex. So I used this to map ... https://theasciicode.com.ar

    Then update. This update is a catch-all to run after the mapping update. Change the limit number to the count value above ...

    -- Strips every character outside the range \x20-\x7E from columnName.
    -- Only rows that actually contain such a character are touched (WHERE),
    -- and LIMIT caps how many rows one run may change: 1 is a cautious
    -- starting value, raise it to the row count obtained from the safety check.
    -- The table is spelled tableName, matching the SELECT queries, because
    -- table names can be case-sensitive depending on the server platform.
    UPDATE tableName
    SET columnName = REGEXP_REPLACE(columnName, '[^\\x20-\\x7E]', '')
    WHERE columnName REGEXP '[^\\x20-\\x7E]'
    LIMIT 1;
    
    0 讨论(0)
  • 2020-12-06 02:04

    Elaborating on Vinnie's answer... you can use the following (note the escaping in the last two statements):

    -- Removes one punctuation character per statement from `column`.
    -- `table` and `column` are placeholders; they are backtick-quoted because
    -- TABLE and COLUMN are reserved words in MySQL and the statements do not
    -- parse with them unquoted. Substitute the real identifiers.
    -- There is deliberately no WHERE clause: each statement rewrites every row.
    -- The string literals are double-quoted, which requires that the
    -- ANSI_QUOTES SQL mode is not enabled.
    UPDATE `table` SET `column` = REPLACE(`column`,"`","");
    UPDATE `table` SET `column` = REPLACE(`column`,"~","");
    UPDATE `table` SET `column` = REPLACE(`column`,"!","");
    UPDATE `table` SET `column` = REPLACE(`column`,"@","");
    UPDATE `table` SET `column` = REPLACE(`column`,"#","");
    UPDATE `table` SET `column` = REPLACE(`column`,"$","");
    UPDATE `table` SET `column` = REPLACE(`column`,"%","");
    UPDATE `table` SET `column` = REPLACE(`column`,"^","");
    UPDATE `table` SET `column` = REPLACE(`column`,"&","");
    UPDATE `table` SET `column` = REPLACE(`column`,"*","");
    UPDATE `table` SET `column` = REPLACE(`column`,"(","");
    UPDATE `table` SET `column` = REPLACE(`column`,")","");
    UPDATE `table` SET `column` = REPLACE(`column`,"-","");
    UPDATE `table` SET `column` = REPLACE(`column`,"_","");
    UPDATE `table` SET `column` = REPLACE(`column`,"=","");
    UPDATE `table` SET `column` = REPLACE(`column`,"+","");
    UPDATE `table` SET `column` = REPLACE(`column`,"{","");
    UPDATE `table` SET `column` = REPLACE(`column`,"}","");
    UPDATE `table` SET `column` = REPLACE(`column`,"[","");
    UPDATE `table` SET `column` = REPLACE(`column`,"]","");
    UPDATE `table` SET `column` = REPLACE(`column`,"|","");
    UPDATE `table` SET `column` = REPLACE(`column`,";","");
    UPDATE `table` SET `column` = REPLACE(`column`,":","");
    UPDATE `table` SET `column` = REPLACE(`column`,"'","");
    UPDATE `table` SET `column` = REPLACE(`column`,"<","");
    UPDATE `table` SET `column` = REPLACE(`column`,",","");
    UPDATE `table` SET `column` = REPLACE(`column`,">","");
    UPDATE `table` SET `column` = REPLACE(`column`,".","");
    UPDATE `table` SET `column` = REPLACE(`column`,"/","");
    UPDATE `table` SET `column` = REPLACE(`column`,"?","");
    -- the backslash and the double quote must be escaped inside the literal
    UPDATE `table` SET `column` = REPLACE(`column`,"\\","");
    UPDATE `table` SET `column` = REPLACE(`column`,"\"","");
    
    0 讨论(0)
提交回复
热议问题