MySQL: Count the number of times each word occurs in a column

后端 未结 5 1421
心在旅途
心在旅途 2021-01-06 19:13

For instance, suppose I have data in a column like this:

data
I love book
I love apple
I love book
I hate apple
I hate apple

How can I get a result that shows how many times each word occurs?

5条回答
  •  半阙折子戏
    2021-01-06 19:33

    The split-string procedure is not my work. You can find it here:

    http://forge.mysql.com/tools/tool.php?id=4

    I wrote the rest of the code for you.

    -- Sample input table: one free-text sentence per row.
    drop table if exists mytable;
    create table mytable (
        id     int not null auto_increment,
        mytext varchar(1000),
        primary key (id)
    ) engine = myisam;

    -- Five sample sentences with mixed punctuation and repeated spaces.
    insert into mytable (mytext)
    values
        ('I love book,but book sucks!What do you,think   about it? me too'),
        ('I love apple! it rulez.,No, it sucks a lot!!!'),
        ('I love book'),
        ('I hate apple!!! Me too.,!'),
        ('I hate apple');
    
    -- Output table: one row per word occurrence found in mytable.mytext.
    drop table if exists mywords;
    create table mywords (
        id   int not null auto_increment,
        word varchar(50),
        primary key (id)
    ) engine = myisam;
    
    
    delimiter //
    drop procedure if exists split_string //
    create procedure split_string (
        in input text
        , in `delimiter` varchar(10)
    )
    sql security invoker
    begin
        -- Splits input on every occurrence of the delimiter and stores the
        -- pieces, in order of appearance, in the temporary table
        -- SplitValues(value). The table is dropped and recreated on each call.
        --
        -- Parameters:
        --   input      text to split. NULL or empty input yields no rows.
        --   delimiter  separator string. NULL yields no rows. An empty string
        --              means there is nothing to split on, so the whole input
        --              is stored as a single piece.
        --
        -- Pieces that are empty or contain only spaces are skipped. Pieces
        -- that are kept are stored as found, without trimming.
        declare cur_position int default 1;
        declare remainder text;
        declare cur_string varchar(1000);
        declare delimiter_length tinyint unsigned;

        drop temporary table if exists SplitValues;
        create temporary table SplitValues (
            value varchar(1000) not null
        ) engine=myisam;

        set remainder = input;
        set delimiter_length = char_length(`delimiter`);

        -- cur_position = 0 means the last piece has been consumed.
        while char_length(remainder) > 0 and cur_position > 0 do
            if delimiter_length = 0 then
                -- instr() reports position 1 for an empty needle, which would
                -- never shorten the remainder and would loop forever.
                set cur_position = 0;
            else
                set cur_position = instr(remainder, `delimiter`);
            end if;

            if cur_position = 0 then
                set cur_string = remainder;
            else
                set cur_string = left(remainder, cur_position - 1);
            end if;

            if trim(cur_string) != '' then
                insert into SplitValues (value) values (cur_string);
            end if;

            -- Skip past the piece just read and the delimiter that followed it.
            set remainder = substring(remainder, cur_position + delimiter_length);
        end while;

    end //
    delimiter ;
    
    
    delimiter //
    drop procedure if exists single_words //
    create procedure single_words()
    begin
        -- Rebuilds mywords with one row per word found in mytable.mytext.
        -- The characters ! , . and ? are replaced by spaces first, then each
        -- row is split on single spaces by split_string.
        declare finish int default 0;
        -- Same width as mytable.mytext so a long row is not cut off on fetch.
        declare str varchar(1000);
        declare cur_table cursor for
            select replace(replace(replace(replace(mytext, '!', ' '), ',', ' '), '.', ' '), '?', ' ')
            from mytable;
        -- Raised by fetch once the cursor has no more rows.
        declare continue handler for not found set finish = 1;

        truncate table mywords;

        open cur_table;
        my_loop: loop
            fetch cur_table into str;
            if finish = 1 then
                leave my_loop;
            end if;
            call split_string(str, ' ');
            -- The table name must match the case used by split_string, because
            -- table names are case-sensitive on some platforms.
            insert into mywords (word)
            select value
            from SplitValues;
        end loop;
        close cur_table;
    end //
    delimiter ;
    
    -- Rebuild the word list, then report how often each word occurs.
    call single_words();

    -- group by alone does not guarantee row order (MySQL 8.0 removed the
    -- implicit sort), so sort explicitly to get a stable alphabetical listing.
    select
        word,
        count(*) as word_count
    from mywords
    group by word
    order by word;
    
    +-------+------------+
    | word  | word_count |
    +-------+------------+
    | a     |          1 |
    | about |          1 |
    | apple |          3 |
    | book  |          3 |
    | but   |          1 |
    | do    |          1 |
    | hate  |          2 |
    | I     |          5 |
    | it    |          3 |
    | lot   |          1 |
    | love  |          3 |
    | me    |          2 |
    | No    |          1 |
    | rulez |          1 |
    | sucks |          2 |
    | think |          1 |
    | too   |          2 |
    | What  |          1 |
    | you   |          1 |
    +-------+------------+
    19 rows in set (0.00 sec)
    

    The code would need to be improved to handle all punctuation, but this is the general idea.

提交回复
热议问题