Group strings by longest common starting substring

佐手、 提交于 2020-02-02 12:51:16

问题


Here's the problem. Say I have these strings:

  • apple ipad mini 32gb
  • apple ipad mini 64gb
  • apple ipad air 64gb
  • apple ipad air 32gb
  • panasonic gh4
  • samsung s2 galaxy
  • samsung s2 galaxy red
  • samsung s3 galaxy

I want these strings to be grouped like this:

  • apple ipad mini: [apple ipad mini 32gb, apple ipad mini 64gb]
  • apple ipad air: [apple ipad air 64gb, apple ipad air 32gb]
  • panasonic gh4: [panasonic gh4]
  • samsung s2 galaxy: [samsung s2 galaxy, samsung s2 galaxy red]
  • samsung s3 galaxy: [samsung s3 galaxy]

The point is to separate the name of the item from its attributes (color, memory capacity, etc.).

I used this algorithm for finding longest common substring: link

Can you guys share your ideas? No code or implementation needed. Thank you.

Edited:

    // Sort the items so that titles sharing a prefix end up next to each other.
    // NOTE(review): sorting reads `item.title` while grouping reads
    // `item[this.attr]` - confirm both refer to the same field.
    this.data = _.sortBy(this.data, function(item) {
        return item.title;
    });

    var i = 0;
    var groups = {};
    var count = this.data.length;
    // Walk every item, including the last one. Stopping at `length - 1`
    // silently dropped a trailing item that did not belong to the previous
    // group (e.g. "samsung s3 galaxy").
    while (i < count) {
        var current = this.data[i][this.attr];
        // Default to the item's own text; it is the key whenever there is no
        // following item or the helper returns nothing usable.
        var key = current;
        if (i + 1 < count) {
            // `this.lcs` is presumably the longest-common-substring helper
            // linked in the question; its implementation is not shown here.
            key = this.lcs(current, this.data[i + 1][this.attr]) || current;
        }
        groups[key] = true;
        i++;
        // Skip all following items that belong to the group just recorded.
        // The bounds check comes first so the index is never read past the end.
        while (i < count && this.data[i][this.attr].startsWith(key)) {
            i++;
        }
    }
    console.log(groups);

This works great (I have only tested adding the keys). But I also want samsung s3 galaxy to be added to the list. Thanks for your help!


回答1:


If you just want to simply group by longest common prefix (that means "apple ipad mini" will be chosen even though "apple ipad" would yield a larger group), then maybe something like this?

sort the list
i = 0
while i < end of list:
  key = longest common prefix of list[i] & list[i + 1]
        or list[i] if the common prefix is less than (1?) words or i is the last index
  groups[key] = list[i++]
  while key is prefix of list[i]:
    add list[i++] to groups[key]



回答2:


An attempt to solve the problem by comparing two strings word by word, with a lookup against the next item if the number of matching words is smaller than that of the previous match.

/**
 * Builds the small state record that is threaded through the reduce pass.
 *
 * @param {*} i - Value stored under `item`.
 * @param {*} l - Value stored under `length`.
 * @returns {{item: *, length: *}} A fresh object holding both values.
 */
function groupObject(i, l) {
    var state = {};
    state.item = i;
    state.length = l;
    return state;
}

/**
 * Reducer callback that files the string `a` under a key made of the leading
 * words it shares with a reference string, and appends it to the outer-scope
 * `grouped` object (`grouped[key]` is an array of strings).
 *
 * The reference string is `r.item`. When `a` shares fewer leading words with
 * it than the previous match length (`r.length`) and a following element
 * exists, `a` is compared against that following element (`o[i + 1]`) instead.
 * If no shared words remain, or the shorter match happens on the last element,
 * the string becomes its own key.
 *
 * @param {{item: string, length: number}} r - State from the previous step:
 *   the reference string and the number of words matched for it.
 * @param {string} a - The string being grouped.
 * @param {number} i - Index of `a` in `o` (Number.MAX_VALUE on the recursive
 *   call, which disables both index-based checks).
 * @param {string[]} o - The array being reduced.
 * @returns {{item: string, length: number}} State for the next step, built
 *   with `groupObject(a, <number of matched words>)`.
 */
function group(r, a, i, o) {
    var rr = r.item.split(' '),
        aa = a.split(' '),
        limit = Math.min(rr.length, aa.length),
        j = 0,
        key, keys = [];

    // Collect the common leading words. The scan is bounded by the shorter
    // word list: without the bound, two strings with identical words compare
    // `undefined === undefined` past the end and the loop never terminates.
    while (j < limit && aa[j] === rr[j]) {
        keys.push(aa[j]);
        j++;
    }
    // Fewer shared words than the previous match: `a` likely starts a new
    // group, so compare it with the next element rather than the previous one.
    if (keys.length < r.length && i < o.length - 1) {
        return group(groupObject(o[i + 1], 0), a, Number.MAX_VALUE, o);
    }
    key = keys.join(' ');
    // No shared words at all, or a shorter match on the very last element:
    // the string forms a group of its own.
    if (!key || keys.length < r.length && i === o.length - 1) {
        key = a;
    }
    grouped[key] = grouped[key] || [];
    grouped[key].push(a);
    return groupObject(a, keys.length);
}

// Accumulator that group() fills: key -> array of the strings filed under it.
var grouped = {};

// Sample product titles to be grouped.
var data = [
    'apple ipad mini 32gb',
    'apple ipad mini 64gb',
    'apple ipad air 64gb',
    'apple ipad air 32gb',
    'panasonic gh4',
    'samsung s2 galaxy',
    'samsung s2 galaxy red',
    'samsung s3 galaxy'
];

// Fold over the titles; the initial state uses the second title as the
// reference string with a match length of 0.
data.reduce(
    group,
    groupObject(data[1], 0)
);

// Render the grouping as pretty-printed JSON (4-space indent) inside <pre>.
document.write(['<pre>', JSON.stringify(grouped, null, 4), '</pre>'].join(''));


来源:https://stackoverflow.com/questions/34210034/group-strings-by-longest-common-starting-substring

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!