Most frequent n - words in java ArrayList

谁都会走 提交于 2019-12-20 04:21:15

问题


I need to find the most frequent words (n words, so if n = 5, the most frequent 5 words) in an ArrayList.

// Every collected word, duplicates included; mostOften(int) counts occurrences in this list.
private ArrayList<String> wordList = new ArrayList<String>();


/**
 * Work-in-progress attempt: prints each distinct word of {@code wordList}
 * together with its number of occurrences. The words are not ranked yet and
 * {@code k} is not used.
 *
 * @param k how many of the most frequent words are wanted (currently ignored)
 * @return a new, empty list; nothing is ever added to it
 */
public ArrayList<String> mostOften(int k)
{
    ArrayList<String> lista = new ArrayList<String>();
    // De-duplicate first so every word is reported exactly once.
    for (String key : new HashSet<String>(wordList)) {
        int occurrences = Collections.frequency(wordList, key);
        System.out.println(key + ": " + occurrences);
    }
    return lista;
}

The function needs to return a list of the most frequent words, sorted by frequency. If two words have the same frequency, I need to sort them alphabetically. I have posted what I tried, but this only finds the frequency, and I do not know how to do the rest. Any help?


回答1:


You could write a Comparator class which is initialized with the list. Then you could invoke Collections.sort() with the list and the Comparator. The code might look like this:

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Orders words by how often they occur in a reference list: words that occur
 * more often sort first, and words with the same number of occurrences are
 * ordered by {@link String#compareTo(String)}.
 */
public class FrequencyComparator implements Comparator<String>{

    /** The list whose occurrence counts decide the ordering. */
    private final List<String> list;

    /**
     * @param list the list in which occurrences are counted; it is kept by
     *             reference, not copied
     */
    public FrequencyComparator(List<String> list){
        this.list = list;
    }

    /**
     * Compares two words by descending frequency, then by natural string order.
     *
     * @return a negative value if {@code o1} should come before {@code o2},
     *         a positive value if it should come after, and zero if both
     *         strings are equal
     */
    @Override
    public int compare(String o1, String o2) {
        // Count each word once per comparison instead of re-scanning the list in every branch.
        int freq1 = Collections.frequency(list, o1);
        int freq2 = Collections.frequency(list, o2);
        if (freq1 != freq2) {
            // Arguments are swapped on purpose: the higher count sorts first.
            return Integer.compare(freq2, freq1);
        }
        return o1.compareTo(o2);
    }

    /**
     * Returns the {@code k} most frequent distinct words, most frequent first.
     *
     * @param words the words to rank; duplicates are what is being counted
     * @param k     the maximum number of words to return
     * @return a new list with at most {@code k} words; fewer when {@code words}
     *         has fewer than {@code k} distinct entries
     * @throws IllegalArgumentException if {@code k} is negative
     */
    static List<String> mostFrequent(List<String> words, int k) {
        if (k < 0) {
            throw new IllegalArgumentException("k must not be negative: " + k);
        }
        Set<String> unique = new HashSet<>(words);
        List<String> ranked = new ArrayList<>(unique);
        Collections.sort(ranked, new FrequencyComparator(words));
        // The ranking is most-frequent-first, so the top k sit at the FRONT of the list.
        return new ArrayList<>(ranked.subList(0, Math.min(k, ranked.size())));
    }

    /** Small demo: prints all distinct words ranked, then only the two most frequent. */
    public static void main(String[] args)
    {
        List<String> list = new ArrayList<String>();
        list.add("Hello");
        list.add("You");
        list.add("Hello");
        list.add("You");
        list.add("Apple");
        list.add("Apple");
        list.add("Hello");
        List<String> uniqueList = new ArrayList<>(new HashSet<>(list));
        Collections.sort(uniqueList, new FrequencyComparator(list));
        System.out.println(uniqueList);
        // Take the most frequent 2 words
        System.out.println(mostFrequent(list, 2));
    }

}



回答2:


/**
 * Demo that counts how often each word occurs in a list and prints the words
 * ranked by that count.
 */
public class WordFrequency {

    /** Builds a sample word list and prints its ranking. */
    public static void main(String[] args) {
        String[] sample = {
            "Hello", "Hello", "aaaa", "aaaa", "World", "abc",
            "abc", "cba", "abc", "World", "abc"
        };
        List<String> list = new ArrayList<>();
        for (String entry : sample) {
            list.add(entry);
        }
        System.out.println(mostOften(list));
    }

    /**
     * Counts the occurrences of every distinct word and returns the counts ranked.
     *
     * @param words the words to count
     * @return one {@code Word} per distinct input word, highest frequency first;
     *         words with equal frequency are ordered case-insensitively
     */
    public static List<Word> mostOften(List<String> words){
        Map<String, Word> tally = new HashMap<>();
        for (String token : words) {
            // Start a new entry at zero, then count the current occurrence.
            tally.computeIfAbsent(token, t -> new Word(t, 0)).frequency++;
        }

        List<Word> ranked = new ArrayList<>(tally.values());
        ranked.sort(
            Comparator.comparingInt((Word w) -> w.frequency)
                .reversed() // high frequency first
                .thenComparing(w -> w.word, String.CASE_INSENSITIVE_ORDER));
        return ranked;
    }


}


/**
 * A word together with the number of times it has been counted.
 */
public class Word{
    /** The word itself. */
    String word;
    /** Occurrence count; not final, so it can be updated in place. */
    int frequency;

    /**
     * @param word  the word text
     * @param total the initial occurrence count
     */
    public Word(String word, int total) {
        this.frequency = total;
        this.word = word;
    }

    /** Renders the pair as {@code [word, frequency]}. */
    @Override
    public String toString(){
        StringBuilder text = new StringBuilder("[");
        text.append(word).append(", ").append(frequency).append("]");
        return text.toString();
    }
}



回答3:


/**
 * Simple holder for a piece of text and its frequency.
 */
class Pair {
    /** The text being counted. */
    String text;
    /** How many times {@code text} occurred. */
    int freq;

    /**
     * @param text the text being counted
     * @param freq the number of occurrences of {@code text}
     */
    public Pair(String text, int freq) {
        this.freq = freq;
        this.text = text;
    }

}

 /**
  * Pairs every distinct word with its number of occurrences and returns the
  * pairs ranked by descending frequency; pairs with the same frequency are
  * ordered by {@link String#compareTo(String)} on their text.
  *
  * @param wordList the words to count; duplicates are what is being counted
  * @return a new list holding one {@code Pair} per distinct word, most frequent first
  */
 public List<Pair> sortFreq(List<String> wordList) {
    Set<String> unique = new HashSet<String>(wordList);
    List<Pair> list = new ArrayList<Pair>(unique.size());
    for (String key : unique) {
        list.add(new Pair(key, Collections.frequency(wordList, key)));
    }
    Collections.sort(list, new Comparator<Pair>() {

        @Override
        public int compare(Pair o1, Pair o2) {
            if (o1.freq != o2.freq) {
                // Integer.compare rather than "o2.freq - o1.freq": subtraction-based
                // comparators can overflow. Arguments are swapped so that the
                // higher frequency sorts first.
                return Integer.compare(o2.freq, o1.freq);
            }
            return o1.text.compareTo(o2.text);
        }

    });
    return list;
 }



回答4:


Here is a solution using Java 8 with streams, complete with counting the word frequencies, sorting and limiting to k words:

// Input words; the list is created empty here, so fill it before relying on the result.
List<String> wordList = new ArrayList<String>();
// Maximum number of words to keep.
int k = 5;

// Ranks (word, count) entries by count, highest first; equal counts fall back
// to the natural order of the word.
Comparator<Entry<String, Long>> byCountDescThenWord =
    Comparator.<Entry<String, Long>> comparingLong(Entry::getValue)
        .reversed()
        .thenComparing(Entry::getKey);

// Step 1 groups equal words and counts them; step 2 ranks the resulting
// entries and keeps only the words of the first k.
List<String> mostFrequentWords = wordList.stream()
    .collect(Collectors.collectingAndThen(
        Collectors.groupingBy(Function.identity(), Collectors.counting()),
        counts -> counts.entrySet().stream()
            .sorted(byCountDescThenWord)
            .limit(k)
            .map(Entry::getKey)
            .collect(Collectors.toList())));


来源:https://stackoverflow.com/questions/34241695/most-frequent-n-words-in-java-arraylist

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!