
How can I implement javascript function to calculate frequency of each word in
I'd go with Sampson's match-reduce method for slightly better efficiency. Here's a modified version of it that is more production-ready. It's not perfect, but it should cover the vast majority of scenarios (i.e., "good enough").
function calcWordFreq(s) {
// Normalize
s = s.toLowerCase();
// Strip quotes and brackets
s = s.replace(/["“”(\[{}\])]|\B['‘]([^'’]+)['’]/g, '$1');
// Strip dashes and ellipses
s = s.replace(/[‒–—―…]|--|\.\.\./g, ' ');
// Strip punctuation marks
s = s.replace(/[!?;:.,]\B/g, '');
return s.match(/\S+/g).reduce(function(oFreq, sWord) {
if (oFreq.hasOwnProperty(sWord)) ++oFreq[sWord];
else oFreq[sWord] = 1;
return oFreq;
}, {});
}
calcWordFreq('A ‘bad’, “BAD” wolf-man...a good ol\' spook -- I\'m frightened!') returns
{
"a": 2
"bad": 2
"frightened": 1
"good": 1
"i'm": 1
"ol'": 1
"spook": 1
"wolf-man": 1
}