C# - 510 451 436 446 434 426 422 chars (minified)
Not that short, but now probably correct! Note: the previous version did not show the top line of the first bar, did not scale the bars correctly, downloaded the file instead of reading it from stdin, and did not include all the required C# verbosity. You could easily shave off many strokes if C# didn't need so much extra crap. Maybe PowerShell could do better.
using C=System.Console; // alias for Console
using System.Linq; // for Split, GroupBy, Select, OrderBy, etc.
// Word-frequency bar chart: reads text from stdin and prints its most frequent
// words (stopwords excluded) as rows of "|____| word", most frequent first.
class Class // must define a class
{
    // Maximum number of distinct words shown in the chart.
    const int MaxWords = 22;

    // Width budget shared by a bar and its word (the "|" and "| " decorations are extra).
    const double BarPlusWordWidth = 76.0;

    // A separator is one or more consecutive stopwords and/or non-word characters (\W).
    const string SeparatorPattern = @"(?:\b(?:the|and|of|to|a|i[tns]?|or)\b|\W)+";

    // Entry point. Prints nothing when the input contains no countable words.
    static void Main() // must define a Main
    {
        // Lower-case culture-independently so that stopword matching and grouping
        // do not depend on the current locale (e.g. the Turkish dotless i).
        var text = C.In.ReadToEnd().ToLowerInvariant();

        var topWords = System.Text.RegularExpressions.Regex.Split(text, SeparatorPattern)
            // Split returns "" when the text is empty or starts/ends with a separator;
            // those are not words and must not be counted.
            .Where(word => word.Length > 0)
            .GroupBy(word => word)
            .Select(group => new { Word = group.Key, Count = group.Count() })
            // Stable sort: words with equal counts keep their order of first appearance.
            .OrderByDescending(entry => entry.Count)
            .Take(MaxWords)
            // Materialise once; the result is enumerated twice below.
            .ToList();

        if (topWords.Count == 0)
        {
            return;
        }

        // Occurrences represented by one bar character, chosen so that the widest
        // bar + word combination fits the width budget. The room left for a bar is
        // clamped to 1 so an over-long word cannot make the scale zero or negative.
        var countPerColumn = topWords.Max(entry =>
            entry.Count / System.Math.Max(1.0, BarPlusWordWidth - entry.Word.Length));

        var rows = topWords
            .Select(entry => new
            {
                // The bar text is needed twice for the first row (top line and body).
                Bar = new string('_', (int)(entry.Count / countPerColumn)),
                Word = entry.Word
            })
            .ToList();

        // Top edge of the first (longest-count) bar.
        C.WriteLine(" " + rows[0].Bar);

        // One "|bar| word" line per charted word.
        rows.ForEach(row => C.WriteLine("|" + row.Bar + "| " + row.Word));
    }
}
422 chars with lendivisor inlined (which makes it about 22 times slower, because the maximum is recomputed for each of the 22 words), in the form below (newlines stand in for some of the spaces):
// Golfed form of the program (these comment lines are reader annotations; strip them
// before counting characters).
// Reads all of stdin, lower-cases it, splits it on runs of stopwords / non-word
// characters, groups equal tokens and keeps the 22 groups with the highest counts.
//   a = the grouped tokens ordered by descending count, limited to 22 (a deferred LINQ query)
//   b = list of { p = bar made of '_' characters, t = token }; the scaling maximum
//       a.Max(...) sits inside the Select lambda, so it is evaluated once per element
// Output: a space followed by the first bar (its top edge), then one "|bar| token" line
// per element of b.
using System.Linq;using C=System.Console;class M{static void Main(){var
a=System.Text.RegularExpressions.Regex.Split(C.In.ReadToEnd().ToLower(),@"(?:\b(?:the|and|of|to|a|i[tns]?|or)\b|\W)+").GroupBy(x=>x).OrderBy(x=>-x.Count()).Take(22);var
b=a.Select(x=>new{p=new string('_',(int)(x.Count()/a.Max(y=>y.Count()/(76d-y.Key.Length)))),t=x.Key}).ToList();C.WriteLine(" "+b[0].p);b.ForEach(x=>C.WriteLine("|"+x.p+"| "+x.t));}}