How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked to in answers to questions
Using phantomjs, save this file as extract-links.js:
var page = new WebPage(),
url = 'http://www.udacity.com';
page.open(url, function (status) {
if (status !== 'success') {
console.log('Unable to access network');
} else {
var results = page.evaluate(function() {
var list = document.querySelectorAll('a'), links = [], i;
for (i = 0; i < list.length; i++) {
links.push(list[i].href);
}
return links;
});
console.log(results.join('\n'));
}
phantom.exit();
});
run:
$ ../path/to/bin/phantomjs extract-links.js