How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked to in answers to questions
Language: JavaScript/Node.js
Library: Request and Cheerio
var request = require('request');
var cheerio = require('cheerio');
// Download the page at `url` and print the href of every link found in it.
var url = "https://news.ycombinator.com/";

request(url, function (error, response, html) {
  // Report failures explicitly instead of silently printing nothing.
  if (error) {
    console.error("Request for " + url + " failed:", error);
    return;
  }
  if (response.statusCode !== 200) {
    console.error("Request for " + url + " returned HTTP status " + response.statusCode);
    return;
  }

  // Parse the downloaded markup so it can be queried with CSS selectors.
  var $ = cheerio.load(html);

  // Match only anchors that actually carry an href attribute, so anchors
  // without one do not produce "undefined" lines in the output.
  $("a[href]").each(function (i, element) {
    console.log($(element).attr("href"));
  });
});
The Request library downloads the HTML document, and Cheerio lets you use jQuery-style CSS selectors to query that document.