How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked to from answers to questions.
Language: Java
Library: jsoup
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xml.sax.SAXException;
/**
 * Minimal jsoup example: parses an HTML string and prints the {@code href}
 * attribute of every anchor element that carries one.
 */
public class HtmlTest {
    /**
     * Parses the HTML string below, selects every {@code <a>} element that has an
     * {@code href} attribute, and writes each attribute value to standard output,
     * one per line.
     *
     * <p>The parsed string is an empty placeholder; substitute real markup to see output.
     *
     * @param args command-line arguments; not used
     * @throws SAXException not raised by the code in this method; kept in the signature so
     *     existing callers that catch it continue to compile
     * @throws IOException not raised by the code in this method; kept in the signature so
     *     existing callers that catch it continue to compile
     */
    public static void main(final String[] args) throws SAXException, IOException {
        // The previous signature also listed ValidityException and ParsingException,
        // which are not imported anywhere in this file and prevented compilation.
        final Document document = Jsoup.parse("");

        // CSS-style selector: anchors that actually have an href attribute.
        final Elements links = document.select("a[href]");
        for (final Element link : links) {
            System.out.println(link.attr("href"));
        }
    }
}