How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked to in answers to questions
language: Perl
library: HTML::Parser
#!/usr/bin/perl
use strict;
use warnings;
use HTML::Parser;
my $find_links = HTML::Parser->new(
start_h => [
sub {
my ($tag, $attr) = @_;
if ($tag eq 'a' and exists $attr->{href}) {
print "$attr->{href}\n";
}
},
"tag, attr"
]
);
my $html = join '',
"",
(map { qq($_) } qw/foo bar baz/),
"";
$find_links->parse($html);