I have a list of a million URLs. I need to extract the TLD for each URL and create a separate file per TLD, for example, collecting all URLs with .com as the TLD and dumping them into one file.
Use URI's host method to get the host, Domain::PublicSuffix's get_root_domain to parse the host name, and then its tld or suffix method to get the real TLD or the pseudo-TLD.
use strict;
use warnings;
use feature qw( say );
use Domain::PublicSuffix qw( );
use URI qw( );

my $dps = Domain::PublicSuffix->new();

for (qw(
    http://www.google.com/
    http://www.google.co.uk/
)) {
    my $url = $_;

    # Treat relative URLs as absolute URLs with a missing http://.
    $url = "http://$url" if $url !~ /^\w+:/;

    my $host = URI->new($url)->host();
    $host =~ s/\.\z//;  # D::PS doesn't handle "domain.com.".

    $dps->get_root_domain($host)
        or die $dps->error();

    say $dps->tld();     # com  uk
    say $dps->suffix();  # com  co.uk
}
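
To fan the million URLs out into one file per suffix, here is a minimal sketch building on the loop above. It assumes the URLs live one per line in a file named urls.txt (the filename is an assumption, adjust it to your input), and it caches one open filehandle per suffix so each output file is opened only once:

use strict;
use warnings;
use feature qw( say );
use Domain::PublicSuffix qw( );
use URI qw( );

my $dps = Domain::PublicSuffix->new();
my %fh_for;  # cache of open filehandles, one per suffix

open(my $in, '<', 'urls.txt')  # assumed input file, one URL per line
    or die "Can't open urls.txt: $!";

while (my $url = <$in>) {
    chomp($url);
    next unless length($url);

    # Treat relative URLs as absolute URLs with a missing http://.
    $url = "http://$url" if $url !~ /^\w+:/;

    my $host = URI->new($url)->host();
    $host =~ s/\.\z//;  # D::PS doesn't handle "domain.com.".

    $dps->get_root_domain($host)
        or next;  # skip unparsable hosts instead of dying mid-run

    my $suffix = $dps->suffix();

    # Open (and cache) one output file per suffix,
    # e.g. "com.txt", "co.uk.txt"; append mode so reruns don't clobber.
    $fh_for{$suffix} //= do {
        open(my $out, '>>', "$suffix.txt")
            or die "Can't open $suffix.txt: $!";
        $out;
    };

    say { $fh_for{$suffix} } $url;
}

close($_) for values %fh_for;

Caching the filehandles avoids reopening a file for every URL; a million URLs will typically hit far fewer distinct suffixes than your open-file limit, but if you do exhaust it, close and reopen handles as needed or sort the input by suffix first.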