I've written a little Perl script that I use for this kind of thing. It can do more than what you ask for but it can also do what you need:
#!/usr/bin/env perl
# Command-line setup for the list comparison script.
#
# Options (parsed with Getopt::Std):
#   -m    select the "missing" report; this is the default when neither -c
#         nor -d is given
#   -c    select the "common" report
#   -d    select the "duplicates" report
#   -f    fast mode: key each line by its first whitespace-separated field
#   -k N  column number (counted from 1) to print; only allowed with -f
#   -v    verbose progress output on STDERR
#   -h    call usage()
# Two file names are expected after the options.
use strict;
# Stands in for a "-w" switch on the shebang line. Linux passes everything
# after the interpreter path as ONE argument, so "env perl -w" asks env to
# run a program literally named "perl -w" and the script never starts.
use warnings;
use Getopt::Std;

my %opts;
getopts('hvfcmdk:', \%opts);
my $missing = $opts{m};
my $column  = $opts{k} || undef;    # "-k 0" is treated as "no column given"
my $common  = $opts{c};
my $verbose = $opts{v};
my $fast    = $opts{f};
my $dupes   = $opts{d};
$missing = 1 unless $common || $dupes;

# Count the arguments rather than testing $ARGV[1] for truth, so that a
# second file literally named "0" is accepted.
usage() unless @ARGV >= 2;
usage() if $opts{h};

# %k      - per-key counter, %found - keys still being searched for,
# %fields - per-key row of fields from the second file (fast mode with -k)
my (%found, %k, %fields);
if ($column) {
    die("The -k option only works in fast (-f) mode\n") unless $fast;
    $column--;    ## So I don't need to count from 0
}
# Load the first file. Each line (in fast mode: only its first
# whitespace-separated field) becomes a key in %k and %found, and both
# counters are incremented once per occurrence in this file.
# NOTE(review): a key that is repeated in this file ends up with a count
# above 1 before the second file is even read -- confirm that the reports,
# which test the count against 1 and 2, are meant to treat that as a match.
#
# Three-argument open: a file name that starts with ">" or ends with "|" is
# opened as a plain file instead of being interpreted as a mode or a command.
open(my $F1, '<', $ARGV[0]) or die("Cannot open $ARGV[0]: $!\n");
while (my $line = <$F1>) {
    chomp $line;
    my $key = $line;
    if ($fast) {
        my @aa = split(/\s+/, $line);
        # A blank line yields no fields at all; use the empty string as the
        # key explicitly rather than letting undef be stringified with a
        # warning.
        $key = defined $aa[0] ? $aa[0] : '';
    }
    $k{$key}++;
    $found{$key}++;
}
close($F1);
# Scan the second file and bump the counter in %k for every key from the
# first file that it contains.
#   fast mode:   a line matches a key when its first whitespace-separated
#                field is that key; with -k the line's fields are remembered
#                in %fields under that first field.
#   normal mode: a line matches a key when the key occurs anywhere in it.
# $n counts the non-blank lines processed; $size is the total line count and
# is only computed in verbose mode, where it is shown in progress messages.
my $n    = 0;
my $size = 0;
if ($verbose) {
    # Pre-count the lines so the progress messages can show a total.
    open(my $COUNT, '<', $ARGV[1]) or die("Cannot open $ARGV[1]: $!\n");
    while (my $counted = <$COUNT>) {
        $size++;
    }
    close($COUNT);
}

open(my $F2, '<', $ARGV[1]) or die("Cannot open $ARGV[1]: $!\n");
while (my $line = <$F2>) {
    next if $line =~ /^\s+$/;
    $n++;
    chomp $line;
    print STDERR "." if $verbose && $n % 10 == 0;
    print STDERR "[$n of $size lines]\n" if $verbose && $n % 800 == 0;
    if ($fast) {
        my @aa = split(/\s+/, $line);
        # Only keys that came from the first file are counted.
        $k{$aa[0]}++ if defined($k{$aa[0]});
        $fields{$aa[0]} = \@aa if $column;
    }
    else {
        foreach my $key (keys(%found)) {
            # Literal substring test. index() is used instead of /\Q$key/
            # because an empty pattern makes Perl silently reuse the last
            # successfully matched regex.
            next if index($line, $key) < 0;
            $k{$key}++;
            # Outside of -d mode a single hit settles the key (the reports
            # only test "count > 1"), so stop searching for it. keys() above
            # returned a copy, which makes deleting during the loop safe.
            delete $found{$key} unless $dupes;
        }
    }
}
close($F2);
print STDERR "[$n of $size lines]\n" if $verbose;
# Report phase. One pass over %k per selected report, in the order
# missing, common, duplicates:
#   missing    - keys whose count is not above 1
#   common     - keys whose count is above 1
#   duplicates - keys whose count is above 2
# With a column selected, the chosen field of the key's row from the second
# file is printed instead of the key itself. "-k 1" has been decremented to 0
# by now, which is false, so it takes the plain-key branch; in fast mode the
# key IS the first field, so the output is the same.
#
# Plain loops are used rather than "do map {...}": "do EXPR" treats the value
# of EXPR as the name of a file to execute.
my $print_selected = sub {
    my ($is_wanted) = @_;
    foreach my $key (keys(%k)) {
        next unless $is_wanted->($k{$key});
        if ($column) {
            # Keys that never appeared in the second file have no recorded
            # row, hence no column to print; dereferencing the missing row
            # would be fatal under strict.
            my $row = $fields{$key};
            next unless $row;
            my $value = defined $row->[$column] ? $row->[$column] : '';
            print "$value\n";
        }
        else {
            print "$key\n";
        }
    }
};

$print_selected->(sub { !($_[0] > 1) }) if $missing;
$print_selected->(sub { $_[0] > 1 })    if $common;
$print_selected->(sub { $_[0] > 2 })    if $dupes;
sub usage{
print STDERR <
In your case, you would run it as
list_compare.pl -cf file1.txt file2.txt
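The -f option makes it compare only the first word (as delimited by whitespace) of each line, which greatly speeds things up. Without -f, each whole line of file1 is searched for anywhere within the lines of file2, so to compare entire lines, remove the -f.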