问题
I am trying to remove duplicate lines, but only those that contain a specific string. Removing all duplicate lines is easy, but some useful lines are also deleted when I use:
awk '!seen[$0]++'
or
perl -ne 'print unless $seen{$_}++'
Example:
keep only the first occurrence of lines containing "host_name="
keep all occurrences of lines containing "plugin_output="
With the above awk
or Perl
commands, the repeated client number line ("plugin_output=Client : 168131") is deleted too.
My command output:
host_name=Client1
plugin_output=Name : Client1 Marseille
host_name=Client1
plugin_output=Client : 168131
host_name=Client1
host_name=Client1
host_name=Client1
host_name=Client1
host_name=Client1
host_name=Client1
host_name=Client1
host_name=Client2
plugin_output=Name : Client2 Besançon
host_name=Client2
plugin_output=Client : 168131
host_name=Client2
host_name=Client2
host_name=Client2
host_name=Client2
host_name=Client2
host_name=Client2
host_name=Client2
host_name=Client2
host_name=Client3
plugin_output=Name : Client3 BRETAGNE
host_name=Client3
plugin_output=Client : 168131
host_name=Client3
host_name=Client3
host_name=Client3
host_name=Client3
host_name=Client3
host_name=Client3
host_name=Client3
host_name=Client4
plugin_output=Name : Client4
host_name=Client4
plugin_output=Client : 168131
host_name=Client4
host_name=Client4
host_name=Client4
host_name=Client4
host_name=Client4
host_name=Client4
host_name=Client4
host_name=Client5
plugin_output=Name : Client5
host_name=Client5
plugin_output=Client : 168131
host_name=Client5
host_name=Client5
host_name=Client5
host_name=Client5
host_name=Client5
host_name=Client5
host_name=Client5
host_name=Client6
plugin_output=Name : Client6
host_name=Client6
plugin_output=Client : 168131
host_name=Client6
host_name=Client6
host_name=Client6
host_name=Client6
host_name=Client6
host_name=Client6
host_name=Client6
host_name=Client6
host_name=Client7
plugin_output=Name : Client7
host_name=Client7
plugin_output=Client : 168131
host_name=Client7
host_name=Client7
host_name=Client7
host_name=Client7
host_name=Client7
host_name=Client7
host_name=Client7
host_name=Client7
host_name=Client8
plugin_output=Name : Client8
host_name=Client8
plugin_output=Client : 168131
host_name=Client8
host_name=Client8
host_name=Client8
host_name=Client8
host_name=Client8
host_name=Client8
host_name=Client8
Desired output after awk/sed/perl/...:
host_name=Client1
plugin_output=Name : Client1 Marseille
plugin_output=Client : 168131
host_name=Client2
plugin_output=Name : Client2 Besançon
plugin_output=Client : 168131
host_name=Client3
plugin_output=Name : Client3 BRETAGNE
plugin_output=Client : 168131
host_name=Client4
plugin_output=Name : Client4
plugin_output=Client : 168131
host_name=Client5
plugin_output=Name : Client5
plugin_output=Client : 168131
host_name=Client6
plugin_output=Name : Client6
plugin_output=Client : 168131
host_name=Client7
plugin_output=Name : Client7
plugin_output=Client : 168131
host_name=Client8
plugin_output=Name : Client8
plugin_output=Client : 168131
回答1:
You can use this awk:
awk '/^plugin_output=/ || !seen[$0]++' file
host_name=Client1
plugin_output=Name : Client1 Marseille
plugin_output=Client : 168131
host_name=Client2
plugin_output=Name : Client2 Besançon
plugin_output=Client : 168131
host_name=Client3
plugin_output=Name : Client3 BRETAGNE
plugin_output=Client : 168131
host_name=Client4
plugin_output=Name : Client4
plugin_output=Client : 168131
host_name=Client5
plugin_output=Name : Client5
plugin_output=Client : 168131
host_name=Client6
plugin_output=Name : Client6
plugin_output=Client : 168131
host_name=Client7
plugin_output=Name : Client7
plugin_output=Client : 168131
host_name=Client8
plugin_output=Name : Client8
plugin_output=Client : 168131
This prints a record if it starts with plugin_output=
or if it is the first occurrence of that line (i.e. it has not been seen before).
回答2:
The snippet you're using does something slightly different from what you're trying to do.
To accomplish what you want, you're going to have to parse the current line.
perl -pe 'if ( my ($host) = m/host_name=(\w+)/ ) { next if $seen{$host}++; }'
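That should do the trick. (Caveat: with -p the implicit print runs in a continue block, so next does not suppress the line; if duplicates are still printed, use -n with an explicit print instead.)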
回答3:
Perl version:
perl -ne 'print if !$seen{$_}++ || /^plugin_output=/'
or if you still want to use unless
(I find if
more readable in this case):
perl -ne 'print unless $seen{$_}++ && !/^plugin_output=/'
来源:https://stackoverflow.com/questions/33012885/remove-duplicate-line-only-contain-specific-string