Every now and then I need to filter out lines that are mostly the same
but slightly different.
The `uniq` command can filter out lines that are duplicates (`-u`)
but doesn’t get rid of lines that are mostly the same. I had some spare
time the last time it came up, so I wrote this: `uniqish`. I'm curious
whether there are other ways to accomplish this.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
| #!/usr/bin/env perl
use strict;
use warnings;
# Return a copy of a line with every match of each command-line pattern
# removed.
#
# Each element of @ARGV is interpolated as a regular expression, and all of
# its matches are deleted from the line.  The result is the "normalised" text
# that callers compare to decide whether two lines count as the same.
#
# Parameters:
#   $line - the text to normalise (may be undef).
# Returns:
#   the stripped text, or undef when $line is undef.
sub strip_line {
    my $line = shift;

    # Nothing to strip; also avoids "uninitialized value" warnings in s///.
    return $line unless defined $line;

    foreach my $s (@ARGV) {
        # Wrap the pattern in (?:...) so that an empty argument is a harmless
        # empty regex.  A pattern that interpolates to the empty string would
        # otherwise make Perl reuse the last successfully matched regex.
        $line =~ s/(?:$s)//g;
    }
    return $line;
}
# Option handling: an optional leading "-u" switches to "unique only" mode, in
# which a line is printed only if it is not part of a run of equivalent lines.
# The flag is removed from @ARGV so strip_line() sees only the patterns.
# The @ARGV check avoids an "uninitialized value" warning when the script is
# run without any arguments.
my $uniq = 0;
if (@ARGV && $ARGV[0] eq "-u") {
    $uniq = 1;
    shift @ARGV;
}

# $last holds the previous input line and $lastdup records whether that line
# compared equal (after stripping) to the line before it.
my $lastdup = 0;
my $last = <STDIN>;

# Empty input: there is nothing to compare or print, so skip everything
# rather than passing undef to strip_line() and print.
if (defined $last) {
    my $lstripped = strip_line($last);

    while (defined(my $line = <STDIN>)) {
        my $stripped = strip_line($line);
        if ($lstripped ne $stripped) {
            # The previous line ended a run.  Print it, unless -u is active
            # and it was a duplicate of the line before it.
            if (!$uniq || !$lastdup) {
                print $last;
            }
            $lastdup = 0;
        }
        else {
            $lastdup = 1;
        }
        $last = $line;
        $lstripped = $stripped;
    }

    # Flush the final line, applying the same rule as inside the loop.
    if (!$uniq || !$lastdup) {
        print $last;
    }
}
|