I could not wait for Perl 6 :-) Outrageous errors and ghastly
inefficiencies possible/evitable but at least it matches pearl, pelt,
and peel for 'perl' (insert, delete, change, one character,
respectively). I have not yet done comparative research against
'agrep' to see how my solution fares (like matching identically...)
This excercise added two items to the letter to Santa Claus:
- overloading m// operator (and I would not mind overloading s///...)
- a way to find out "how many submatches did I
have in the latest regexp match" and a way to get
the submatch n via a function/array _without_ using the $n --
with these the eval I use below could be avoided by directly
length()ing the concatenation of all the submatches
After a little more testing and optimisation and a fuzzysubstitute()
I will naturally submit this to the CPAN/PAUSE.
---
package Text::Fuzzy;
require 5;
%R = (); # cache for the fuzzy match regexps
@L = (); # cache for the length expressions
sub fuzzymatch {
my ($str, $ins, $del, $chg, $mod) = @_;
return undef unless defined $str;
$ins = 1 unless defined $ins;
$del = 1 unless defined $del;
$chg = 1 unless defined $chg;
$mod = defined $mod ? "(?$mod)" : '';
my @str = split(//, $str);
$R{$str}->{"i$ins"} = join('(.*?)', @str)
unless (defined $R{$str}->{"i$ins"});
$R{$str}->{"d$del"} = join('', map { "($_?)" } @str)
unless (defined $R{$str}->{"d$del"});
$R{$str}->{"c$chg"} = join('', map { "(?:$_|(.))" } @str)
unless (defined $R{$str}->{"c$chg"});
$L[$#str] = 'length("'.join('', map { "\$$_" } 1..$#str+1).'")'
if ($L[$#str] eq '');
return 1
if (/$mod$R{$str}->{"i$ins"}/ and eval "$L[$#str] <= $ins");
return 1
if (/$mod$R{$str}->{"d$ins"}/ and eval "$L[$#str] > $#str - $del");
return 1
if (/$mod$R{$str}->{"c$ins"}/ and eval "$L[$#str] <= $chg");
0;
}
package main;
select(STDOUT); $| = 1;
while (<STDIN>) {
print if Text::FuzzyMatch::fuzzymatch(@ARGV);
}
---
++jhi;
inefficiencies possible/evitable but at least it matches pearl, pelt,
and peel for 'perl' (insert, delete, change, one character,
respectively). I have not yet done comparative research against
'agrep' to see how my solution fares (like matching identically...)
This excercise added two items to the letter to Santa Claus:
- overloading m// operator (and I would not mind overloading s///...)
- a way to find out "how many submatches did I
have in the latest regexp match" and a way to get
the submatch n via a function/array _without_ using the $n --
with these the eval I use below could be avoided by directly
length()ing the concatenation of all the submatches
After a little more testing and optimisation and a fuzzysubstitute()
I will naturally submit this to the CPAN/PAUSE.
---
package Text::Fuzzy;
require 5;
%R = (); # cache for the fuzzy match regexps
@L = (); # cache for the length expressions
sub fuzzymatch {
my ($str, $ins, $del, $chg, $mod) = @_;
return undef unless defined $str;
$ins = 1 unless defined $ins;
$del = 1 unless defined $del;
$chg = 1 unless defined $chg;
$mod = defined $mod ? "(?$mod)" : '';
my @str = split(//, $str);
$R{$str}->{"i$ins"} = join('(.*?)', @str)
unless (defined $R{$str}->{"i$ins"});
$R{$str}->{"d$del"} = join('', map { "($_?)" } @str)
unless (defined $R{$str}->{"d$del"});
$R{$str}->{"c$chg"} = join('', map { "(?:$_|(.))" } @str)
unless (defined $R{$str}->{"c$chg"});
$L[$#str] = 'length("'.join('', map { "\$$_" } 1..$#str+1).'")'
if ($L[$#str] eq '');
return 1
if (/$mod$R{$str}->{"i$ins"}/ and eval "$L[$#str] <= $ins");
return 1
if (/$mod$R{$str}->{"d$ins"}/ and eval "$L[$#str] > $#str - $del");
return 1
if (/$mod$R{$str}->{"c$ins"}/ and eval "$L[$#str] <= $chg");
0;
}
package main;
select(STDOUT); $| = 1;
while (<STDIN>) {
print if Text::FuzzyMatch::fuzzymatch(@ARGV);
}
---
++jhi;