--- a/src/perl/matcher
+++ b/src/perl/matcher
@@ -6,6 +6,7 @@
 
 #:META:RESOURCE:%.launcher:string:default launcher command
 #:META:RESOURCE:%.button:string:the mouse button used to activate a match
+#:META:RESOURCE:%.join:int:max continuation lines to join a hard-wrapped match
 #:META:RESOURCE:%.pattern.:string:extra pattern to match
 #:META:RESOURCE:%.launcher.:string:custom launcher for pattern
 #:META:RESOURCE:%.rend.:string:custom rendition for pattern
@@ -88,6 +89,30 @@
    URxvt.matcher.pattern.2: \\B(/\\S+?):(\\d+)(?=:|$)
    URxvt.matcher.launcher.2: gvim +$2 $1
 
+=head2 Joining matches split across hard line breaks
+
+Soft-wrapped lines (text that wraps at the terminal edge) are already
+treated as a single logical line, so a URL that merely wraps at the
+window border is matched and clickable as a whole.
+
+A URL split by a real newline (for example a long link hard-wrapped by a
+mail client into C<http://host/long/> and C<path>) is, however, two
+separate logical lines, and without joining only the first half would be
+matched. The C<URxvt.matcher.join> resource controls how many following
+logical lines may be glued to the current one to recover such a match:
+
+   URxvt.matcher.join: 2
+
+The join is conservative: it stops at a line boundary that has trailing
+or leading whitespace, and only continues onto a line that begins with a
+character that can be part of a match. Set it to C<0> to disable joining
+entirely and restore the stock per-line behaviour. The default is C<2>.
+
+Because the heuristic cannot know whether a line that happens to end in a
+match character was truly cut mid-token, two adjacent tokens on
+consecutive lines can occasionally be glued; lower C<URxvt.matcher.join> or set
+it to C<0> if that is a problem.
+
 =head2 Regex encoding/wide character matching
 
 Urxvt stores all text as unicode, in a special encoding that uses
@@ -265,6 +290,10 @@
 
    $self->{launcher} = $self->my_resource ("launcher") || $self->x_resource("url-launcher") || "sensible-browser";
 
+   # max following logical lines to glue on (0 = off); non-numeric values fall back to 2
+   my $join = $self->my_resource ("join");
+   $self->{join} = (defined $join && $join =~ /\A\d+\z/) ? $join : 2;
+
    $self->{button} = 2;
    $self->{state} = 0;
    if($self->{argv}[0] || $self->my_resource ("button")) {
@@ -300,25 +329,36 @@
 sub on_line_update {
    my ($self, $row) = @_;
 
-   # fetch the line that has changed
-   my $line = $self->line ($row);
-   my $text = $line->t;
-   my $rend;
+   # text of the changed line glued to its continuations, plus its [line, start, len] segments
+   my ($text, $segs) = $self->joined_line ($row);
+
+   my @rends;
 
    # find all urls (if any)
    for my $matcher (@{$self->{matchers}}) {
       while ($text =~ /$matcher->[0]/g) {
          #print "$&\n";
-         $rend ||= $line->r;
+         my ($beg, $end) = ($-[0], $+[0]);
 
          # mark all characters as underlined. we _must_ not toggle underline,
          # as we might get called on an already-marked url.
-         &{$matcher->[2]}
-            for @{$rend}[$-[0] .. $+[0] - 1];
+         # a match may span several segments: clip [beg, end) to each one and mark that part.
+         for my $si (0 .. $#$segs) {
+            my ($line, $start, $len) = @{ $segs->[$si] };
+            my $lo = $beg > $start ? $beg : $start;
+            my $hi = $end < $start + $len ? $end : $start + $len;
+            next if $lo >= $hi;
+
+            $rends[$si] ||= $line->r;
+            &{$matcher->[2]}
+               for @{ $rends[$si] }[ ($lo - $start) .. ($hi - $start - 1) ];
+         }
       }
    }
 
-   $line->r ($rend) if $rend;
+   for my $si (0 .. $#$segs) {
+      $segs->[$si][0]->r ($rends[$si]) if $rends[$si];
+   }
 
    ()
 }
@@ -331,11 +371,59 @@
       ($event->{state} & $mask) == $self->{state});
 }
 
-sub find_matches {
-   my ($self, $row, $col) = @_;
+# glue the logical line at $row to up to $self->{join} following logical lines, so a
+# url split by a hard newline still matches. returns (text, [[line, start, len], ...]).
+sub joined_line {
+   my ($self, $row) = @_;
+
    my $line = $self->line ($row);
    my $text = $line->t;
-   my $off = $line->offset_of ($row, $col) if defined $col;
+   my @segs = ([ $line, 0, length $text ]);
+
+   my $join = $self->{join};
+   if ($join > 0) {
+      my $maxrow = $self->nrow - 1;
+
+      for (1 .. $join) {
+         # stop at an empty line or one ending in whitespace/NOCHAR: nothing to continue
+         last unless $text =~ /[^\s\Q$urxvt::NOCHAR\E]\z/;
+
+         my $nextrow = $line->end + 1;
+         last if $nextrow > $maxrow;
+
+         my $next = $self->line ($nextrow);
+         my $ntext = $next->t;
+
+         # continuation must start with a url char, not whitespace or prose
+         last unless $ntext =~ /\A[\w\-\@;\/?:&=%\$.+!*\x27,~#]/;
+
+         push @segs, [ $next, length $text, length $ntext ];
+         $text .= $ntext;
+         $line = $next;
+      }
+   }
+
+   ($text, \@segs)
+}
+
+# map an offset in the joined text back to (row, col) via the segment that contains it
+sub coord_in_segments {
+   my ($segs, $offset) = @_;
+
+   for my $seg (@$segs) {
+      my ($line, $start, $len) = @$seg;
+      return $line->coord_of ($offset - $start) if $offset < $start + $len;
+   }
+
+   # offset is at or past the end of the joined text: resolve it against the last segment
+   my ($line, $start) = @{ $segs->[-1] };
+   $line->coord_of ($offset - $start)
+}
+
+sub find_matches {
+   my ($self, $row, $col) = @_;
+   my ($text, $segs) = $self->joined_line ($row);
+   my $off = defined $col ? $segs->[0][0]->offset_of ($row, $col) : undef;
 
    my @matches;
    for my $matcher (@{$self->{matchers}}) {
@@ -360,7 +448,7 @@
                } split /\s+/, $launcher;
             }
 
-            push @matches, [ $line->coord_of ($begin[0]), $line->coord_of ($end[0]), $match, @exec ];
+            push @matches, [ coord_in_segments ($segs, $begin[0]), coord_in_segments ($segs, $end[0]), $match, @exec ];
          }
       }
    }