[Templates-cvs] cvs commit: TT3/lib/Template Parser.pm
cvs@template-toolkit.org
cvs@template-toolkit.org
Wed, 17 Nov 2004 14:52:32 +0000
cvs 04/11/17 14:52:32
Modified: lib/Template Parser.pm
Log:
* various fixes to improve detection of end tag in regexen for matching
comments, whitespace, operators, etc.
Revision Changes Path
1.5 +185 -132 TT3/lib/Template/Parser.pm
Index: Parser.pm
===================================================================
RCS file: /template-toolkit/TT3/lib/Template/Parser.pm,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- Parser.pm 2004/11/15 19:29:40 1.4
+++ Parser.pm 2004/11/17 14:52:32 1.5
@@ -20,7 +20,7 @@
# modify it under the same terms as Perl itself.
#
# REVISION
-# $Id: Parser.pm,v 1.4 2004/11/15 19:29:40 abw Exp $
+# $Id: Parser.pm,v 1.5 2004/11/17 14:52:32 abw Exp $
#
#========================================================================
@@ -31,7 +31,7 @@
use Template::Base;
use base qw( Template::Base );
-our $VERSION = sprintf("%d.%02d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/);
+our $VERSION = sprintf("%d.%02d", q$Revision: 1.5 $ =~ /(\d+)\.(\d+)/);
our $DEBUG = 0 unless defined $DEBUG;
our $ERROR = '';
our $THROW = 'parser';
@@ -57,9 +57,6 @@
} keys %$BRACKETS,
};
-# default regex to match comments, redefined in init() if tag_end is defined
-our $COMMENT = qr/ \#[^\n]*\n /x;
-
# regexen to match the assignment operators (in both forms, '=' and '=>'),
# the list range operator, '..', unary ops and binary ops in various
# different forms (mathematical, comparison and boolean logic ops)
@@ -134,26 +131,26 @@
sub init {
my ($self, $config) = @_;
- my ($comment, $tag_end);
+ my ($tag_end, $eol);
if (defined ($tag_end = $config->{ tag_end })) {
- # construct regex to match comments starting with '#'
- # and continuing up to the end of the line, or the
- # end of the tag
- $comment = qr/
- \# .*? # capture everything after '#', but non-greedily
+ # construct regex to match everything up to the end of the current
+ # line or the end of tag token, whichever comes first
+ $eol = qr/
+ .*? # capture everything non-greedily
(?: \n # either match and consumer a newline character
| (?= # or look ahead for the end-of-tag marker
- $config->{ tag_end }
+ $tag_end
)
)
/sx;
- $self->{ tag_end } = qr/ (?= \G $config->{ tag_end } ) /x;
}
else {
- $comment = $self->pkgvar( COMMENT => $COMMENT );
+ # no tag end, so just match up to the end of line
+ $eol = qr/ [^\n]*\n /x;
}
+ my $comment = qr/ \# $eol /sx;
my $wspace = qr/ \s* (?:$comment\s*)* /sx;
my $assign = $config->{ assign } || $self->pkgvar( ASSIGN => $ASSIGN );
my $range = $config->{ range } || $self->pkgvar( RANGE => $RANGE );
@@ -166,17 +163,25 @@
# '..' range operator. all are defined to start from the current
# regex match point \G
- $self->{ wspace } = qr/ \G $wspace /x;
- $self->{ comma } = qr/ \G (?:$wspace,)? $wspace /x;
- $self->{ question } = qr/ \G $wspace \? $wspace /x;
- $self->{ colon } = qr/ \G $wspace : $wspace /x;
- $self->{ assign } = qr/ \G $wspace ($assign) $wspace /x;
- $self->{ range } = qr/ \G $wspace ($range) $wspace /x;
- $self->{ unary } = qr/ \G $wspace ($unary) $wspace /x;
- $self->{ binary } = qr/ \G $wspace ($binary) $wspace /x;
+ $self->{ eol } = qr/ \G $eol /x;
+ $self->{ wspace } = qr/ \G $wspace /x;
+ $self->{ comma } = qr/ \G (?:$wspace,)? $wspace /x;
+ $self->{ question } = qr/ \G $wspace \? $wspace /x;
+ $self->{ colon } = qr/ \G $wspace : $wspace /x;
+ $self->{ semicolon } = qr/ \G $wspace ; $wspace /x;
+ $self->{ assign } = qr/ \G $wspace ($assign) $wspace /x;
+ $self->{ range } = qr/ \G $wspace ($range) $wspace /x;
+ $self->{ unary } = qr/ \G $wspace ($unary) $wspace /x;
+ $self->{ binary } = qr/ \G $wspace ($binary) $wspace /x;
+ $self->{ tag_end } = qr/ (?= \G $wspace $tag_end ) /sx
+ if $tag_end;
# TODO: embed and interp?
+ # need to know keywords of any directives in play so that we can look
+ # out for them
+ $self->{ directives } = $config->{ directives } || { };
+
return $self;
}
@@ -219,7 +224,7 @@
last CHUNK;
}
else {
- $self->debug("end_regex not found before unaryop: '", $self->next_text($textref), "'\n") if $DEBUG;
+ $self->debug("end_regex ($tag_end) not found before unaryop: '", $self->next_token($textref), "'\n") if $DEBUG;
}
if ($$textref =~ /$self->{ unary }/cg ) {
@@ -258,17 +263,18 @@
push(@tokens, $term);
# check for end_regex
- if ($tag_end && ($$textref =~ /$tag_end/cg)) {
+ if ($tag_end && ($$textref =~ /$tag_end/cgx)) {
$self->debug("matched end_regex before binaryop\n") if $DEBUG;
$stop = 1;
last CHUNK;
}
else {
- $self->debug("end regex not found before binary op\n") if $DEBUG;
+ $self->debug("end regex not found before binary op [",
+ $self->next_token($textref), "]\n") if $DEBUG;
}
# is there a binary operator indicating more to come?
- if ($$textref =~ /$self->{ binary }/cg) {
+ if ($$textref =~ /$self->{ binary }/cgx) {
$binop = $1;
$self->debug("expr binop: $binop\n") if $DEBUG;
push(@tokens, $binop);
@@ -333,7 +339,7 @@
my ($self, $textref, $options) = @_;
my ($term);
- $self->debug("parse_term(", $self->next_text($textref), ")\n")
+ $self->debug("parse_term(", $self->next_token($textref), ")\n")
if $DEBUG;
# skip any leading whitespace
@@ -405,7 +411,7 @@
sub parse_number {
my ($self, $textref) = @_;
- $self->debug("parse_number(", $self->next_text($textref), ")\n")
+ $self->debug("parse_number(", $self->next_token($textref), ")\n")
if $DEBUG;
if ($$textref =~ /$NUMBER/cog) {
@@ -426,7 +432,7 @@
sub parse_squote {
my ($self, $textref, $options) = @_;
- $self->debug("parse_squote(", $self->next_text($textref), ")\n")
+ $self->debug("parse_squote(", $self->next_token($textref), ")\n")
if $DEBUG;
if ($$textref =~ /$SQUOTE/cog) {
@@ -447,7 +453,7 @@
sub parse_dquote {
my ($self, $textref, $options) = @_;
- $self->debug("parse_dquote(", $self->next_text($textref), ")\n")
+ $self->debug("parse_dquote(", $self->next_token($textref), ")\n")
if $DEBUG;
if ($$textref =~ /$DQUOTE/cog) {
@@ -519,7 +525,7 @@
my ($self, $textref, $options) = @_;
my ($term, $terms, $args);
- $self->debug("parse_variable(", $self->next_text($textref), ")\n")
+ $self->debug("parse_variable(", $self->next_token($textref), ")\n")
if $DEBUG;
my $pos = pos $$textref;
@@ -578,7 +584,7 @@
$nodes ||= [ ];
my $node;
- $self->debug("parse_varnodes(", $self->next_text($textref), ")\n")
+ $self->debug("parse_varnodes(", $self->next_token($textref), ")\n")
if $DEBUG;
do {
@@ -615,7 +621,7 @@
my ($self, $textref, $options) = @_;
my ($term, $args);
- $self->debug("parse_varnode(", $self->next_text($textref), ")\n")
+ $self->debug("parse_varnode(", $self->next_token($textref), ")\n")
if $DEBUG;
if ($$textref =~ /$INTEGER/cog) {
@@ -693,7 +699,7 @@
my ($pos, $op, $key, $value);
my $args = [ ];
- $self->debug("parse_args(", $self->next_text($textref), ")\n")
+ $self->debug("parse_args(", $self->next_token($textref), ")\n")
if $DEBUG;
# skip any leading whitespace
@@ -703,6 +709,8 @@
# save current string position
$pos = pos $$textref;
+ # TODO: check for keywords?
+
# look for something that can be the LHS of an assignment
if ( ($$textref =~ /$IDENT/cog) || ($$textref =~ /$SQUOTE/cog) ) {
$key = $1;
@@ -749,6 +757,68 @@
#------------------------------------------------------------------------
+# parse_params($textref, $options)
+#------------------------------------------------------------------------
+
+sub parse_params {
+ my ($self, $textref, $options) = @_;
+ my ($pos, $op, $key, $value);
+ my $params = [ ];
+
+ $self->debug("parse_params(", $self->next_token($textref), ")\n")
+ if $DEBUG;
+
+ # skip any leading whitespace
+ $$textref =~ /$self->{ wspace }/cg;
+
+ while (1) {
+ # save current string position
+ $pos = pos $$textref;
+
+ if ($$textref =~ /$IDENT/cog) {
+ $key = $1;
+ if ($self->{ directives }->{ $key }) {
+ $self->debug("found keyword '$key' in args, ending\n") if $DEBUG;
+ pos $$textref = $pos;
+ return $params;
+ }
+ }
+ elsif ($$textref =~ /$SQUOTE/cog) {
+ $key = $1;
+ }
+ else {
+ last;
+ }
+
+ $self->debug("possible parameter name: $key\n") if $DEBUG;
+
+ if ($$textref =~ /$self->{ assign }/cg) {
+ # named parameter
+ $op = $1;
+ ($value = $self->parse_expression($textref))
+ || return $self->missing( $textref,
+ "missing expression after '$op'");
+ push(@$params, [ $key, $value ]);
+ }
+ else {
+ # doesn't look like an assignment, so reset and return
+ $self->debug("not a parameter, rewinding\n") if $DEBUG;
+ pos $$textref = $pos;
+ return $params;
+ }
+
+ # skip comma and/or whitespace
+ $$textref =~ /$self->{ comma }/cg;
+ }
+
+ # remove trailing comma/whitespace
+ $$textref =~ /$self->{ comma }/cg;
+
+ return $params;
+}
+
+
+#------------------------------------------------------------------------
# parse_qwlist($textref, $left)
#
# Parses a quoted word list, e.g. qw[ ] qw( foo bar baz ) qw< x y z >.
@@ -936,6 +1006,44 @@
#------------------------------------------------------------------------
+# parse_template_name($textref, $options)
+#
+# filename: FILENAME
+# | QUOTED
+# | LITERAL
+# | $variable
+#------------------------------------------------------------------------
+
+sub parse_template_name {
+ my ($self, $textref) = @_;
+ my $term;
+
+ if ($$textref =~ /$FILENAME/cog) {
+ $term = [ filename => $1 ];
+ }
+ elsif ($$textref =~ /$SQUOTE/cog) {
+ $term = [ squote => $1 ];
+ }
+ elsif ($$textref =~ /$DQUOTE/cog) {
+ my $text = $1;
+ $term = $self->parse_dstring(\$text);
+ }
+ elsif ($$textref =~ /$INTERP/cog) {
+ $self->debug("parsing filename variable\n") if $DEBUG;
+ $term = $self->parse_variable($textref)
+ || return $self->missing( $textref, $term,
+ "missing variable after '\$'");
+ }
+ else {
+ return 0; #$self->decline('not a filename');
+ }
+
+ return $term;
+}
+
+
+
+#------------------------------------------------------------------------
# parse_whitespace($text)
#
# Scan over any whitespace and/or other ignorable items (e.g. comments).
@@ -954,6 +1062,45 @@
#------------------------------------------------------------------------
+# parse_semicolon($text)
+#
+# Scan for a semicolon surrounding by optional whitespace, comments, etc.
+#------------------------------------------------------------------------
+
+sub parse_semicolon {
+ my ($self, $textref) = @_;
+ $$textref =~ /$self->{ semicolon }/cgx;
+}
+
+
+#------------------------------------------------------------------------
+# parse_tag_end($text)
+#
+# Scan for the token marking the end of a tag, as defined by the tag_end
+# configuration parameter. Also accounts for whitespace, comments, etc.
+#------------------------------------------------------------------------
+
+sub parse_tag_end {
+ my ($self, $textref) = @_;
+
+ return $self->declined('no tag_end defined')
+ unless $self->{ tag_end };
+
+ # scan for tag end
+ return $$textref =~ /$self->{ tag_end }/cgx;
+}
+
+
+
+# parse to end of line or tag end
+
+sub parse_end_of_line {
+ my ($self, $textref) = @_;
+ return $$textref =~ /$self->{ eol }/cgx;
+}
+
+
+#------------------------------------------------------------------------
# unexpected(\$text, $message)
#
# Error reporting method.
@@ -1015,7 +1162,7 @@
my ($self, $textref) = @_;
return '' unless length $$textref;
my $pos = pos $$textref;
- my $got = ($$textref =~ / \G \s* (\w+|.) /gcsx);
+ my $got = ($$textref =~ / \G (\s* \w+|.{1,5}) /gcsx);
pos $$textref = $pos;
return $got ? $1 : '';
}
@@ -1158,53 +1305,12 @@
}
-#------------------------------------------------------------------------
-# parse_filename($textref, $options)
-#
-# filename: FILENAME
-# | QUOTED
-# | LITERAL
-# | $variable
-#------------------------------------------------------------------------
-
-sub parse_filename {
- my ($self, $textref, $options) = @_;
- my $term;
-
- # skip any leading whitespace
- $$textref =~ / \G $WSPACE /gcx;
-
- if ($$textref =~ /$FILENAME/cog) {
- $term = [ filename => $1 ];
- }
- elsif ($$textref =~ /$SQUOTE/cog) {
- $term = [ squote => $1 ];
- }
- elsif ($$textref =~ /$DQUOTE/cog) {
- my $text = $1;
- $term = $self->parse_dstring(\$text, $options);
- }
- elsif ($$textref =~ /$INTERP/cog) {
- $self->debug("parsing filename variable\n") if $DEBUG;
- $term = $self->parse_variable($textref, $options)
- || return $self->missing( $textref, $term,
- "missing variable after '\$'");
- }
- else {
- return 0; #$self->decline('not a filename');
- }
-
- return $term;
-}
-
-
-
1;
__END__
=head1 NAME
-Template::TT3::Parser - parser basic language elements
+Template::Parser - parser basic language elements
=head1 SYNOPSIS
@@ -1291,65 +1397,12 @@
Andy Wardley E<lt>abw@wardley.orgE<gt>
=head1 VERSION
-
-$Revision: 1.4 $
-
-=head1 COPYRIGHT
-
- Copyright (C) 1996-2004 Andy Wardley. All Rights Reserved.
- Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
- Copyright (C) 2003-2004 Fotango Ltd.
-
-This module is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
-
-# Local Variables:
-# mode: perl
-# perl-indent-level: 4
-# indent-tabs-mode: nil
-# End:
-#
-# vim: expandtab shiftwidth=4:
-
-
---
-
-
-1;
-
-__END__
-
-=head1 NAME
-
-Template::Parser - base class parser module
-
-=head1 SYNOPSIS
-
- TODO
-
-=head1 DESCRIPTION
-
-TODO
-
-=head1 METHODS
-
-TODO
-
-=head1 AUTHOR
-
-Andy Wardley E<lt>abw@wardley.orgE<gt>
-
-=head1 VERSION
-$Revision: 1.4 $
+$Revision: 1.5 $
=head1 COPYRIGHT
- Copyright (C) 1996-2004 Andy Wardley. All Rights Reserved.
- Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
- Copyright (C) 2002-2004 Fotango Ltd.
+Copyright (C) 1996-2004 Andy Wardley. All Rights Reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.