[Templates-cvs] cvs commit: TT3/lib/Template Parser.pm

cvs@template-toolkit.org cvs@template-toolkit.org
Wed, 17 Nov 2004 14:52:32 +0000


cvs         04/11/17 14:52:32

  Modified:    lib/Template Parser.pm
  Log:
  * various fixes to improve detection of end tag in regexen for matching
    comments, whitespace, operators, etc.
  
  Revision  Changes    Path
  1.5       +185 -132  TT3/lib/Template/Parser.pm
  
  Index: Parser.pm
  ===================================================================
  RCS file: /template-toolkit/TT3/lib/Template/Parser.pm,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- Parser.pm	2004/11/15 19:29:40	1.4
  +++ Parser.pm	2004/11/17 14:52:32	1.5
  @@ -20,7 +20,7 @@
   #   modify it under the same terms as Perl itself.
   #
   # REVISION
  -#   $Id: Parser.pm,v 1.4 2004/11/15 19:29:40 abw Exp $
  +#   $Id: Parser.pm,v 1.5 2004/11/17 14:52:32 abw Exp $
   #
   #========================================================================
   
  @@ -31,7 +31,7 @@
   use Template::Base;
   use base qw( Template::Base );
   
  -our $VERSION = sprintf("%d.%02d", q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/);
  +our $VERSION = sprintf("%d.%02d", q$Revision: 1.5 $ =~ /(\d+)\.(\d+)/);
   our $DEBUG   = 0 unless defined $DEBUG;
   our $ERROR   = '';
   our $THROW   = 'parser';
  @@ -57,9 +57,6 @@
       } keys %$BRACKETS,
   };
   
  -# default regex to match comments, redefined in init() if tag_end is defined
  -our $COMMENT  = qr/ \#[^\n]*\n /x;
  -
   # regexen to match the assignment operators (in both forms, '=' and '=>'), 
   # the list range operator, '..', unary ops and binary ops in various 
   # different forms (mathematical, comparison and boolean logic ops)
  @@ -134,26 +131,26 @@
   
   sub init {
       my ($self, $config) = @_;
  -    my ($comment, $tag_end);
  +    my ($tag_end, $eol);
   
       if (defined ($tag_end = $config->{ tag_end })) {
  -        # construct regex to match comments starting with '#'
  -        # and continuing up to the end of the line, or the 
  -        # end of the tag
  -        $comment = qr/      
  -            \# .*?         # capture everything after '#', but non-greedily
  +        # construct regex to match everything up to the end of the current
  +        # line or the end of tag token, whichever comes first
  +        $eol = qr/
  +            .*?            # capture everything non-greedily
               (?: \n         # either match and consumer a newline character
                 | (?=        # or look ahead for the end-of-tag marker
  -                 $config->{ tag_end } 
  +                 $tag_end 
                   )
               ) 
           /sx;
  -        $self->{ tag_end } = qr/ (?= \G $config->{ tag_end } ) /x;
       }
       else {
  -        $comment = $self->pkgvar( COMMENT => $COMMENT );
  +        # no tag end, so just match up to the end of line
  +        $eol = qr/ [^\n]*\n /x;
       }
   
  +    my $comment = qr/ \# $eol /sx;
       my $wspace  = qr/ \s* (?:$comment\s*)* /sx;
       my $assign  = $config->{ assign  } || $self->pkgvar( ASSIGN  => $ASSIGN );
       my $range   = $config->{ range   } || $self->pkgvar( RANGE   => $RANGE );
  @@ -166,17 +163,25 @@
       # '..' range operator.  all are defined to start from the current
       # regex match point \G
   
  -    $self->{ wspace   } = qr/ \G $wspace /x;
  -    $self->{ comma    } = qr/ \G (?:$wspace,)? $wspace /x;
  -    $self->{ question } = qr/ \G $wspace \? $wspace /x;
  -    $self->{ colon    } = qr/ \G $wspace : $wspace /x;
  -    $self->{ assign   } = qr/ \G $wspace ($assign) $wspace /x;
  -    $self->{ range    } = qr/ \G $wspace ($range) $wspace /x;
  -    $self->{ unary    } = qr/ \G $wspace ($unary) $wspace /x;
  -    $self->{ binary   } = qr/ \G $wspace ($binary) $wspace /x;
  +    $self->{ eol       } = qr/ \G $eol /x;
  +    $self->{ wspace    } = qr/ \G $wspace /x;
  +    $self->{ comma     } = qr/ \G (?:$wspace,)? $wspace /x;
  +    $self->{ question  } = qr/ \G $wspace \? $wspace /x;
  +    $self->{ colon     } = qr/ \G $wspace : $wspace /x;
  +    $self->{ semicolon } = qr/ \G $wspace ; $wspace /x;
  +    $self->{ assign    } = qr/ \G $wspace ($assign) $wspace /x;
  +    $self->{ range     } = qr/ \G $wspace ($range) $wspace /x;
  +    $self->{ unary     } = qr/ \G $wspace ($unary) $wspace /x;
  +    $self->{ binary    } = qr/ \G $wspace ($binary) $wspace /x;
  +    $self->{ tag_end   } = qr/ (?= \G $wspace $tag_end ) /sx
  +        if $tag_end;
   
       # TODO: embed and interp?
   
  +    # need to know keywords of any directives in play so that we can look
  +    # out for them 
  +    $self->{ directives } = $config->{ directives } || { };
  +
       return $self;
   }
   
  @@ -219,7 +224,7 @@
               last CHUNK;
           }
           else {
  -            $self->debug("end_regex not found before unaryop: '", $self->next_text($textref), "'\n") if $DEBUG;
  +            $self->debug("end_regex ($tag_end) not found before unaryop: '", $self->next_token($textref), "'\n") if $DEBUG;
           }
   
           if ($$textref =~ /$self->{ unary }/cg ) {
  @@ -258,17 +263,18 @@
           push(@tokens, $term);
   
           # check for end_regex
  -        if ($tag_end && ($$textref =~ /$tag_end/cg)) {
  +        if ($tag_end && ($$textref =~ /$tag_end/cgx)) {
               $self->debug("matched end_regex before binaryop\n") if $DEBUG;
               $stop = 1;
               last CHUNK;
           }
           else {
  -            $self->debug("end regex not found before binary op\n") if $DEBUG;
  +            $self->debug("end regex not found before binary op [", 
  +                         $self->next_token($textref), "]\n") if $DEBUG;
           }
           
           # is there a binary operator indicating more to come?
  -        if ($$textref =~ /$self->{ binary }/cg) {
  +        if ($$textref =~ /$self->{ binary }/cgx) {
               $binop = $1;
               $self->debug("expr binop: $binop\n") if $DEBUG;
               push(@tokens, $binop);
  @@ -333,7 +339,7 @@
       my ($self, $textref, $options) = @_;
       my ($term);
   
  -    $self->debug("parse_term(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_term(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       # skip any leading whitespace
  @@ -405,7 +411,7 @@
   sub parse_number {
       my ($self, $textref) = @_;
   
  -    $self->debug("parse_number(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_number(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       if  ($$textref =~ /$NUMBER/cog) {
  @@ -426,7 +432,7 @@
   sub parse_squote {
       my ($self, $textref, $options) = @_;
   
  -    $self->debug("parse_squote(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_squote(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       if ($$textref =~ /$SQUOTE/cog) {
  @@ -447,7 +453,7 @@
   sub parse_dquote {
       my ($self, $textref, $options) = @_;
   
  -    $self->debug("parse_dquote(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_dquote(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       if ($$textref =~ /$DQUOTE/cog) {
  @@ -519,7 +525,7 @@
       my ($self, $textref, $options) = @_;
       my ($term, $terms, $args);
   
  -    $self->debug("parse_variable(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_variable(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       my $pos = pos $$textref;
  @@ -578,7 +584,7 @@
       $nodes ||= [ ];
       my $node;
   
  -    $self->debug("parse_varnodes(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_varnodes(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       do {
  @@ -615,7 +621,7 @@
       my ($self, $textref, $options) = @_;
       my ($term, $args);
   
  -    $self->debug("parse_varnode(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_varnode(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       if ($$textref =~ /$INTEGER/cog) {
  @@ -693,7 +699,7 @@
       my ($pos, $op, $key, $value);
       my $args = [ ];
   
  -    $self->debug("parse_args(", $self->next_text($textref), ")\n")
  +    $self->debug("parse_args(", $self->next_token($textref), ")\n")
           if $DEBUG;
   
       # skip any leading whitespace
  @@ -703,6 +709,8 @@
           # save current string position
           $pos = pos $$textref;
   
  +        # TODO: check for keywords?
  +
           # look for something that can be the LHS of an assignment
           if ( ($$textref =~ /$IDENT/cog) || ($$textref =~ /$SQUOTE/cog) ) {
               $key = $1;
  @@ -749,6 +757,68 @@
   
   
   #------------------------------------------------------------------------
  +# parse_params($textref, $options)
  +#------------------------------------------------------------------------
  +
  +sub parse_params {
  +    my ($self, $textref, $options) = @_;
  +    my ($pos, $op, $key, $value);
  +    my $params = [ ];
  +
  +    $self->debug("parse_params(", $self->next_token($textref), ")\n")
  +        if $DEBUG;
  +
  +    # skip any leading whitespace
  +    $$textref =~ /$self->{ wspace }/cg;
  +
  +    while (1) {
  +        # save current string position
  +        $pos = pos $$textref;
  +
  +        if ($$textref =~ /$IDENT/cog) {
  +            $key = $1;
  +            if ($self->{ directives }->{ $key }) {
  +                $self->debug("found keyword '$key' in args, ending\n") if $DEBUG;
  +                pos $$textref = $pos;
  +                return $params;
  +            }
  +        }
  +        elsif ($$textref =~ /$SQUOTE/cog) {
  +            $key = $1;
  +        }
  +        else {
  +            last;
  +        }
  +
  +        $self->debug("possible parameter name: $key\n") if $DEBUG;
  +            
  +        if ($$textref =~ /$self->{ assign }/cg) {
  +            # named parameter
  +            $op = $1;
  +            ($value = $self->parse_expression($textref))
  +                || return $self->missing( $textref, 
  +                                          "missing expression after '$op'");
  +            push(@$params, [ $key, $value ]);
  +        }
  +        else {
  +            # doesn't look like an assignment, so reset and return
  +            $self->debug("not a parameter, rewinding\n") if $DEBUG;
  +            pos $$textref = $pos;
  +            return $params;
  +        }
  +
  +        # skip comma and/or whitespace
  +        $$textref =~ /$self->{ comma }/cg;
  +    }
  +    
  +    # remove trailing comma/whitespace
  +    $$textref =~ /$self->{ comma }/cg;
  +    
  +    return $params;
  +}
  +
  +
  +#------------------------------------------------------------------------
   # parse_qwlist($textref, $left)
   # 
   # Parses a quoted word list, e.g. qw[ ]  qw( foo bar baz ) qw< x y z >.
  @@ -936,6 +1006,44 @@
   
   
   #------------------------------------------------------------------------
  +# parse_template_name($textref, $options)
  +#
  +# filename: FILENAME
  +#         | QUOTED
  +#         | LITERAL
  +#         | $variable
  +#------------------------------------------------------------------------
  +
  +sub parse_template_name {
  +    my ($self, $textref) = @_;
  +    my $term;
  +
  +    if ($$textref =~ /$FILENAME/cog) {
  +        $term = [ filename => $1 ];
  +    }
  +    elsif ($$textref =~ /$SQUOTE/cog) {
  +        $term = [ squote => $1 ];
  +    }
  +    elsif ($$textref =~ /$DQUOTE/cog) {
  +        my $text = $1;
  +        $term = $self->parse_dstring(\$text);
  +    }
  +    elsif ($$textref =~ /$INTERP/cog) {
  +        $self->debug("parsing filename variable\n") if $DEBUG;
  +        $term = $self->parse_variable($textref) 
  +            || return $self->missing( $textref, $term, 
  +                                      "missing variable after '\$'");
  +    }
  +    else {
  +        return 0; #$self->decline('not a filename');
  +    }
  +
  +    return $term;
  +}
  +
  +
  +
  +#------------------------------------------------------------------------
   # parse_whitespace($text)
   #
   # Scan over any whitespace and/or other ignorable items (e.g. comments). 
  @@ -954,6 +1062,45 @@
   
   
   #------------------------------------------------------------------------
  +# parse_semicolon($text)
  +#
  +# Scan for a semicolon surrounding by optional whitespace, comments, etc.
  +#------------------------------------------------------------------------
  +
  +sub parse_semicolon {
  +    my ($self, $textref) = @_;
  +    $$textref =~ /$self->{ semicolon }/cgx;
  +}
  +
  +
  +#------------------------------------------------------------------------
  +# parse_tag_end($text)
  +#
  +# Scan for the token marking the end of a tag, as defined by the tag_end 
  +# configuration parameter.  Also accounts for whitespace, comments, etc.
  +#------------------------------------------------------------------------
  +
  +sub parse_tag_end {
  +    my ($self, $textref) = @_;
  +
  +    return $self->declined('no tag_end defined')
  +        unless $self->{ tag_end };
  +
  +    # scan for tag end
  +    return $$textref =~ /$self->{ tag_end }/cgx;
  +}
  +
  +
  +
  +# parse to end of line or tag end
  +
  +sub parse_end_of_line {
  +    my ($self, $textref) = @_;
  +    return $$textref =~ /$self->{ eol }/cgx;
  +}
  +
  +
  +#------------------------------------------------------------------------
   # unexpected(\$text, $message)
   #
   # Error reporting method.
  @@ -1015,7 +1162,7 @@
       my ($self, $textref) = @_;
       return '' unless length $$textref;
       my $pos = pos $$textref;
  -    my $got = ($$textref =~ / \G \s* (\w+|.) /gcsx);
  +    my $got = ($$textref =~ / \G (\s* \w+|.{1,5}) /gcsx);
       pos $$textref = $pos;
       return $got ? $1 : '';
   }
  @@ -1158,53 +1305,12 @@
   }
   
   
  -#------------------------------------------------------------------------
  -# parse_filename($textref, $options)
  -#
  -# filename: FILENAME
  -#         | QUOTED
  -#         | LITERAL
  -#         | $variable
  -#------------------------------------------------------------------------
  -
  -sub parse_filename {
  -    my ($self, $textref, $options) = @_;
  -    my $term;
  -
  -    # skip any leading whitespace
  -    $$textref =~ / \G $WSPACE /gcx;
  -
  -    if ($$textref =~ /$FILENAME/cog) {
  -        $term = [ filename => $1 ];
  -    }
  -    elsif ($$textref =~ /$SQUOTE/cog) {
  -        $term = [ squote => $1 ];
  -    }
  -    elsif ($$textref =~ /$DQUOTE/cog) {
  -        my $text = $1;
  -        $term = $self->parse_dstring(\$text, $options);
  -    }
  -    elsif ($$textref =~ /$INTERP/cog) {
  -        $self->debug("parsing filename variable\n") if $DEBUG;
  -        $term = $self->parse_variable($textref, $options) 
  -            || return $self->missing( $textref, $term, 
  -                                      "missing variable after '\$'");
  -    }
  -    else {
  -        return 0; #$self->decline('not a filename');
  -    }
  -
  -    return $term;
  -}
  -
  -
  -
   1;
   __END__
   
   =head1 NAME
   
  -Template::TT3::Parser - parser basic language elements
  +Template::Parser - parser basic language elements
   
   =head1 SYNOPSIS
   
  @@ -1291,65 +1397,12 @@
   Andy Wardley  E<lt>abw@wardley.orgE<gt>
   
   =head1 VERSION
  -
  -$Revision: 1.4 $
  -
  -=head1 COPYRIGHT
  -
  -  Copyright (C) 1996-2004 Andy Wardley.  All Rights Reserved.
  -  Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
  -  Copyright (C) 2003-2004 Fotango Ltd.
  -
  -This module is free software; you can redistribute it and/or
  -modify it under the same terms as Perl itself.
  -
  -=cut
  -
  -# Local Variables:
  -# mode: perl
  -# perl-indent-level: 4
  -# indent-tabs-mode: nil
  -# End:
  -#
  -# vim: expandtab shiftwidth=4:
  -
  -
  ---
  -
  -
  -1;
  -
  -__END__
  -
  -=head1 NAME
  -
  -Template::Parser - base class parser module
  -
  -=head1 SYNOPSIS
  -
  -    TODO
  -
  -=head1 DESCRIPTION
  -
  -TODO
  -
  -=head1 METHODS
  -
  -TODO
  -
  -=head1 AUTHOR
  -
  -Andy Wardley  E<lt>abw@wardley.orgE<gt>
  -
  -=head1 VERSION
   
  -$Revision: 1.4 $
  +$Revision: 1.5 $
   
   =head1 COPYRIGHT
   
  -  Copyright (C) 1996-2004 Andy Wardley.  All Rights Reserved.
  -  Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
  -  Copyright (C) 2002-2004 Fotango Ltd.
  +Copyright (C) 1996-2004 Andy Wardley.  All Rights Reserved.
   
   This module is free software; you can redistribute it and/or
   modify it under the same terms as Perl itself.