[Templates-cvs] cvs commit: TT3/lib/Template Scanner.pm

cvs@template-toolkit.org cvs@template-toolkit.org
Mon, 08 Nov 2004 18:48:26 +0000


cvs         04/11/08 18:48:26

  Added:       lib/Template Scanner.pm
  Log:
  * added Template::Scanner
  
  Revision  Changes    Path
  1.1                  TT3/lib/Template/Scanner.pm
  
  Index: Scanner.pm
  ===================================================================
  #========================================================================
  #
  # Template::Scanner
  #
  # DESCRIPTION
  #   The scanner sits in front of the parser.  It scans the source text
  #   of a template to identify plain text blocks and embedded directives
  #   in various different forms (e.g. [% blah %], $blah, ${blah}, etc.)
  # 
  # AUTHOR
  #   Andy Wardley <abw@wardley.org>
  #
  # COPYRIGHT
  #   Copyright (C) 1996-2004 Andy Wardley.  All Rights Reserved.
  #
  #   This module is free software; you can redistribute it and/or
  #   modify it under the same terms as Perl itself.
  #
  # TODO
  #   Handle multiple lines for nested calls.
  #
  # REVISION
  #   $Id: Scanner.pm,v 1.1 2004/11/08 18:48:25 abw Exp $
  #
  #========================================================================
  
  package Template::Scanner;
  
  use strict;
  use warnings;
  use Template::Base;
  use Template::Tagset;
  use base qw( Template::Base );
  
  our $VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);
  our $DEBUG   = 0 unless defined $DEBUG;
  our $ERROR   = '';
  our $TAGSET  = 'Template::Tagset';
  
  
  #------------------------------------------------------------------------
  # init(\%config)
  #
  # Initialiser method called by base class new() method.
  #------------------------------------------------------------------------
  
  sub init {
      my ($self, $config) = @_;
      
      if (defined $config->{ tagset }) {
          # tagset object provided in config
          $self->{ tagset } = $config->{ tagset };
      }
      elsif (defined $config->{ tags }) {
          # construct tagset from options passed
          $self->{ tagset } = $self->new_tagset($config) || return;
      }
  
      # text option can be used to set a text handler
      $self->{ text } = $config->{ text }
          if defined $config->{ text };
  
      return $self;
  }
  
  
  #------------------------------------------------------------------------
  # tagset()
  # tagset($new_tagset)
  #
  # Method to get/set the current tagset.  Any access to the tagset sets
  # the reset flag to ensure that changes to the tagset midway through a
  # scan get picked up by the scan() method.
  #------------------------------------------------------------------------
  
  sub tagset {
      my $self = shift;
  
      if (@_) {
          die "TODO: cannot set new tagset at present, maybe ever\n";
          $self->{ reset } = 1;
          return ($self->{ tagset } = shift);
      }
      else {
          $self->{ reset } = 1;
          return $self->{ tagset } 
              || $self->decline('no tagset defined in scanner');
      }
  }
  
  
  sub tags {
      my $self = shift;
      my $tagset = $self->{ tagset }
          || return $self->decline('no tagset defined in scanner');
      return $tagset->tags(@_)
          || $self->error($tagset->error());
  }
  
  
  sub new_tagset {
      my $self = shift;
      my $tagsetpkg = $self->pkgvar( TAGSET => $TAGSET );
      return $tagsetpkg->new(@_)
          || $self->error( "scanner failed to create tagset: ", 
                           $tagsetpkg->error() );
  }
  
  
  #------------------------------------------------------------------------
  # scan($text, $handler, %options)
  #
  # Scan $text to identify plain text and embedded directives that match any
  # of the tags defined for the scanner.
  #------------------------------------------------------------------------
  
  # TODO: accept hash ref of options
  
  sub scan {
      my ($self, $text, $handler, %options) = @_;
      my ($pretext, $start, $match, $chunk, $lines, $start_pos, $end_pos);
      my $line = $options{ line } || 1;
  
      my $textref = ref $text ? $text : \$text;
      my $lineref = ref $line ? $line : \$line;
      my $texttag = $self->{ text };
  
      # create local scan state to ensure scan() method is re-entrant
      # TODO: is this still required, given that everything is now 
      # stuff in $match?
      local $self->{ info } = {
          text => $textref,
          line => $lineref,
  #       tags => $tagset,
      };
  
      # TODO: do we need to do this?
      # notify handler that we're about to begin scanning
  #    $handler = $handler->start_scanner($self)
  #        || return $self->scan_error($handler->error());
  
      # clear reset flag and initialise start position for first pass 
      $self->{ reset } = 0;
      $start_pos = pos($$textref) || 0;
  
      $self->debug("start scan\n") if $DEBUG;
  
      SCAN_FOR_TAGS: {
          $texttag   = $self->{ text };
          my $tagset = $self->{ tagset };
          my $tagmap = $tagset->tagmap() || return $self->error($tagset->error());
          my $regex  = $tagmap->{ regex };
            
          while ($$textref =~ /$regex/cg) { 
              # each time around the loop, the above regex returns
              # a chunk of text (possibly of zero length) in $1 and 
              # a token starting a tag in $2.
              my ($pretext, $start) = ($1, $2);
              
              if (defined $pretext && length $pretext) { 
                  $self->debug("text: ", $self->dump_text($pretext), "\n")
                      if $DEBUG;
                  
                  # count the newlines in the text
                  $lines = ($pretext =~ tr/\n//);
  
                  local $self->{ match } = $match = {
                      line    => $$lineref,
                      lines   => $lines,
                      offset  => $start_pos,
                      scanner => $self,      # TODO: do we need this?
                  };
  
                  # if a text tag is defined, we delegate to that, otherwise
                  # we call the handler text() method directly
                  if ($texttag) {
                      $handler = $texttag->scan(\$pretext, $handler, $match)
                          || return $self->scan_error($texttag->error());
                  }
                  else {
                      $handler = $handler->text(\$pretext, $match) 
                          || return $self->scan_error($handler->error());
                  }
                  
                  # update line counter
                  $$lineref += $lines;
              }
              
              $self->debug("tag: $start\n") if $DEBUG;
  
              # look in the tagmap fixed_start hash for a tag which matches
              # this start token (this is effectively a cache for the sake of
              # efficiency), or ask the tagset to match it for us (and update 
              # the fixed_start hash at the same time, to save us asking again)
              my $tag = $tagmap->{ fixed_start }->{ $start }
                  || $tagset->match($start)
                  || return $self->error($tagset->error());
  
              # setup data structure containing info about current match
              $start_pos = pos($$textref) || 0;
              local $self->{ match } = $match = {
                  start   => $start,
                  line    => $$lineref,
                  offset  => $start_pos,
                  scanner => $self,
              };
  
              # call the tag to scan the text
              $handler = $tag->scan($textref, $handler, $match)
                  || return $self->scan_error($tag->error());
              
              # check regex position to find where tag ended and next part
              $end_pos = pos($$textref) || 0;
  
              # update line count
              if ($match->{ lines }) {
                  # tag handler can set $match->{ lines } to indicate how many
                  # lines it consumed
                  $$lineref += $match->{ lines };
                  $self->debug("updated line count from tag: $match->{ lines } lines\n")
                      if $DEBUG;
              }
              else {
                  # otherwise we use the regex position to work it out, counting
                  # the number of newlines in the start token (unlikely, but not 
                  # impossible) and the following chunk of tag content up to where
                  # it ends at the current regex position
                  $chunk     = substr($$textref, $start_pos, $end_pos - $start_pos);
                  $lines     = ( $start =~ tr/\n// )
                             + ( $chunk =~ tr/\n// );
                  $$lineref += $lines;
                  $self->debug("updated line count from regex position: $lines lines\n")
                      if $DEBUG;
              }
  
              # previous end position becomes next start position
              $start_pos = $end_pos;
  
              # check to see if scanner has been reset, e.g. by updating tags 
              if ($match->{ reset_scanner } || $self->{ reset }) {
                  $self->debug("scanner reset\n") if $DEBUG;
                  redo SCAN_FOR_TAGS;
              }
          }
      }   # SCAN_FOR_TAGS
  
      if ($$textref =~ / \G (.+) /sgx) {
          # any text coming after the last tag is handled as above
          my $pretext = $1;
          $lines = ($pretext =~ tr/\n//);
          local $self->{ match } = $match = {
              line    => $$lineref,
              lines   => $lines,
              offset  => $start_pos,
              scanner => $self,
          };
          if ($texttag) {
              $handler = $texttag->scan(\$pretext, $handler, $match)
                  || return $self->scan_error($texttag->error());
          }
          else {
              $handler = $handler->text(\$pretext, $match) 
                  || return $self->scan_error($handler->error());
          }
          
          # update line counter
          $$lineref += $lines;
      }
  
      # tell handler we're done scanning
  #    return $handler->end_scanner($self)
  #        || $self->scan_error($handler->error());
  
      return $handler;
  }
  
  
  #------------------------------------------------------------------------
  # text()
  # text($handler)
  #
  # Method to get/set the current (optional) tag handler for text blocks.
  #------------------------------------------------------------------------
  
  sub text {
      my $self = shift;
      return @_ ? ($self->{ text } = shift) : $self->{ text };
  }
  
  
  
  sub info {
      my $self = shift;
      return $self->{ info } 
          || $self->error('no information available');
  }
  
  sub line {
      my $self = shift;
      my $info = $self->{ info } 
          || return $self->error('no information available');
      return $$info->{ line };
  }
  
  
  sub match {
      my $self = shift;
      return $self->{ match } 
          || $self->error('no current match');
  }
      
  
  sub location {
      my $self = shift;
      my $match = shift || $self->{ match } 
          || return $self->error('no current match');
  
      if (defined $match->{ location }) {
          return $match->{ location };
      }
      elsif ($match->{ lines }) {
          return 'lines ' 
               . join( '-', 
                       $match->{ line },
                       $match->{ line } + $match->{ lines } );
      }
      else {
          return 'line ' . $match->{ line };
      }
  }
  
  
  sub scan_error {
      my $self = shift;
      return $self->error($self->location(), ': ', @_);
  }
  
  
  
  1;
  __END__
  
  =head1 NAME
  
  Template::TT3::Scanner - template scanner
  
  =head1 SYNOPSIS
  
      package Template::TT3::Scanner;
  
      # TODO
  
  =head1 DESCRIPTION
  
  # TODO
  
  =head1 ISSUES OUTSTANDING
  
  Line number reporting - need to figure out who reports what errors 
  and who adds line numbers, etc.
  
  =head1 METHODS
  
  =head2 new()
  
  # TODO
  
  =head1 AUTHOR
  
  Andy Wardley  E<lt>abw@wardley.orgE<gt>
  
  =head1 VERSION
  
  $Revision: 1.1 $
  
  =head1 COPYRIGHT
  
    Copyright (C) 1996-2003 Andy Wardley.  All Rights Reserved.
    Copyright (C) 1998-2002 Canon Research Centre Europe Ltd.
  
  This module is free software; you can redistribute it and/or
  modify it under the same terms as Perl itself.
  
  =cut
  
  # Local Variables:
  # mode: perl
  # perl-indent-level: 4
  # indent-tabs-mode: nil
  # End:
  #
  # vim: expandtab shiftwidth=4: