404

package Pod::Simple::BlackBox;
#
# "What's in the box?"  "Pain."
#
###########################################################################
#
# This is where all the scary things happen: parsing lines into
#  paragraphs; and then into directives, verbatims, and then also
#  turning formatting sequences into treelets.
#
# Are you really sure you want to read this code?
#
#-----------------------------------------------------------------------------
#
# The basic work of this module, Pod::Simple::BlackBox, is the dirty work
# of parsing Pod into treelets (generally one per non-verbatim paragraph),
# and calling the proper callbacks on those treelets.
#
# Every node in a treelet is a ['name', {attrhash}, ...children...]

use integer; # vroom!
use strict;
use Carp ();
use vars qw($VERSION );
$VERSION = '3.28';
#use constant DEBUG => 7;
BEGIN {
  # Pod::Simple has to be loaded before its DEBUG sub can be borrowed.
  require Pod::Simple;
  # Alias this package's DEBUG to Pod::Simple::DEBUG, unless a DEBUG sub is
  # already defined here (for instance by enabling the commented-out
  # "use constant DEBUG" line above).
  *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Singular-named alias: hands its arguments straight on to parse_lines.
sub parse_line {
  my $self = shift;
  return $self->parse_lines(@_);
}

# - - -  Turn back now!  Run away!  - - -

# Feed lines of source text to the parser.
#
# Usage: $parser->parse_lines(@lines)
#
#   @lines - source lines, in order.  Line endings (\r and \n) are stripped
#            from a private copy; the caller's strings are never modified.
#            An undef element means end-of-stream: three '~end' tokens are
#            queued, the paragraph buffer is pondered, and the parser is
#            marked 'source_dead' so that all further input is ignored.
#
# Lines seen outside a pod block go to the 'code_handler' callback (if set);
# a "=cut" line ends the pod block and goes to 'cut_handler'; blank lines in
# pod that contain whitespace go to 'whiteline_handler'.  Each handler is
# called with copies of ($line, $line_number, $parser).  Lines inside pod are
# accumulated into paragraph tokens in $self->{'paras'}, which
# _ponder_paragraph_buffer is asked to process whenever a new paragraph
# starts, at "=cut", and at end-of-stream.
#
# Returns $self.
sub parse_lines {             # Usage: $parser->parse_lines(@lines)
  # an undef means end-of-stream
  my $self = shift;

  my $code_handler = $self->{'code_handler'};
  my $cut_handler  = $self->{'cut_handler'};
  my $wl_handler   = $self->{'whiteline_handler'};
  $self->{'line_count'} ||= 0;

  DEBUG > 4 and 
   print "# Parsing starting at line ", $self->{'line_count'}, ".\n";

  DEBUG > 5 and
   print "#  About to parse lines: ",
     join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";

  my $paras = ($self->{'paras'} ||= []);
   # paragraph buffer.  Because we need to defer processing of =over
   # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
   # to process this.

  $self->{'pod_para_count'} ||= 0;

  my $line;
  foreach my $source_line (@_) {
    if( $self->{'source_dead'} ) {
      DEBUG > 4 and print "# Source is dead.\n";
      last;
    }

    unless( defined $source_line ) {
      DEBUG > 4 and print "# Undef-line seen.\n";

      push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
      push @$paras, $paras->[-1], $paras->[-1];
       # So that it definitely fills the buffer.
      $self->{'source_dead'} = 1;
      $self->_ponder_paragraph_buffer;
      next;
    }


    if( $self->{'line_count'}++ ) {
      ($line = $source_line) =~ tr/\n\r//d;
       # If we don't have two vars, we'll end up with that there
       # tr/// modding the (potentially read-only) original source line!

    } else {
      # Very first line of the source: look for a byte-order mark.
      DEBUG > 2 and print "First line: [$source_line]\n";

      if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
        DEBUG and print "UTF-8 BOM seen.  Faking a '=encoding utf8'.\n";
        $self->_handle_encoding_line( "=encoding utf8" );
        # No real "=encoding" paragraph backs this faked line, so clear
        # the marker that _handle_encoding_line just set.
        delete $self->{'_processed_encoding'};
        $line =~ tr/\n\r//d;

      } elsif( $line =~ s/^\xFE\xFF//s ) {
        DEBUG and print "Big-endian UTF-16 BOM seen.  Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # Discard the remaining input and substitute an end-of-stream marker.
        splice @_;
        push @_, undef;
        next;

        # TODO: implement somehow?

      } elsif( $line =~ s/^\xFF\xFE//s ) {
        DEBUG and print "Little-endian UTF-16 BOM seen.  Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # Discard the remaining input and substitute an end-of-stream marker.
        splice @_;
        push @_, undef;
        next;

        # TODO: implement somehow?

      } else {
        DEBUG > 2 and print "First line is BOM-less.\n";
        ($line = $source_line) =~ tr/\n\r//d;
      }
    }

    # Try to guess encoding. Inlined for performance reasons.
    if(!$self->{'parse_characters'} && !$self->{'encoding'}
      && ($self->{'in_pod'} || $line =~ /^=/s)
      && $line =~ /[^\x00-\x7f]/
    ) {
      my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
      $self->_handle_encoding_line( "=encoding $encoding" );
      # The encoding was guessed rather than requested by a real "=encoding"
      # paragraph, so no paragraph will ever reach
      # _handle_encoding_second_level to clear the '_processed_encoding'
      # marker.  Clear it here, exactly as the UTF-8 BOM branch does;
      # otherwise a later genuine "=encoding" line makes
      # _handle_encoding_line die with "Nested processed encoding.".
      delete $self->{'_processed_encoding'};
      $self->{'_transcoder'} && $self->{'_transcoder'}->($line);

      my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;

      $self->whine(
        $self->{'line_count'},
        "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
      );
    }

    DEBUG > 5 and print "# Parsing line: [$line]\n";

    if(!$self->{'in_pod'}) {
      if($line =~ m/^=([a-zA-Z]+)/s) {
        if($1 eq 'cut') {
          $self->scream(
            $self->{'line_count'},
            "=cut found outside a pod block.  Skipping to next block."
          );

          ## Before there were errata sections in the world, it was
          ## least-pessimal to abort processing the file.  But now we can
          ## just barrel on thru (but still not start a pod block).
          #splice @_;
          #push @_, undef;

          next;
        } else {
          $self->{'in_pod'} = $self->{'start_of_pod_block'}
                            = $self->{'last_was_blank'}     = 1;
          # And fall thru to the pod-mode block further down
        }
      } else {
        DEBUG > 5 and print "# It's a code-line.\n";
        # "map $_, ..." passes copies, so the handler cannot alter $line.
        $code_handler->(map $_, $line, $self->{'line_count'}, $self)
         if $code_handler;
        # Note: this may cause code to be processed out of order relative
        #  to pods, but in order relative to cuts.

        # Note also that we haven't yet applied the transcoding to $line
        #  by time we call $code_handler!

        if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
          # That RE is from perlsyn, section "Plain Old Comments (Not!)",
          #$fname = $2 if defined $2;
          #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
          DEBUG > 1 and print "# Setting nextline to $1\n";
          $self->{'line_count'} = $1 - 1;
        }

        next;
      }
    }

    # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    # Else we're in pod mode:

    # Apply any necessary transcoding:
    $self->{'_transcoder'} && $self->{'_transcoder'}->($line);

    # HERE WE CATCH =encoding EARLY!
    if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
      next if $self->parse_characters;   # Ignore this line
      $line = $self->_handle_encoding_line( $line );
    }

    if($line =~ m/^=cut/s) {
      # here ends the pod block, and therefore the previous pod para
      DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
      $self->{'in_pod'} = 0;
      # ++$self->{'pod_para_count'};
      $self->_ponder_paragraph_buffer();
       # by now it's safe to consider the previous paragraph as done.
      $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
       if $cut_handler;

      # TODO: add to docs: Note: this may cause cuts to be processed out
      #  of order relative to pods, but in order relative to code.

    } elsif($line =~ m/^(\s*)$/s) {  # it's a blank line
      if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
        $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
          if $wl_handler;
      }

      if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
        DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
        push @{$paras->[-1]}, $line;
      }  # otherwise it's not interesting

      if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
        DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; 
      }

      $self->{'last_was_blank'} = 1;

    } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...

      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
        # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
        my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
         # Note that in "=head1 foo", the WS is lost.
         # Example: ['=head1', {'start_line' => 123}, ' foo']

        ++$self->{'pod_para_count'};

        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.

        push @$paras, $new; # the new incipient paragraph
        DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";

      } elsif($line =~ m/^\s/s) {

        if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
          DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
          push @{$paras->[-1]}, $line;
        } else {
          ++$self->{'pod_para_count'};
          $self->_ponder_paragraph_buffer();
           # by now it's safe to consider the previous paragraph as done.
          DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
          push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
        }
      } else {
        ++$self->{'pod_para_count'};
        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.
        push @$paras, ['~Para',  {'start_line' => $self->{'line_count'}}, $line];
        DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;

    } else {
      # It's a non-blank line /continuing/ the current para
      if(@$paras) {
        DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
        push @{$paras->[-1]}, $line;
      } else {
        # Unexpected case!
        die "Continuing a paragraph but \@\$paras is empty?";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
    }

  } # ends the big per-line foreach loop

  DEBUG > 1 and print(pretty(@$paras), "\n");
  return $self;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Act on a (real or faked) "=encoding NAME" line by installing the matching
# transcoder in $self->{'_transcoder'}.
#
#   $line - the candidate line.
#
# Returns nothing when the parser is in parse_characters mode; otherwise
# returns $line unchanged.  A line that is not a well-formed "=encoding NAME"
# is returned without any further action.  For a well-formed line, the
# request is appended to 'encoding_command_reqs', its outcome (undef or ''
# for success, otherwise an error message) to 'encoding_command_statuses',
# and the requested name is stored in '_processed_encoding'.
#
# Dies if '_processed_encoding' is already set, if a transcoder is already
# installed when a first encoding is being set up, or if the freshly made
# transcoder fails its smoke test.
sub _handle_encoding_line {
  my($self, $line) = @_;

  return if $self->parse_characters;
  return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
  DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";

  my $e    = $1;
  my $orig = $e;
  push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";

  # Cf.   perldoc Encode   and   perldoc Encode::Supported
  require Pod::Simple::Transcode;

  # Encoding names are compared case-insensitively, ignoring '-' and '_'.
  my $squash = sub {
    my $name = lc $_[0];
    $name =~ tr/-_//d;
    return $name;
  };

  my $enc_error;

  if( $self->{'encoding'} ) {
    # An encoding is already in force: the new line is either a harmless
    # repeat or a conflict.
    my $norm_current = $squash->( $self->{'encoding'} );
    my $norm_e       = $squash->( $e );

    if($norm_current eq $norm_e) {
      DEBUG > 1 and print "The '=encoding $orig' line is ",
       "redundant.  ($norm_current eq $norm_e).  Ignoring.\n";
      $enc_error = '';
       # But that doesn't necessarily mean that the earlier one went okay
    } else {
      $enc_error = "Encoding is already set to " . $self->{'encoding'};
      DEBUG > 1 and print $enc_error;
    }

  } else {
    # OK, let's turn on the encoding.  It is recorded even if it later
    # turns out to be unsupported.
    DEBUG > 1 and print " Setting encoding to $e\n";
    $self->{'encoding'} = $e;

    if( $e eq 'HACKRAW' ) {
      DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";

    } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {

      if( $self->{'_transcoder'} ) {   # should never happen
        die "WHAT? _transcoder is already set?!";
      }
      $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);

      # Smoke-test the new transcoder on a few throwaway strings.
      eval {
        my @x = ('', "abc", "123");
        $self->{'_transcoder'}->(@x);
      };
      if( $@ ) {
        die "Really unexpected error setting up encoding $e: $@\nAborting";
      }
      $self->{'detected_encoding'} = $e;

    } else {
      my @supported = Pod::Simple::Transcode::->all_encodings;

      # Note unsupported, and complain
      DEBUG and print " Encoding [$e] is unsupported.",
        "\nSupporteds: @supported\n";

      # Look for a near match:
      my $suggestion = '';
      my $wanted     = $squash->($e);
      foreach my $enc (@supported) {
        if( $squash->($enc) eq $wanted ) {
          $suggestion = "  (Maybe \"$e\" should be \"$enc\"?)";
          last;
        }
      }

      my $encmodver = Pod::Simple::Transcode::->encmodver;
      $enc_error = join '' =>
        "This document probably does not appear as it should, because its ",
        "\"=encoding $e\" line calls for an unsupported encoding.",
        $suggestion, "  [$encmodver\'s supported encodings are: @supported]"
      ;

      $self->scream( $self->{'line_count'}, $enc_error );
    }
  }

  push @{ $self->{'encoding_command_statuses'} }, $enc_error;

  # Should never happen: the previous request has not been cleared yet.
  die "Nested processed encoding."
    if defined $self->{'_processed_encoding'};
  $self->{'_processed_encoding'} = $orig;

  return $line;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Paragraph-level follow-up for an "=encoding" directive.
#
# By the time this is called, the encoding (if well formed) will already
# have been acted on by _handle_encoding_line; here we only report problems.
#
#   $para - the '=encoding' paragraph token: [name, {attrs}, content...]
#
# If '_processed_encoding' is set, the line was handled earlier: the marker
# is cleared and, when the last recorded status is a true value (an error
# message), it is whined about.  If the marker is not set, the directive is
# reported as a syntax error.  Returns nothing.
sub _handle_encoding_second_level {
  my($self, $para) = @_;

  # Everything after the name and the attribute hash is content.
  my $content = join ' ', @{$para}[ 2 .. $#$para ];
  for ($content) {
    s/^\s+//s;
    s/\s+$//s;
  }

  DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";

  unless( defined $self->{'_processed_encoding'} ) {
    # Never matched as a well-formed line, so it's a syntax error
    $self->whine( $para->[1]{'start_line'},
      "Invalid =encoding syntax: $content"
    );
    return;
  }

  #if($content ne $self->{'_processed_encoding'}) {
  #  Could it happen?
  #}
  delete $self->{'_processed_encoding'};

  # It's already been handled.  Check for errors.
  if( !$self->{'encoding_command_statuses'} ) {
    DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
    return;
  }

  unless( $self->{'encoding_command_statuses'}[-1] ) {
    DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
    return;
  }

  $self->whine( $para->[1]{'start_line'},
    sprintf "Couldn't do %s: %s",
      $self->{'encoding_command_reqs'  }[-1],
      $self->{'encoding_command_statuses'}[-1],
  );

  return;
}

#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`

{
# Magic line number: the 'start_line' given to every synthesized paragraph.
my $m = -321;

# Return 0 or more fake-o paragraph tokens explaining the errors accumulated
# in $self->{'errata'} (a hash of line number => list of messages).
#
# With no errata, returns the empty list.  Otherwise returns a
# "POD ERRORS" =head1, an introductory paragraph, and an =over ... =back
# region holding, for each line number in ascending numeric order, an =item
# followed by one pre-cooked ~Para per message.
sub _gen_errata {
  my($self) = @_;

  my $errata = $self->{'errata'};
  return() unless $errata and keys %$errata;

  # Fixed preamble.
  my @out = (
    ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
    ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
     "Hey! ",
     ['B', {},
      'The above document had some coding errors, which are explained below:'
     ]
    ],
    ['=over',  {'start_line' => $m, 'errata' => 1}, ''],
  );

  # One =item per offending line, then its messages.
  for my $line (sort { $a <=> $b } keys %$errata) {
    push @out, ['=item', {'start_line' => $m}, "Around line $line:"];
    for my $message (@{ $errata->{$line} }) {
      push @out, ['~Para', {'start_line' => $m, '~cooked' => 1}, $message];
    }
  }

  # TODO: report of unknown entities? unrenderable characters?

  push @out, ['=back',  {'start_line' => $m, 'errata' => 1}, ''];

  DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";

  return @out;
}

}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

##############################################################################
##
##  stop reading now stop reading now stop reading now stop reading now stop
##
##                         HERE IT BECOMES REALLY SCARY
##
##  stop reading now stop reading now stop reading now stop reading now stop
##
##############################################################################

# Process the paragraph tokens queued in $self->{'paras'}.
#
# Takes only the parser object.  Tokens are shifted off the front of the
# buffer one at a time and handled according to their type; several branches
# push a rewritten token (sometimes preceded by a synthetic one) back onto
# the front of the buffer and loop again.  Processing stops early when the
# only token left is an =over, ~Verbatim or =item, since those need
# lookahead.  The first call that finds a non-empty buffer also fires the
# start of the 'Document' element.  Always returns nothing.
sub _ponder_paragraph_buffer {

  # Para-token types as found in the buffer.
  #   ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
  #   =over, =back, =item
  #   and the null =pod (to be complained about if over one line)
  #
  # "~data" paragraphs are something we generate at this level, depending on
  # a currently open =over region

  # Events fired:  Begin and end for:
  #                   directivename (like head1 .. head4), item, extend,
  #                   for (from =begin...=end, =for),
  #                   over-bullet, over-number, over-text, over-block,
  #                   item-bullet, item-number, item-text,
  #                   Document,
  #                   Data, Para, Verbatim
  #                   B, C, longdirname (TODO -- wha?), etc. for all directives
  # 

  my $self = $_[0];
  my $paras;
  # Nothing to do when the buffer is empty.
  return unless @{$paras = $self->{'paras'}};
  # Stack of the currently open regions (the code below looks for '=over'
  # and '=for' entries on it).
  my $curr_open = ($self->{'curr_open'} ||= []);

  my $scratch;

  DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";

  # We have something in our buffer.  So apparently the document has started.
  unless($self->{'doc_has_started'}) {
    $self->{'doc_has_started'} = 1;
    
    my $starting_contentless;
    $starting_contentless =
     (
       !@$curr_open  
       and @$paras and ! grep $_->[0] ne '~end', @$paras
        # i.e., if the paras is all ~ends
     )
    ;
    DEBUG and print "# Starting ", 
      $starting_contentless ? 'contentless' : 'contentful',
      " document\n"
    ;
    
    $self->_handle_element_start(
      ($scratch = 'Document'),
      {
        'start_line' => $paras->[0][1]{'start_line'},
        $starting_contentless ? ( 'contentless' => 1 ) : (),
      },
    );
  }

  my($para, $para_type);
  while(@$paras) {
    last if @$paras == 1 and
      ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
        or $paras->[0][0] eq '=item' )
    ;
    # Those're the three kinds of paragraphs that require lookahead.
    #   Actually, an "=item Foo" inside an <over type=text> region
    #   and any =item inside an <over type=block> region (rare)
    #   don't require any lookahead, but all others (bullets
    #   and numbers) do.

# TODO: whinge about many kinds of directives in non-resolving =for regions?
# TODO: many?  like what?  =head1 etc?

    $para = shift @$paras;
    $para_type = $para->[0];

    DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
      $self->_dump_curr_open(), ")\n";
    
    # These four types are dispatched before the ignore-mode check below.
    # A true return from the sub-ponderer means the token needs no further
    # processing in this iteration.
    if($para_type eq '=for') {
      next if $self->_ponder_for($para,$curr_open,$paras);

    } elsif($para_type eq '=begin') {
      next if $self->_ponder_begin($para,$curr_open,$paras);

    } elsif($para_type eq '=end') {
      next if $self->_ponder_end($para,$curr_open,$paras);

    } elsif($para_type eq '~end') { # The virtual end-document signal
      next if $self->_ponder_doc_end($para,$curr_open,$paras);
    }


    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # Drop the paragraph if any open region is flagged '~ignore'.
    if(grep $_->[1]{'~ignore'}, @$curr_open) {
      DEBUG > 1 and
       print "Skipping $para_type paragraph because in ignore mode.\n";
      next;
    }
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

    if($para_type eq '=pod') {
      $self->_ponder_pod($para,$curr_open,$paras);

    } elsif($para_type eq '=over') {
      next if $self->_ponder_over($para,$curr_open,$paras);

    } elsif($para_type eq '=back') {
      next if $self->_ponder_back($para,$curr_open,$paras);

    } else {

      # All non-magical codes!!!
      
      # Here we start using $para_type for our own twisted purposes, to
      #  mean how it should get treated, not as what the element name
      #  should be.

      DEBUG > 1 and print "Pondering non-magical $para_type\n";

      my $i;

      # Enforce some =headN discipline
      if($para_type =~ m/^=head\d$/s
         and ! $self->{'accept_heads_anywhere'}
         and @$curr_open
         and $curr_open->[-1][0] eq '=over'
      ) {
        DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
        $self->whine(
          $para->[1]{'start_line'},
          "You forgot a '=back' before '$para_type'"
        );
        # Re-queue a synthetic =back ahead of this same paragraph, so the
        # heading is reconsidered on a later pass.
        unshift @$paras, ['=back', {}, ''], $para;   # close the =over
        next;
      }


      if($para_type eq '=item') {

        # Find the innermost enclosing =over (the last one on the stack).
        my $over;
        unless(@$curr_open and
               $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
          $self->whine(
            $para->[1]{'start_line'},
            "'=item' outside of any '=over'"
          );
          # Re-queue a synthetic =over ahead of this same =item.
          unshift @$paras,
            ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
            $para
          ;
          next;
        }
        
        
        my $over_type = $over->[1]{'~type'};
        
        if(!$over_type) {
          # Shouldn't happen!
          die "Typeless over in stack, starting at line "
           . $over->[1]{'start_line'};

        } elsif($over_type eq 'block') {
          # Complain only once per region; '~bitched_about' remembers that
          # the complaint was already made.
          unless($curr_open->[-1][1]{'~bitched_about'}) {
            $curr_open->[-1][1]{'~bitched_about'} = 1;
            $self->whine(
              $curr_open->[-1][1]{'start_line'},
              "You can't have =items (as at line "
              . $para->[1]{'start_line'}
              . ") unless the first thing after the =over is an =item"
            );
          }
          # Just turn it into a paragraph and reconsider it
          $para->[0] = '~Para';
          unshift @$paras, $para;
          next;

        } elsif($over_type eq 'text') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
          
          if($item_type eq 'text') {
            # Nothing special needs doing for 'text'
          } elsif($item_type eq 'number' or $item_type eq 'bullet') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected text after =item, not a $item_type"
            );
            # Undo our clobbering:
            push @$para, $para->[1]{'~orig_content'};
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }
          
          # =item-text thingies don't need any assimilation, it seems.

        } elsif($over_type eq 'number') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
          
          # The region's '~counter' attribute is bumped for every item seen,
          # whatever its type, and gives the number this item should have.
          my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
          
          if($item_type eq 'bullet') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            push @$para, $para->[1]{'~orig_content'};
              # restore the bullet, blocking the assimilation of next para

          } elsif($item_type eq 'text') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            # Text content will still be there and will block next ~Para

          } elsif($item_type ne 'number') {
            die "Unknown item type $item_type"; # should never happen

          } elsif($expected_value == $para->[1]{'number'}) {
            DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
            
          } else {
            DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
             " instead of the expected value of $expected_value\n";
            $self->whine(
              $para->[1]{'start_line'},
              "You have '=item " . $para->[1]{'number'} .
              "' instead of the expected '=item $expected_value'"
            );
            $para->[1]{'number'} = $expected_value;  # correcting!!
          }
            
          # Only name and attributes remain, i.e. the item has no content.
          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            if($paras->[0][0] eq '~Para') {
              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }


        } elsif($over_type eq 'bullet') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
          
          if($item_type eq 'bullet') {
            # as expected!

            if( $para->[1]{'~_freaky_para_hack'} ) {
              DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
              push @$para, delete $para->[1]{'~_freaky_para_hack'};
            }

          } elsif($item_type eq 'number') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
            push @$para, $para->[1]{'~orig_content'};
             # and block assimilation of the next paragraph
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } elsif($item_type eq 'text') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
             # But doesn't need processing.  But it'll block assimilation
             #  of the next para.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          # Only name and attributes remain, i.e. the item has no content.
          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            if($paras->[0][0] eq '~Para') {
              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }

        } else {
          die "Unhandled =over type \"$over_type\"?";
          # Shouldn't happen!
        }

        # The token name becomes '=item-' plus the =over type; the leading
        # '=' is stripped further down, just before the treelet is traversed.
        $para_type = 'Plain';
        $para->[0] .= '-' . $over_type;
        # Whew.  Now fall thru and process it.


      } elsif($para_type eq '=extend') {
        # Well, might as well implement it here.
        $self->_ponder_extend($para);
        next;  # and skip
      } elsif($para_type eq '=encoding') {
        # Not actually acted on here, but we catch errors here.
        $self->_handle_encoding_second_level($para);
        next unless $self->keep_encoding_directive;
        $para_type = 'Plain';
      # In the next three branches a leading '?' marks the treatment as
      # provisional: it is settled below by looking at enclosing =for regions.
      } elsif($para_type eq '~Verbatim') {
        $para->[0] = 'Verbatim';
        $para_type = '?Verbatim';
      } elsif($para_type eq '~Para') {
        $para->[0] = 'Para';
        $para_type = '?Plain';
      } elsif($para_type eq 'Data') {
        $para->[0] = 'Data';
        $para_type = '?Data';
      # Any other directive: strip the '=' and look up how to treat it in
      # the 'accept_directives' table.
      } elsif( $para_type =~ s/^=//s
        and defined( $para_type = $self->{'accept_directives'}{$para_type} )
      ) {
        DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
      } else {
        # An unknown directive!
        DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
         $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
        ;
        $self->whine(
          $para->[1]{'start_line'},
          "Unknown directive: $para->[0]"
        );

        # And maybe treat it as text instead of just letting it go?
        next;
      }

      # Resolve a provisional ('?'-prefixed) treatment.
      if($para_type =~ s/^\?//s) {
        if(! @$curr_open) {  # usual case
          DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
        } else {
          my @fors = grep $_->[0] eq '=for', @$curr_open;
          DEBUG > 1 and print "Containing fors: ",
            join(',', map $_->[1]{'target'}, @fors), "\n";
          
          if(! @fors) {
            DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
            
          #} elsif(grep $_->[1]{'~resolve'}, @fors) {
          #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
          } elsif( $fors[-1][1]{'~resolve'} ) {
            # Look to the immediately containing for
          
            if($para_type eq 'Data') {
              DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
              $para->[0] = 'Para';
              $para_type = 'Plain';
            } else {
              DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
            }
          } else {
            DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
            $para->[0] = $para_type = 'Data';
          }
        }
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # By now $para_type must be one of exactly these three treatments.
      if($para_type eq 'Plain') {
        $self->_ponder_Plain($para);
      } elsif($para_type eq 'Verbatim') {
        $self->_ponder_Verbatim($para);        
      } elsif($para_type eq 'Data') {
        $self->_ponder_Data($para);
      } else {
        die "\$para type is $para_type -- how did that happen?";
        # Shouldn't happen.
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # Strip a leading '~' or '=' from the token name.
      $para->[0] =~ s/^[~=]//s;

      DEBUG and print "\n", pretty($para), "\n";

      # traverse the treelet (which might well be just one string scalar)
      $self->{'content_seen'} ||= 1;
      $self->_traverse_treelet_bit(@$para);
    }
  }
  
  return;
}

###########################################################################
# The sub-ponderers...



# Sub-ponderer for "=for TARGET ..." paragraphs: fake it out as a
# "=begin TARGET" / content / "=end TARGET" triple.
#
#   $para      - the '=for' paragraph token
#   $curr_open - stack of currently open regions
#   $paras     - the paragraph buffer; replacement tokens are unshifted here
#
# The target word (and the whitespace around it) is removed from the first
# content string that contains one, the paragraph itself is renamed to
# 'Data', and the three tokens are queued at the front of the buffer.
# Nothing is queued when any open region is being ignored, or when no target
# can be found (which is whined about).  Always returns 1.
sub _ponder_for {
  my ($self,$para,$curr_open,$paras) = @_;

  if(grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print "Ignoring ignorable =for\n";
    return 1;
  }

  # Content strings start at index 2 (after the name and attribute hash).
  my $target;
  foreach my $idx (2 .. $#$para) {
    next unless $para->[$idx] =~ s/^\s*(\S+)\s*//s;
    $target = $1;
    last;
  }

  my $start_line = $para->[1]{'start_line'};

  if(!defined $target) {
    $self->whine(
      $start_line,
      "=for without a target?"
    );
    return 1;
  }

  DEBUG > 1 and
   print "Faking out a =for $target as a =begin $target / =end $target\n";

  $para->[0] = 'Data';

  # Each bracket token gets its own fresh attribute hash.
  my $bracket = sub {
    return [ $_[0],
      {'start_line' => $start_line, '~really' => '=for'},
      $target,
    ];
  };

  unshift @$paras, $bracket->('=begin'), $para, $bracket->('=end');

  return 1;
}

# Handle a "=begin TARGET [title]" paragraph: work out whether the region
# it opens is accepted by this parser, record that in the paragraph's
# attribute hash, and push the paragraph onto the open-region stack.
#
#   $para       the =begin paragraph: [ '=begin', {attrs}, line, ... ];
#               its content lines are removed and its name becomes '=for'
#   $curr_open  stack of currently open regions (the paragraph is pushed)
#   $paras      the paragraph buffer (not used here)
#
# Attributes set on $para->[1]: 'target', optionally 'title' and
# 'target_matching', plus the internal '~really', '~ignore', '~resolve'.
#
# Always returns 1.
sub _ponder_begin {
  my ($self,$para,$curr_open,$paras) = @_;
  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  unless(length($content)) {
    $self->whine(
      $para->[1]{'start_line'},
      "=begin without a target?"
    );
    DEBUG and print "Ignoring targetless =begin\n";
    return 1;
  }
  
  my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
  # Test for a non-empty title rather than a true one, so that a title
  # consisting of just "0" is kept.
  $para->[1]{'title'} = $title if defined $title and length $title;
  $para->[1]{'target'} = $target;  # without any ':'
  $content = $target; # strip off the title
  
  # Normalize ":!foo" to "!:foo", then peel off the two optional prefixes.
  $content =~ s/^:!/!:/s;
  my $neg;  # whether this is a negation-match
  $neg = 1        if $content =~ s/^!//s;
  my $to_resolve;  # whether to process formatting codes
  $to_resolve = 1 if $content =~ s/^://s;
  
  my $dont_ignore; # whether this target matches us
  
  # Try each comma-separated target name; unless negated, '*' is tried
  # last as a catch-all.
  foreach my $target_name (
    split(',', $content, -1),
    $neg ? () : '*'
  ) {
    DEBUG > 2 and
     print " Considering whether =begin $content matches $target_name\n";
    next unless $self->{'accept_targets'}{$target_name};
    
    DEBUG > 2 and
     print "  It DOES match the acceptable target $target_name!\n";
    $to_resolve = 1
      if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
    $dont_ignore = 1;
    $para->[1]{'target_matching'} = $target_name;
    last; # stop looking at other target names
  }

  # A leading "!" inverts the result of the matching above.
  if($neg) {
    if( $dont_ignore ) {
      $dont_ignore = '';
      delete $para->[1]{'target_matching'};
      DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
    } else {
      $dont_ignore = 1;
      $para->[1]{'target_matching'} = '!';
      DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
    }
  }

  $para->[0] = '=for';  # Just what we happen to call these, internally
  $para->[1]{'~really'} ||= '=begin';
  $para->[1]{'~ignore'}   = (! $dont_ignore) || 0;
  $para->[1]{'~resolve'}  = $to_resolve || 0;

  DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
    "ignore contents of this region\n";
  DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
    ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
  DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";

  push @$curr_open, $para;
  # Fire the start event only if neither this region nor any enclosing
  # one is being ignored.
  if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print "Ignoring ignorable =begin\n";
  } else {
    $self->{'content_seen'} ||= 1;
    $self->_handle_element_start((my $scratch='for'), $para->[1]);
  }

  return 1;
}

# Handle an "=end TARGET" paragraph.  The directive must name exactly one
# word, and that word must equal the 'target' attribute of the innermost
# open region, which must itself be a =begin/=for region (stored on the
# stack under the name '=for').  Anything else is whined about and the
# paragraph is dropped.  On success the region is popped from
# @$curr_open, and an element-end event is fired unless some open region
# is being ignored.
#
# Always returns 1.
sub _ponder_end {
  my ($self,$para,$curr_open,$paras) = @_;

  my $content = join ' ', splice @$para, 2;
  for ($content) {
    s/^\s+//s;
    s/\s+$//s;
  }
  DEBUG and print "Ogling '=end $content' directive\n";

  # Whether the innermost open thing is a =begin/=for region.
  my $closing_a_for = (@$curr_open and $curr_open->[-1][0] eq '=for');

  if(!length($content)) {
    my $suggestion = $closing_a_for
      ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
      : '';
    $self->whine(
      $para->[1]{'start_line'},
      "'=end' without a target?" . $suggestion
    );
    DEBUG and print "Ignoring targetless =end\n";
    return 1;
  }

  if($content !~ m/^\S+$/) {  # i.e., it's more than one word
    $self->whine(
      $para->[1]{'start_line'},
      "'=end $content' is invalid.  (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if(!$closing_a_for) {
    $self->whine(
      $para->[1]{'start_line'},
      "=end $content without matching =begin.  (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if($content ne $curr_open->[-1][1]{'target'}) {
    $self->whine(
      $para->[1]{'start_line'},
      "=end $content doesn't match =begin "
      . $curr_open->[-1][1]{'target'}
      . ".  (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
    return 1;
  }

  # It's okay to close.  The ignored region may be the one being closed
  # or some other open one; either way no event is fired.
  my $in_ignored_region = grep { $_->[1]{'~ignore'} } @$curr_open;
  if($in_ignored_region) {
    DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
  } else {
    # The open region's start_line is overwritten with that of the =end.
    $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};

    $self->{'content_seen'} ||= 1;
    $self->_handle_element_end( my $scratch = 'for', $para->[1]);
  }

  DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
  pop @$curr_open;

  return 1;
}

# Handle the virtual '~end' (end-of-document) paragraph.
#
#   $para       the '~end' paragraph itself
#   $curr_open  stack of regions that are still open
#   $paras      the paragraph buffer (modified in place)
#
# This works in up to three passes, re-queuing $para each time it still
# has work to hand back to the caller:
#   1. if regions are still open, queue fake closers for them;
#   2. once, try to generate an errata section and queue its paragraphs;
#   3. otherwise empty the buffer and fire the end-of-Document event.
#
# Always returns 1.
sub _ponder_doc_end {
  my ($self,$para,$curr_open,$paras) = @_;
  if(@$curr_open) { # Deal with things left open
    DEBUG and print "Stack is nonempty at end-document: (",
      $self->_dump_curr_open(), ")\n";
      
    DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
    # Put the synthetic =end / =back paragraphs at the front of the buffer.
    unshift @$paras, $self->_closers_for_all_curr_open;
    # Make sure there is exactly one ~end in the parastack, at the end:
    @$paras = grep $_->[0] ne '~end', @$paras;
    push @$paras, $para, $para;
     # We need two -- once for the next cycle where we
     #  generate errata, and then another to be at the end
     #  when that loop back around to process the errata.
    return 1;
    
  } else {
    DEBUG and print "Okay, stack is empty now.\n";
  }
  
  # Try generating errata section, if applicable
  # (the '~tried_gen_errata' flag makes sure this is attempted only once)
  unless($self->{'~tried_gen_errata'}) {
    $self->{'~tried_gen_errata'} = 1;
    my @extras = $self->_gen_errata();
    if(@extras) {
      unshift @$paras, @extras;
      DEBUG and print "Generated errata... relooping...\n";
      return 1;  # I.e., loop around again to process these fake-o paragraphs
    }
  }
  
  splice @$paras; # Well, that's that for this paragraph buffer.
  DEBUG and print "Throwing end-document event.\n";

  $self->_handle_element_end( my $scratch = 'Document' );
  return 1; # Hasta la byebye
}

# Handle a "=pod" paragraph.  A =pod paragraph with more than one content
# line is whined about.  If a 'pod_handler' callback is set on the
# parser, it is called with the reconstructed directive text, the
# paragraph's start line and the parser object.
#
# Returns nothing (empty list / undef).
sub _ponder_pod {
  my ($self,$para,$curr_open,$paras) = @_;

  if (@$para > 3) {
    $self->whine(
      $para->[1]{'start_line'},
      "=pod directives shouldn't be over one line long!  Ignoring all "
       . (@$para - 2) . " lines of content"
    );
  }

  # Content ignored unless 'pod_handler' is set
  my $pod_handler = $self->{'pod_handler'};
  if ($pod_handler) {
      my $line_num = $para->[1]{'start_line'};
      my $rest     = $para->[2];
      # imitate cut_handler output
      my $line = $rest eq '' ? "=pod" : "=pod $rest";
      $pod_handler->($line, $line_num, $self);
  }

  # The surrounding methods set content_seen, so let us remain consistent.
  # I do not know why it was not here before -- should it not be here?
  # $self->{'content_seen'} ||= 1;

  return;
}

# Handle an "=over [indent]" paragraph.  The kind of list is decided by
# peeking at the next paragraph in the buffer: an =item gives the type
# reported by _get_initial_item_type, a =back gives an 'empty' list (or
# drops both paragraphs unless 'parse_empty_lists' is set), anything else
# gives a 'block'.  The paragraph is pushed onto @$curr_open, gets its
# '~type' and 'indent' attributes set, and an "over-TYPE" element-start
# event is fired.
#
# Returns 1 when the =over is dropped without opening anything, and
# nothing once the list has been opened.
sub _ponder_over {
  my ($self,$para,$curr_open,$paras) = @_;
  return 1 unless @$paras;

  my $next_type = $paras->[0][0];

  if($next_type eq '~end') {
    $self->whine(
      $para->[1]{'start_line'},
      "=over is the last thing in the document?!"
    );
    return 1; # But feh, ignore it.
  }

  # Ignore empty lists by default
  if($next_type eq '=back' and !$self->{'parse_empty_lists'}) {
    shift @$paras;
    return 1;
  }

  my $list_type =
      $next_type eq '=item' ? $self->_get_initial_item_type($paras->[0])
    : $next_type eq '=back' ? 'empty'
    :                         'block';

  $para->[1]{'~type'} = $list_type;
  push @$curr_open, $para;
   # yes, we reuse the paragraph as a stack item

  my $attrs   = $para->[1];
  my $content = join ' ', splice @$para, 2;

  if($content =~ m/^\s*$/s) {
    # A bare =over gets the default indent.
    $attrs->{'indent'} = 4;

  } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
    no integer;  # so that a fractional indent isn't truncated to 0
    $attrs->{'indent'} = $1;
    if($1 == 0) {
      $self->whine(
        $attrs->{'start_line'},
        "Can't have a 0 in =over $content"
      );
      $attrs->{'indent'} = 4;
    }

  } else {
    $self->whine(
      $attrs->{'start_line'},
      "=over should be: '=over' or '=over positive_number'"
    );
    $attrs->{'indent'} = 4;
  }
  DEBUG > 1 and print "=over found of type $list_type\n";

  $self->{'content_seen'} ||= 1;
  my $scratch = 'over-' . $list_type;
  $self->_handle_element_start($scratch, $attrs);

  return;
}
      
# Handle a "=back" paragraph.  Any content after the directive is whined
# about.  If the innermost open region is an =over, it is popped from
# @$curr_open and an "over-TYPE" element-end event is fired; otherwise
# the stray =back is whined about and ignored.
#
# Returns 1 for a stray =back, and otherwise whatever
# _handle_element_end returns.
sub _ponder_back {
  my ($self,$para,$curr_open,$paras) = @_;
  # TODO: fire off </item-number> or </item-bullet> or </item-text> ??

  my $content = join ' ', splice @$para, 2;
  $self->whine(
    $para->[1]{'start_line'},
    "=back doesn't take any parameters, but you said =back $content"
  ) if $content =~ m/\S/;

  unless(@$curr_open and $curr_open->[-1][0] eq '=over') {
    DEBUG > 1 and print "=back found without a matching =over.  Stack: (",
        join(', ', map $_->[0], @$curr_open), ").\n";
    $self->whine(
      $para->[1]{'start_line'},
      '=back without =over'
    );
    return 1; # and ignore it
  }

  # Expected case: we're closing the most recently opened thing
  DEBUG > 1 and print "=back happily closes matching =over\n";
  $self->{'content_seen'} ||= 1;

  my $closed_over = pop @$curr_open;
  my $scratch     = 'over-' . $closed_over->[1]{'~type'};
  return $self->_handle_element_end($scratch, $para->[1]);
}

# Handle an "=item" paragraph in the light of the =over it belongs to.
#
#   $para       the =item paragraph: [ '=item', {attrs}, content... ]
#   $curr_open  stack of currently open regions
#   $paras      the paragraph buffer (may be modified in place)
#
# The type of the enclosing list ('text', 'number', 'bullet' or 'block')
# was fixed when the =over was opened; an item of a different type is
# whined about and coerced.  On the normal path the list type is appended
# to the paragraph name ($para->[0] .= '-' . TYPE) and nothing is
# returned.  When the item cannot be processed as it stands (no enclosing
# =over, or a 'block' list) replacement paragraphs are queued on @$paras
# and 1 is returned.
sub _ponder_item {
  my ($self,$para,$curr_open,$paras) = @_;
  my $over;
  # Find the innermost open =over, if any.
  unless(@$curr_open and
         $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
    $self->whine(
      $para->[1]{'start_line'},
      "'=item' outside of any '=over'"
    );
    # Queue an implicit =over in front of this item and have both
    # reconsidered.
    unshift @$paras,
      ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
      $para
    ;
    return 1;
  }
  
  
  my $over_type = $over->[1]{'~type'};
  
  if(!$over_type) {
    # Shouldn't happen!
    die "Typeless over in stack, starting at line "
     . $over->[1]{'start_line'};

  } elsif($over_type eq 'block') {
    # Complain only once per region; the flag lives on the innermost
    # open region.
    unless($curr_open->[-1][1]{'~bitched_about'}) {
      $curr_open->[-1][1]{'~bitched_about'} = 1;
      $self->whine(
        $curr_open->[-1][1]{'start_line'},
        "You can't have =items (as at line "
        . $para->[1]{'start_line'}
        . ") unless the first thing after the =over is an =item"
      );
    }
    # Just turn it into a paragraph and reconsider it
    $para->[0] = '~Para';
    unshift @$paras, $para;
    return 1;

  } elsif($over_type eq 'text') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
    
    if($item_type eq 'text') {
      # Nothing special needs doing for 'text'
    } elsif($item_type eq 'number' or $item_type eq 'bullet') {
      $self->whine(
          $para->[1]{'start_line'},
          "Expected text after =item, not a $item_type"
      );
      # Undo our clobbering:
      push @$para, $para->[1]{'~orig_content'};
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } else {
      die "Unhandled item type $item_type"; # should never happen
    }
    
    # =item-text thingies don't need any assimilation, it seems.

  } elsif($over_type eq 'number') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
    
    # The running item count is kept on the innermost open region.
    my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
    
    if($item_type eq 'bullet') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      push @$para, $para->[1]{'~orig_content'};
        # restore the bullet, blocking the assimilation of next para

    } elsif($item_type eq 'text') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      # Text content will still be there and will block next ~Para

    } elsif($item_type ne 'number') {
      die "Unknown item type $item_type"; # should never happen

    } elsif($expected_value == $para->[1]{'number'}) {
      DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
      
    } else {
      DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
       " instead of the expected value of $expected_value\n";
      $self->whine(
        $para->[1]{'start_line'},
        "You have '=item " . $para->[1]{'number'} .
        "' instead of the expected '=item $expected_value'"
      );
      $para->[1]{'number'} = $expected_value;  # correcting!!
    }
      
    if(@$para == 2) {
      # For the cases where we /didn't/ push to @$para
      # (the item has no content of its own, so borrow the content of a
      # directly following ~Para paragraph, if there is one)
      if($paras->[0][0] eq '~Para') {
        DEBUG and print "Assimilating following ~Para content into $over_type item\n";
        push @$para, splice @{shift @$paras},2;
      } else {
        DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
        push @$para, '';  # Just so it's not contentless
      }
    }


  } elsif($over_type eq 'bullet') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
    
    if($item_type eq 'bullet') {
      # as expected!

      # Content stashed under '~_freaky_para_hack' is moved back into the
      # paragraph as its content.
      if( $para->[1]{'~_freaky_para_hack'} ) {
        DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
        push @$para, delete $para->[1]{'~_freaky_para_hack'};
      }

    } elsif($item_type eq 'number') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
      push @$para, $para->[1]{'~orig_content'};
       # and block assimilation of the next paragraph
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } elsif($item_type eq 'text') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
       # But doesn't need processing.  But it'll block assimilation
       #  of the next para.
    } else {
      die "Unhandled item type $item_type"; # should never happen
    }

    if(@$para == 2) {
      # For the cases where we /didn't/ push to @$para
      # (same borrowing of a following ~Para as for numbered items)
      if($paras->[0][0] eq '~Para') {
        DEBUG and print "Assimilating following ~Para content into $over_type item\n";
        push @$para, splice @{shift @$paras},2;
      } else {
        DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
        push @$para, '';  # Just so it's not contentless
      }
    }

  } else {
    die "Unhandled =over type \"$over_type\"?";
    # Shouldn't happen!
  }
  # Tag the paragraph name with the type of the enclosing list.
  $para->[0] .= '-' . $over_type;

  return;
}

# Give a paragraph "plain" treatment: its content lines are joined with
# newlines and replaced by the children of the treelet that
# _make_treelet builds from that text.
#
# Nothing is done for a paragraph with no content, with a single empty
# content string, or whose attributes carry a true '~cooked' flag.
#
# Returns nothing.
sub _ponder_Plain {
  my ($self,$para) = @_;
  DEBUG and print " giving plain treatment...\n";

  # Empty paragraphs don't need a treelet for any reason I can see.
  return if @$para == 2;
  return if @$para == 3 and $para->[2] eq '';

  # And precooked paragraphs already have a treelet.
  return if $para->[1]{'~cooked'};

  my $text    = join("\n", splice(@$para, 2));
  my $treelet = $self->_make_treelet($text, $para->[1]{'start_line'});
  push @$para, @$treelet;

  return;
}

# Give a paragraph "verbatim" treatment: mark it as whitespace-preserving,
# optionally strip a leading indent from each line, expand tabs, and then
# turn the lines into the paragraph's final content.
#
# The final content depends on the parser's settings:
#   * accept_codes{VerbatimFormatted}: trailing blank lines are dropped
#     and the paragraph is handed to _verbatim_format;
#   * codes_in_verbatim: the joined text is parsed by _make_treelet;
#   * otherwise the lines are joined into a single string.
#
# Returns nothing.
sub _ponder_Verbatim {
  my ($self,$para) = @_;
  DEBUG and print " giving verbatim treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  my $indent = $self->strip_verbatim_indent;
  if ($indent && ref $indent eq 'CODE') {
      # The callback gets only the content lines, so the name and the
      # attribute hash are taken off for the duration of the call.
      my ($name, $attrs) = splice @{$para}, 0, 2;
      $indent = $indent->($para);
      unshift @{$para}, $name, $attrs;
  }

  my $formatted = $self->{accept_codes}
               && $self->{accept_codes}{VerbatimFormatted};
  my $strip     = $indent && !$formatted;

  foreach my $line (@{$para}[2 .. $#$para]) {  # aliases the real lines
    # Strip indentation.
    $line =~ s/^\Q$indent// if $strip;

    # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
    # tabs are at every EIGHTH column.  For portability, it has to be
    # one setting everywhere, and 8th wins.
    1 while $line =~
      s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e;

    # TODO: whinge about (or otherwise treat) unindented or overlong lines
  }

  # Now the VerbatimFormatted hoodoo...
  if( $formatted ) {
    # Kill any number of terminal newlines
    pop @$para while @$para > 3 and $para->[-1] !~ m/\S/;
    $self->_verbatim_format($para);

  } elsif ($self->{'codes_in_verbatim'}) {
    my $text    = join("\n", splice(@$para, 2));
    my $treelet = $self->_make_treelet(
      $text, $para->[1]{'start_line'}, $para->[1]{'xml:space'}
    );
    push @$para, @$treelet;
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines

  } else {
    if(@$para > 3) {
      my $text = join "\n", splice(@$para, 2);
      push @$para, $text;
    }
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
  }
  return;
}

# Give a paragraph "data" treatment: mark it as whitespace-preserving and
# collapse several content lines into one newline-joined string.
# Returns nothing.
sub _ponder_Data {
  my ($self,$para) = @_;
  DEBUG and print " giving data treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  if(@$para > 3) {  # more than one content line
    my $joined = join "\n", splice(@$para, 2);
    push @$para, $joined;
  }
  return;
}




###########################################################################

# Walk one treelet node and report it to the parser as events.
#
# Called as _traverse_treelet_bit($self, $name, \%attrs, @children):
# fires _handle_element_start, then for each child either recurses (for
# a reference, i.e. a nested node) or fires _handle_text (for a run of
# adjacent plain strings, concatenated), and finally fires
# _handle_element_end.  The handlers get a copy of the element name.
#
# Returns nothing.
sub _traverse_treelet_bit {  # for use only by the routine above
  my $self = shift;
  my $name = shift;

  my $scratch = $name;
  $self->_handle_element_start($scratch, shift @_);

  while (@_) {
    my $child = shift;

    if (ref $child) {
      _traverse_treelet_bit($self, @$child);
      next;
    }

    # Glue any directly following text bits onto this one.
    while (@_ and not ref $_[0]) {
      $child .= shift;
    }
    $self->_handle_text($child);
  }

  $scratch = $name;
  $self->_handle_element_end($scratch);
  return;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Build the fake closing paragraphs needed to shut every region that is
# still open: an '=end' for each open =begin/=for region (stored as
# '=for') and a '=back' for each open '=over' (which is also whined
# about).  Each closer is a copy of the open paragraph with its own copy
# of the attribute hash, flagged with 'fake-closer'.
#
# Returns the closers innermost-first, or nothing if the parser has no
# 'curr_open' stack.  Dies on an open region of any other kind.
sub _closers_for_all_curr_open {
  my $self = $_[0];
  my $open_stack = $self->{'curr_open'} || return;

  my @closers;
  foreach my $still_open (@$open_stack) {
    my @copy = @$still_open;
    $copy[1] = { %{ $copy[1] }, 'fake-closer' => 1 };
    #$copy[1]{'start_line'} = -1;

    my $opener = $copy[0];
    if($opener eq '=over') {
      $self->whine(
        $still_open->[1]{start_line} ,
        "=over without closing =back"
      );
      $copy[0] = '=back';
    } elsif($opener eq '=for') {
      $copy[0] = '=end';
    } else {
      die "I don't know how to auto-close an open $copy[0] region";
    }

    # A closer needs some content: the region's target, or the empty
    # string, since =over's don't have targets.
    if( @copy <= 2 ) {
      my $target = $copy[1]{'target'};
      push @copy, defined $target ? $target : '';
    }

    DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
    unshift @closers, \@copy;
  }
  return @closers;
}

#--------------------------------------------------------------------------

# Turn a verbatim paragraph into a 'VerbatimFormatted' one, in place.
#
#   $it  the invocant (not otherwise used here)
#   $p   the paragraph: [ name, {attrs}, line, line, ... ]
#
# A "formatty" line has "#:" in its first two columns followed only by
# spaces, "^", "/" and "%".  It describes the line before it: the
# characters above a run of "^" become a VerbatimB node, above "/" a
# VerbatimI node and above "%" a VerbatimBI node.  The formatty line
# itself is removed.  For example:
#
#     What do you want?  i like pie. [or whatever]
#   #:^^^^^^^^^^^^^^^^^              /////////////
#
# On return $p->[0] is 'VerbatimFormatted', adjacent text children have
# been merged, and the newline at the end of the last text child has been
# removed.  Returns nothing.
sub _verbatim_format {
  my($it, $p) = @_;
  
  my $formatting;

  for(my $i = 2; $i < @$p; $i++) { # work forwards over the lines
    DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
    $p->[$i] .= "\n";
     # Unlike with simple Verbatim blocks, we don't end up just doing
     # a join("\n", ...) on the contents, so we have to append a
     # newline to every line, and then nix the last one later.
  }

  if( DEBUG > 4 ) {
    print "<<\n";
    for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
      print "_verbatim_format $i: $p->[$i]";
    }
    print ">>\n";
  }

  for(my $i = $#$p; $i > 2; $i--) {
    # work backwards over the lines, except the first (#2)
    
    #next unless $p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s
    #        and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
     # look at a formatty line preceding a nonformatty one
    DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
    if($p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s) {
      DEBUG > 5 and print "  It's a formatty line.  ",
       "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
      
      if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
        DEBUG > 5 and print "  Previous line is formatty!  Skipping this one.\n";
        next;
      } else {
        DEBUG > 5 and print "  Previous line is non-formatty!  Yay!\n";
      }
    } else {
      DEBUG > 5 and print "  It's not a formatty line.  Ignoring\n";
      next;
    }

    # From here on, line $i is a formatty line and line $i-1 is the
    # ordinary line it applies to.

    DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
    
    $formatting = '  ' . $1;  # two spaces stand in for the "#:" columns
    $formatting =~ s/\s+$//s; # nix trailing whitespace
    unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
      splice @$p,$i,1; # remove this line
      $i--; # don't consider next line
      next;
    }

    if( length($formatting) >= length($p->[$i-1]) ) {
      $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
    } else {
      $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
    }
    # Make $formatting and the previous line be exactly the same length,
    # with $formatting having a " " as the last character.
 
    DEBUG > 4 and print "Formatting <$formatting>    on <", $p->[$i-1], ">\n";


    # Cut the previous line into pieces, one per run of identical
    # formatting characters.
    my @new_line;
    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
      #print "Format matches $1\n";

      if($2) {
        # A run of spaces: that stretch of text stays plain.
        #print "SKIPPING <$2>\n";
        push @new_line,
          substr($p->[$i-1], pos($formatting)-length($1), length($1));
      } else {
        #print "SNARING $+\n";
        push @new_line, [
          (
            $3 ? 'VerbatimB'  :
            $4 ? 'VerbatimI'  :
            $5 ? 'VerbatimBI' : die("Should never get called")
          ), {},
          substr($p->[$i-1], pos($formatting)-length($1), length($1))
        ];
        #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
      }
    }
    my @nixed =    
      splice @$p, $i-1, 2, @new_line; # replace the previous line and myself
    DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
    
    DEBUG > 6 and print "New version of the above line is these tokens (",
      scalar(@new_line), "):",
      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
    $i--; # So the next line we scrutinize is the line before the one
          #  that we just went and formatted
  }

  $p->[0] = 'VerbatimFormatted';

  # Collapse adjacent text nodes, just for kicks.
  # (The loop test has to be "<": with ">" the loop would never be
  # entered for a paragraph that has any content.)
  for( my $i = 2; $i < $#$p; $i++ ) { # work forwards over the tokens except for the last
    if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
      DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
      $p->[$i] .= splice @$p, $i+1, 1; # merge
      --$i;  # and back up
    }
  }

  # Now look for the last text token, and remove the terminal newline
  for( my $i = $#$p; $i >= 2; $i-- ) {
    # work backwards over the tokens, even the first
    if( !ref($p->[$i]) ) {
      if($p->[$i] =~ s/\n$//s) {
        DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
      } else {
        DEBUG > 5 and print
         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
      }
      last; # we only want the next one
    }
  }

  return;
}


#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


sub _treelet_from_formatting_codes {
  # Given a paragraph, returns a treelet.  Full of scary tokenizing code.
  #  Like [ '~Top', {'start_line' => $start_line},
  #            "I like ",
  #            [ 'B', {}, "pie" ],
  #            "!"
  #       ]
  #
  # Parameters:
  #   $para            the paragraph text to tokenize
  #   $start_line      stored as the 'start_line' attribute of the '~Top'
  #                    node and used when whining about unterminated codes
  #   $preserve_space  if true (or if the parser's 'preserve_whitespace'
  #                    is true), whitespace in $para is left untouched
  #
  # Returns the '~Top' treelet (an array reference).
  
  my($self, $para, $start_line, $preserve_space) = @_;
  
  my $treelet = ['~Top', {'start_line' => $start_line},];
  
  unless ($preserve_space || $self->{'preserve_whitespace'}) {
    $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
    $para =~ s/ $//;
    $para =~ s/^ //;
  }
  
  # The only apparent problem with the above code is that N<<  >> turns
  # into N<< >>.  But then, word wrapping does that too!  So don't do that!
  
  # @stack has one entry per open formatting code: 0 for a simple code
  # like C<...>, otherwise the number of ">" needed to close it.
  # @lineage holds the nodes being built, innermost last; it always has
  # one more entry than @stack, the first being the '~Top' node.
  my @stack;
  my @lineage = ($treelet);
  my $raw = ''; # raw content of L<> fcode before splitting/processing
    # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
    # into just 1 ' '. Is this the regex's doing or 'raw's?
  my $inL = 0;  # true while inside an L<> code

  DEBUG > 4 and print "Paragraph:\n$para\n\n";
 
  # Here begins our frightening tokenizer RE.  The following regex matches
  # text in four main parts:
  #
  #  * Start-codes.  The first alternative matches C< or C<<, the latter
  #    followed by some whitespace.  $1 will hold the entire start code
  #    (including any space following a multiple-angle-bracket delimiter),
  #    and $2 will hold only the additional brackets past the first in a
  #    multiple-bracket delimiter.  length($2) + 1 will be the number of
  #    closing brackets we have to find.
  #
  #  * Closing brackets.  Match some amount of whitespace followed by
  #    multiple close brackets.  The logic to see if this closes anything
  #    is down below.  Note that in order to parse C<<  >> correctly, we
  #    have to use look-behind (?<=\s\s), since the match of the starting
  #    code will have consumed the whitespace.
  #
  #  * A single closing bracket, to close a simple code like C<>.
  #
  #  * Something that isn't a start or end code.  We have to be careful
  #    about accepting whitespace, since perlpodspec says that any whitespace
  #    before a multiple-bracket closing delimiter should be ignored.
  #
  while($para =~
    m/\G
      (?:
        # Match starting codes, including the whitespace following a
        # multiple-delimiter start code.  $1 gets the whole start code and
        # $2 gets all but one of the <s in the multiple-bracket case.
        ([A-Z]<(?:(<+)\s+)?)
        |
        # Match multiple-bracket end codes.  $3 gets the whitespace that
        # should be discarded before an end bracket but kept in other cases
        # and $4 gets the end brackets themselves.
        (\s+|(?<=\s\s))(>{2,})
        |
        (\s?>)          # $5: simple end-codes
        |
        (               # $6: stuff containing no start-codes or end-codes
          (?:
            [^A-Z\s>]
            |
            (?:
              [A-Z](?!<)
            )
            |
            # whitespace is ok, but we don't want to eat the whitespace before
            # a multiple-bracket end code.
            # NOTE: we may still have problems with e.g. S<<    >>
            (?:
              \s(?!\s*>{2,})
            )
          )+
        )
      )
    /xgo
  ) {
    DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
    if(defined $1) {
      # A start code: open a new node as the last child of the current one.
      if(defined $2) {
        DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
        push @stack, length($2) + 1; 
          # length of the necessary complex end-code string
      } else {
        DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
        push @stack, 0;  # signal that we're looking for simple
      }
      push @lineage, [ substr($1,0,1), {}, ];  # new node object
      push @{ $lineage[-2] }, $lineage[-1];
      if ('L' eq substr($1,0,1)) {
        $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
        $inL = 1;
      } else {
        $raw .= $1 if $inL;
      }

    } elsif(defined $4) {
      DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
      # This is where it gets messy...
      if(! @stack) {
        # We saw " >>>>" but needed nothing.  This is ALL just stuff then.
        DEBUG > 4 and print " But it's really just stuff.\n";
        push @{ $lineage[-1] }, $3, $4;
        next;
      } elsif(!$stack[-1]) {
        # We saw " >>>>" but needed only ">".  Back pos up.
        DEBUG > 4 and print " And that's more than we needed to close simple.\n";
        push @{ $lineage[-1] }, $3; # That was a for-real space, too.
        pos($para) = pos($para) - length($4) + 1;
      } elsif($stack[-1] == length($4)) {
        # We found " >>>>", and it was exactly what we needed.  Commonest case.
        DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
      } elsif($stack[-1] < length($4)) {
        # We saw " >>>>" but needed only " >>".  Back pos up.
        DEBUG > 4 and print " And that's more than we needed to close complex.\n";
        pos($para) = pos($para) - length($4) + $stack[-1];
      } else {
        # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!
        DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
        push @{ $lineage[-1] }, $3, $4;
        next;
      }
      #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";

      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Keep the element from being childless
      
      # The innermost code is now closed.
      pop @stack;
      pop @lineage;

      unless (@stack) { # not in an L if there are no open fcodes
        $inL = 0;
        # Store the accumulated raw text on the L node just closed.
        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
          $lineage[-1][-1][1]{'raw'} = $raw
        }
      }
      $raw .= $3.$4 if $inL;
      
    } elsif(defined $5) {
      DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";

      if(@stack and ! $stack[-1]) {
        # We're indeed expecting a simple end-code
        DEBUG > 4 and print " It's indeed an end-code.\n";

        if(length($5) == 2) { # There was a space there: " >"
          push @{ $lineage[-1] }, ' ';
        } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
          push @{ $lineage[-1] }, ''; # keep it from being really childless
        }

        pop @stack;
        pop @lineage;
      } else {
        DEBUG > 4 and print " It's just stuff.\n";
        push @{ $lineage[-1] }, $5;
      }

      unless (@stack) { # not in an L if there are no open fcodes
        $inL = 0;
        # Store the accumulated raw text if the last child is an L node.
        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
          $lineage[-1][-1][1]{'raw'} = $raw
        }
      }
      $raw .= $5 if $inL;

    } elsif(defined $6) {
      DEBUG > 3 and print "Found stuff \"$6\"\n";
      push @{ $lineage[-1] }, $6;
      $raw .= $6 if $inL;
        # XXX does not capture multiple whitespaces -- 'raw' ends up with
        #     at most 1 leading/trailing whitespace, why not all of it?

    } else {
      # should never ever ever ever happen
      DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
      die "SPORK 512512!";
    }
  }

  if(@stack) { # Uhoh, some sequences weren't closed.
    # Build a description of the unclosed codes, from the innermost one
    # outwards, for the warning: e.g. "B<I<...>>".
    my $x= "...";
    while(@stack) {
      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Hmmmmm!

      my $code         = (pop @lineage)->[0];
      my $ender_length =  pop @stack;
      if($ender_length) {
        --$ender_length;
        $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
      } else {
        $x = $code . "<$x>";
      }
    }
    DEBUG > 1 and print "Unterminated $x sequence\n";
    $self->whine($start_line,
      "Unterminated $x sequence",
    );
  }

  return $treelet;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Method form of stringify_lol: returns the text content of the given
# treelet as one string.  The invocant itself is not used.
sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)
  my ($self, $lol) = @_;
  return stringify_lol($lol);
}

# Returns, as one string, all the text found in a treelet
# [ name, {attrs}, children... ], nested nodes included.
sub stringify_lol {  # function: stringify_lol($lol)
  my ($lol) = @_;
  my $collected = '';
  _stringify_lol($lol, \$collected);
  return $collected;
}

# The recursive worker behind stringify_lol.
#
#   $lol  a treelet node: [ name, {attrs}, children... ]
#   $to   reference to the string that the text is appended to
#
# Children that are array references are descended into; every other
# child is appended as a string.  Returns nothing.
sub _stringify_lol {  # the real recursor
  my($lol, $to) = @_;
  foreach my $idx (2 .. $#$lol) {  # skip the name and the attribute hash
    my $child = $lol->[$idx];
    if( ref($child || '') and UNIVERSAL::isa($child, 'ARRAY') ) {
      _stringify_lol($child, $to);  # recurse!
      next;
    }
    $$to .= $child;
  }
  return;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Returns a one-line description of the open-region stack, for use in
# warnings and debug output: '[empty]' for an empty stack, otherwise the
# entries joined with '; '.  A '=for' entry is shown as its '~really'
# attribute (or '=over' if that is not set) followed by its target; any
# other entry is shown by name.
sub _dump_curr_open { # return a string representation of the stack
  my $curr_open = $_[0]{'curr_open'};

  return '[empty]' unless @$curr_open;

  my @labels;
  foreach my $entry (@$curr_open) {
    if($entry->[0] ne '=for') {
      push @labels, $entry->[0];
      next;
    }
    my $directive = $entry->[1]{'~really'} || '=over';
    push @labels, $directive . ' ' . $entry->[1]{'target'};
  }
  return join '; ', @labels;
}

###########################################################################
# Escapes used by pretty() for characters it will not print as they are.
my %pretty_form = (
  "\a" => '\a', # ding!
  "\b" => '\b', # BS
  "\e" => '\e', # ESC
  "\f" => '\f', # FF
  "\t" => '\t', # tab
  "\cm" => '\cm',
  "\cj" => '\cj',
  "\n" => '\n', # probably overrides one of either \cm or \cj
  '"' => '\"',
  '\\' => '\\\\',
  '$' => '\\$',
  '@' => '\\@',
  '%' => '\\%',
  '#' => '\\#',
);

# Returns a compact, Perl-ish rendering of its arguments, joined by ", ".
# Used for debug output.  Adopted from Class::Classless.
#
# Array references (and Pod::Simple::LinkSection objects), scalar
# references and hash references are rendered recursively, hashes with
# sorted keys.  undef becomes the word undef, the empty string becomes
# '', plain decimal numbers are shown as they are, and everything else
# becomes a double-quoted string with awkward characters escaped.
#
# Not the most brilliant routine, but passable.
# Don't give it a cyclic data structure!
sub pretty {
  my @stuff = @_; # copy, since strings get escaped in place
  my @rendered;

  foreach my $item (@stuff) {
    if(!defined($item)) {
      push @rendered, "undef";
      next;
    }

    my $kind = ref($item);
    if($kind eq 'ARRAY' or $kind eq 'Pod::Simple::LinkSection') {
      push @rendered, "[ " . pretty(@$item) . " ]";
      next;
    }
    if($kind eq 'SCALAR') {
      push @rendered, "\\" . pretty($$item);
      next;
    }
    if($kind eq 'HASH') {
      my @pairs;
      foreach my $key (sort keys %$item) {
        push @pairs, pretty($key) . '=>' . pretty($item->{$key});
      }
      push @rendered, "{" . join(", ", @pairs) . "}";
      next;
    }

    if(!length($item)) { # empty string
      push @rendered, q{''};
      next;
    }

    if(
      $item eq '0' # very common case
      or(
         $item =~ m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
         and $item ne '-0' # the strange case that that RE lets thru
      )
    ) {
      push @rendered, $item;
      next;
    }

    # Anything else is shown as a quoted string; which characters need
    # escaping depends on whether we are on an ASCII platform.
    my $needs_escape = chr(65) eq 'A'
      ? qr<[^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E]>
        # We're in some crazy non-ASCII world!
      : qr<[^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]>;
    $item =~
      s<($needs_escape)>
       <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;

    push @rendered, qq{"$item"};
  }

  return join ", ", @rendered;
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# A rather unsubtle method of blowing away all the state information
# from a parser object so it can be reused. Provided as a utility for
# backward compatibility in Pod::Man, etc. but not recommended for
# general use.

sub reinit {
  # Drop the parsing-state keys listed below from the parser object so that
  # it can be reused.  Keys not named here are left in place.
  my $self = shift;
  my @state_keys = qw(
    source_dead source_filename doc_has_started start_of_pod_block
    content_seen last_was_blank paras curr_open line_count pod_para_count
    in_pod ~tried_gen_errata errata errors_seen Title
  );
  foreach my $key (@state_keys) {
    delete $self->{$key};
  }
}

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1;
Name	Type	Size	Permission
BlackBox.pm	File	64.69 KB	0644
Checker.pm	File	5.24 KB	0644
Debug.pm	File	4.5 KB	0644
DumpAsText.pm	File	3.92 KB	0644
DumpAsXML.pm	File	4.29 KB	0644
HTML.pm	File	32.69 KB	0644
HTMLBatch.pm	File	39.55 KB	0644
HTMLLegacy.pm	File	2.69 KB	0644
LinkSection.pm	File	4.24 KB	0644
Methody.pm	File	3.48 KB	0644
Progress.pm	File	2.36 KB	0644
PullParser.pm	File	24.76 KB	0644
PullParserEndToken.pm	File	2.81 KB	0644
PullParserStartToken.pm	File	4.04 KB	0644
PullParserTextToken.pm	File	3.08 KB	0644
PullParserToken.pm	File	3.89 KB	0644
RTF.pm	File	21.14 KB	0644
Search.pm	File	32.7 KB	0644
SimpleTree.pm	File	4.47 KB	0644
Subclassing.pod	File	32.33 KB	0644
Text.pm	File	5.01 KB	0644
TextContent.pm	File	2.48 KB	0644
TiedOutFH.pm	File	2.67 KB	0644
Transcode.pm	File	699 B	0644
TranscodeDumb.pm	File	1.16 KB	0644
TranscodeSmart.pm	File	715 B	0644
XHTML.pm	File	24.93 KB	0644
XMLOutStream.pm	File	4.86 KB	0644
[ Avaa Bypassed ]

Upload:

Command:

Filemanager

Server Info

System Info