forked from openkylin/libxml-sax-base-perl
848 lines
28 KiB
Perl
848 lines
28 KiB
Perl
#!/usr/bin/perl
|
|
#
|
|
# This file is used to generate lib/XML/SAX/Base.pm. There is a pre-generated
|
|
# Base.pm file included in the distribution so you don't need to run this
|
|
# script unless you are attempting to modify the code.
|
|
#
|
|
# The code in this file was adapted from the Makefile.PL when XML::SAX::Base
|
|
# was split back out into its own distribution.
|
|
#
|
|
# You can manually run this file:
|
|
#
|
|
# perl ./BuildSAXBase.pl
|
|
#
|
|
# or better yet it will be invoked by automatically Dist::Zilla when building
|
|
# a release from the git repository.
|
|
#
|
|
# dzil build
|
|
#
|
|
|
|
package SAX::Base::Builder;
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use File::Spec;
|
|
|
|
write_xml_sax_base() unless caller();
|
|
|
|
sub build_xml_sax_base {
|
|
my $code = <<'EOHEADER';
|
|
package XML::SAX::Base;
|
|
|
|
# version 0.10 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 0.13 - Robin Berjon <robin@knowscape.com>
|
|
# version 0.15 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 0.17 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 0.19 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 0.21 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 0.22 - Robin Berjon <robin@knowscape.com>
|
|
# version 0.23 - Matt Sergeant <matt@sergeant.org>
|
|
# version 0.24 - Robin Berjon <robin@knowscape.com>
|
|
# version 0.25 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 1.00 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 1.01 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 1.02 - Robin Berjon <robin@knowscape.com>
|
|
# version 1.03 - Matt Sergeant <matt@sergeant.org>
|
|
# version 1.04 - Kip Hampton <khampton@totalcinema.com>
|
|
# version 1.05 - Grant McLean <grantm@cpan.org>
|
|
# version 1.06 - Grant McLean <grantm@cpan.org>
|
|
# version 1.07 - Grant McLean <grantm@cpan.org>
|
|
# version 1.08 - Grant McLean <grantm@cpan.org>
|
|
|
|
#-----------------------------------------------------#
|
|
# STOP!!!!!
|
|
#
|
|
# This file is generated by the 'BuildSAXBase.pl' file
|
|
# that ships with the XML::SAX::Base distribution.
|
|
# If you need to make changes, patch that file NOT
|
|
# XML/SAX/Base.pm Better yet, fork the git repository
|
|
# commit your changes and send a pull request:
|
|
# https://github.com/grantm/XML-SAX-Base
|
|
#-----------------------------------------------------#
|
|
|
|
use strict;
|
|
|
|
use XML::SAX::Exception qw();
|
|
|
|
EOHEADER
|
|
|
|
my %EVENT_SPEC = (
|
|
start_document => [qw(ContentHandler DocumentHandler Handler)],
|
|
end_document => [qw(ContentHandler DocumentHandler Handler)],
|
|
start_element => [qw(ContentHandler DocumentHandler Handler)],
|
|
end_element => [qw(ContentHandler DocumentHandler Handler)],
|
|
characters => [qw(ContentHandler DocumentHandler Handler)],
|
|
processing_instruction => [qw(ContentHandler DocumentHandler Handler)],
|
|
ignorable_whitespace => [qw(ContentHandler DocumentHandler Handler)],
|
|
set_document_locator => [qw(ContentHandler DocumentHandler Handler)],
|
|
start_prefix_mapping => [qw(ContentHandler Handler)],
|
|
end_prefix_mapping => [qw(ContentHandler Handler)],
|
|
skipped_entity => [qw(ContentHandler Handler)],
|
|
start_cdata => [qw(DocumentHandler LexicalHandler Handler)],
|
|
end_cdata => [qw(DocumentHandler LexicalHandler Handler)],
|
|
comment => [qw(DocumentHandler LexicalHandler Handler)],
|
|
entity_reference => [qw(DocumentHandler Handler)],
|
|
notation_decl => [qw(DTDHandler Handler)],
|
|
unparsed_entity_decl => [qw(DTDHandler Handler)],
|
|
element_decl => [qw(DeclHandler Handler)],
|
|
attlist_decl => [qw(DTDHandler Handler)],
|
|
doctype_decl => [qw(DTDHandler Handler)],
|
|
xml_decl => [qw(DTDHandler Handler)],
|
|
entity_decl => [qw(DTDHandler Handler)],
|
|
attribute_decl => [qw(DeclHandler Handler)],
|
|
internal_entity_decl => [qw(DeclHandler Handler)],
|
|
external_entity_decl => [qw(DeclHandler Handler)],
|
|
resolve_entity => [qw(EntityResolver Handler)],
|
|
start_dtd => [qw(LexicalHandler Handler)],
|
|
end_dtd => [qw(LexicalHandler Handler)],
|
|
start_entity => [qw(LexicalHandler Handler)],
|
|
end_entity => [qw(LexicalHandler Handler)],
|
|
warning => [qw(ErrorHandler Handler)],
|
|
error => [qw(ErrorHandler Handler)],
|
|
fatal_error => [qw(ErrorHandler Handler)],
|
|
);
|
|
|
|
for my $ev (keys %EVENT_SPEC) {
|
|
$code .= <<" EOTOPCODE";
|
|
sub $ev {
|
|
my \$self = shift;
|
|
if (defined \$self->{Methods}->{'$ev'}) {
|
|
\$self->{Methods}->{'$ev'}->(\@_);
|
|
}
|
|
else {
|
|
my \$method;
|
|
my \$callbacks;
|
|
if (exists \$self->{ParseOptions}) {
|
|
\$callbacks = \$self->{ParseOptions};
|
|
}
|
|
else {
|
|
\$callbacks = \$self;
|
|
}
|
|
if (0) { # dummy to make elsif's below compile
|
|
}
|
|
EOTOPCODE
|
|
|
|
my ($can_string, $aload_string);
|
|
for my $h (@{$EVENT_SPEC{$ev}}) {
|
|
$can_string .= <<" EOCANBLOCK";
|
|
elsif (defined \$callbacks->{'$h'} and \$method = \$callbacks->{'$h'}->can('$ev') ) {
|
|
my \$handler = \$callbacks->{'$h'};
|
|
\$self->{Methods}->{'$ev'} = sub { \$method->(\$handler, \@_) };
|
|
return \$method->(\$handler, \@_);
|
|
}
|
|
EOCANBLOCK
|
|
$aload_string .= <<" EOALOADBLOCK";
|
|
elsif (defined \$callbacks->{'$h'}
|
|
and \$callbacks->{'$h'}->can('AUTOLOAD')
|
|
and \$callbacks->{'$h'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '')
|
|
)
|
|
{
|
|
my \$res = eval { \$callbacks->{'$h'}->$ev(\@_) };
|
|
if (\$@) {
|
|
die \$@;
|
|
}
|
|
else {
|
|
# I think there's a buggette here...
|
|
# if the first call throws an exception, we don't set it up right.
|
|
# Not fatal, but we might want to address it.
|
|
my \$handler = \$callbacks->{'$h'};
|
|
\$self->{Methods}->{'$ev'} = sub { \$handler->$ev(\@_) };
|
|
}
|
|
return \$res;
|
|
}
|
|
EOALOADBLOCK
|
|
}
|
|
|
|
$code .= $can_string . $aload_string;
|
|
|
|
$code .= <<" EOFALLTHROUGH";
|
|
else {
|
|
\$self->{Methods}->{'$ev'} = sub { };
|
|
}
|
|
}
|
|
EOFALLTHROUGH
|
|
|
|
$code .= "\n}\n\n";
|
|
}
|
|
|
|
$code .= <<'BODY';
|
|
#-------------------------------------------------------------------#
|
|
# Class->new(%options)
|
|
#-------------------------------------------------------------------#
|
|
sub new {
|
|
my $proto = shift;
|
|
my $class = ref($proto) || $proto;
|
|
my $options = ($#_ == 0) ? shift : { @_ };
|
|
|
|
unless ( defined( $options->{Handler} ) or
|
|
defined( $options->{ContentHandler} ) or
|
|
defined( $options->{DTDHandler} ) or
|
|
defined( $options->{DocumentHandler} ) or
|
|
defined( $options->{LexicalHandler} ) or
|
|
defined( $options->{ErrorHandler} ) or
|
|
defined( $options->{DeclHandler} ) ) {
|
|
|
|
$options->{Handler} = XML::SAX::Base::NoHandler->new;
|
|
}
|
|
|
|
my $self = bless $options, $class;
|
|
# turn NS processing on by default
|
|
$self->set_feature('http://xml.org/sax/features/namespaces', 1);
|
|
return $self;
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# $p->parse(%options)
|
|
#-------------------------------------------------------------------#
|
|
sub parse {
|
|
my $self = shift;
|
|
my $parse_options = $self->get_options(@_);
|
|
local $self->{ParseOptions} = $parse_options;
|
|
if ($self->{Parent}) { # calling parse on a filter for some reason
|
|
return $self->{Parent}->parse($parse_options);
|
|
}
|
|
else {
|
|
my $method;
|
|
if (defined $parse_options->{Source}{CharacterStream} and $method = $self->can('_parse_characterstream')) {
|
|
warn("parse charstream???\n");
|
|
return $method->($self, $parse_options->{Source}{CharacterStream});
|
|
}
|
|
elsif (defined $parse_options->{Source}{ByteStream} and $method = $self->can('_parse_bytestream')) {
|
|
return $method->($self, $parse_options->{Source}{ByteStream});
|
|
}
|
|
elsif (defined $parse_options->{Source}{String} and $method = $self->can('_parse_string')) {
|
|
return $method->($self, $parse_options->{Source}{String});
|
|
}
|
|
elsif (defined $parse_options->{Source}{SystemId} and $method = $self->can('_parse_systemid')) {
|
|
return $method->($self, $parse_options->{Source}{SystemId});
|
|
}
|
|
else {
|
|
die "No _parse_* routine defined on this driver (If it is a filter, remember to set the Parent property. If you call the parse() method, make sure to set a Source. You may want to call parse_uri, parse_string or parse_file instead.) [$self]";
|
|
}
|
|
}
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# $p->parse_file(%options)
|
|
#-------------------------------------------------------------------#
|
|
sub parse_file {
|
|
my $self = shift;
|
|
my $file = shift;
|
|
return $self->parse_uri($file, @_) if ref(\$file) eq 'SCALAR';
|
|
my $parse_options = $self->get_options(@_);
|
|
$parse_options->{Source}{ByteStream} = $file;
|
|
return $self->parse($parse_options);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# $p->parse_uri(%options)
|
|
#-------------------------------------------------------------------#
|
|
sub parse_uri {
|
|
my $self = shift;
|
|
my $file = shift;
|
|
my $parse_options = $self->get_options(@_);
|
|
$parse_options->{Source}{SystemId} = $file;
|
|
return $self->parse($parse_options);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# $p->parse_string(%options)
|
|
#-------------------------------------------------------------------#
|
|
sub parse_string {
|
|
my $self = shift;
|
|
my $string = shift;
|
|
my $parse_options = $self->get_options(@_);
|
|
$parse_options->{Source}{String} = $string;
|
|
return $self->parse($parse_options);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# get_options
|
|
#-------------------------------------------------------------------#
|
|
sub get_options {
|
|
my $self = shift;
|
|
|
|
if (@_ == 1) {
|
|
return { %$self, %{$_[0]} };
|
|
} else {
|
|
return { %$self, @_ };
|
|
}
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# get_features
|
|
#-------------------------------------------------------------------#
|
|
sub get_features {
|
|
return (
|
|
'http://xml.org/sax/features/external-general-entities' => undef,
|
|
'http://xml.org/sax/features/external-parameter-entities' => undef,
|
|
'http://xml.org/sax/features/is-standalone' => undef,
|
|
'http://xml.org/sax/features/lexical-handler' => undef,
|
|
'http://xml.org/sax/features/parameter-entities' => undef,
|
|
'http://xml.org/sax/features/namespaces' => 1,
|
|
'http://xml.org/sax/features/namespace-prefixes' => 0,
|
|
'http://xml.org/sax/features/string-interning' => undef,
|
|
'http://xml.org/sax/features/use-attributes2' => undef,
|
|
'http://xml.org/sax/features/use-locator2' => undef,
|
|
'http://xml.org/sax/features/validation' => undef,
|
|
|
|
'http://xml.org/sax/properties/dom-node' => undef,
|
|
'http://xml.org/sax/properties/xml-string' => undef,
|
|
);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# get_feature
|
|
#-------------------------------------------------------------------#
|
|
sub get_feature {
|
|
my $self = shift;
|
|
my $feat = shift;
|
|
|
|
# check %FEATURES to see if it's there, and return it if so
|
|
# throw XML::SAX::Exception::NotRecognized if it's not there
|
|
# throw XML::SAX::Exception::NotSupported if it's there but we
|
|
# don't support it
|
|
|
|
my %features = $self->get_features();
|
|
if (exists $features{$feat}) {
|
|
my %supported = map { $_ => 1 } $self->supported_features();
|
|
if ($supported{$feat}) {
|
|
return $self->{__PACKAGE__ . "::Features"}{$feat};
|
|
}
|
|
throw XML::SAX::Exception::NotSupported(
|
|
Message => "The feature '$feat' is not supported by " . ref($self),
|
|
Exception => undef,
|
|
);
|
|
}
|
|
throw XML::SAX::Exception::NotRecognized(
|
|
Message => "The feature '$feat' is not recognized by " . ref($self),
|
|
Exception => undef,
|
|
);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# set_feature
|
|
#-------------------------------------------------------------------#
|
|
sub set_feature {
|
|
my $self = shift;
|
|
my $feat = shift;
|
|
my $value = shift;
|
|
# check %FEATURES to see if it's there, and set it if so
|
|
# throw XML::SAX::Exception::NotRecognized if it's not there
|
|
# throw XML::SAX::Exception::NotSupported if it's there but we
|
|
# don't support it
|
|
|
|
my %features = $self->get_features();
|
|
if (exists $features{$feat}) {
|
|
my %supported = map { $_ => 1 } $self->supported_features();
|
|
if ($supported{$feat}) {
|
|
return $self->{__PACKAGE__ . "::Features"}{$feat} = $value;
|
|
}
|
|
throw XML::SAX::Exception::NotSupported(
|
|
Message => "The feature '$feat' is not supported by " . ref($self),
|
|
Exception => undef,
|
|
);
|
|
}
|
|
throw XML::SAX::Exception::NotRecognized(
|
|
Message => "The feature '$feat' is not recognized by " . ref($self),
|
|
Exception => undef,
|
|
);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# get_handler and friends
|
|
#-------------------------------------------------------------------#
|
|
sub get_handler {
|
|
my $self = shift;
|
|
my $handler_type = shift;
|
|
$handler_type ||= 'Handler';
|
|
return defined( $self->{$handler_type} ) ? $self->{$handler_type} : undef;
|
|
}
|
|
|
|
sub get_document_handler {
|
|
my $self = shift;
|
|
return $self->get_handler('DocumentHandler', @_);
|
|
}
|
|
|
|
sub get_content_handler {
|
|
my $self = shift;
|
|
return $self->get_handler('ContentHandler', @_);
|
|
}
|
|
|
|
sub get_dtd_handler {
|
|
my $self = shift;
|
|
return $self->get_handler('DTDHandler', @_);
|
|
}
|
|
|
|
sub get_lexical_handler {
|
|
my $self = shift;
|
|
return $self->get_handler('LexicalHandler', @_);
|
|
}
|
|
|
|
sub get_decl_handler {
|
|
my $self = shift;
|
|
return $self->get_handler('DeclHandler', @_);
|
|
}
|
|
|
|
sub get_error_handler {
|
|
my $self = shift;
|
|
return $self->get_handler('ErrorHandler', @_);
|
|
}
|
|
|
|
sub get_entity_resolver {
|
|
my $self = shift;
|
|
return $self->get_handler('EntityResolver', @_);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# set_handler and friends
|
|
#-------------------------------------------------------------------#
|
|
sub set_handler {
|
|
my $self = shift;
|
|
my ($new_handler, $handler_type) = reverse @_;
|
|
$handler_type ||= 'Handler';
|
|
$self->{Methods} = {} if $self->{Methods};
|
|
$self->{$handler_type} = $new_handler;
|
|
$self->{ParseOptions}->{$handler_type} = $new_handler;
|
|
return 1;
|
|
}
|
|
|
|
sub set_document_handler {
|
|
my $self = shift;
|
|
return $self->set_handler('DocumentHandler', @_);
|
|
}
|
|
|
|
sub set_content_handler {
|
|
my $self = shift;
|
|
return $self->set_handler('ContentHandler', @_);
|
|
}
|
|
sub set_dtd_handler {
|
|
my $self = shift;
|
|
return $self->set_handler('DTDHandler', @_);
|
|
}
|
|
sub set_lexical_handler {
|
|
my $self = shift;
|
|
return $self->set_handler('LexicalHandler', @_);
|
|
}
|
|
sub set_decl_handler {
|
|
my $self = shift;
|
|
return $self->set_handler('DeclHandler', @_);
|
|
}
|
|
sub set_error_handler {
|
|
my $self = shift;
|
|
return $self->set_handler('ErrorHandler', @_);
|
|
}
|
|
sub set_entity_resolver {
|
|
my $self = shift;
|
|
return $self->set_handler('EntityResolver', @_);
|
|
}
|
|
|
|
#-------------------------------------------------------------------#
|
|
|
|
#-------------------------------------------------------------------#
|
|
# supported_features
|
|
#-------------------------------------------------------------------#
|
|
sub supported_features {
|
|
my $self = shift;
|
|
# Only namespaces are required by all parsers
|
|
return (
|
|
'http://xml.org/sax/features/namespaces',
|
|
);
|
|
}
|
|
#-------------------------------------------------------------------#
|
|
|
|
sub no_op {
|
|
# this space intentionally blank
|
|
}
|
|
|
|
|
|
package XML::SAX::Base::NoHandler;
|
|
|
|
# we need a fake handler that doesn't implement anything, this
|
|
# simplifies the code a lot (though given the recent changes,
|
|
# it may be better to do without)
|
|
sub new {
|
|
#warn "no handler called\n";
|
|
return bless {};
|
|
}
|
|
|
|
1;
|
|
|
|
BODY
|
|
|
|
$code .= "__END__\n";
|
|
|
|
$code .= <<'FOOTER';
|
|
|
|
=head1 NAME
|
|
|
|
XML::SAX::Base - Base class SAX Drivers and Filters
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
package MyFilter;
|
|
use XML::SAX::Base;
|
|
@ISA = ('XML::SAX::Base');
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This module has a very simple task - to be a base class for PerlSAX
|
|
drivers and filters. It's default behaviour is to pass the input directly
|
|
to the output unchanged. It can be useful to use this module as a base class
|
|
so you don't have to, for example, implement the characters() callback.
|
|
|
|
The main advantages that it provides are easy dispatching of events the right
|
|
way (ie it takes care for you of checking that the handler has implemented
|
|
that method, or has defined an AUTOLOAD), and the guarantee that filters
|
|
will pass along events that they aren't implementing to handlers downstream
|
|
that might nevertheless be interested in them.
|
|
|
|
=head1 WRITING SAX DRIVERS AND FILTERS
|
|
|
|
The Perl Sax API Reference is at L<http://perl-xml.sourceforge.net/perl-sax/>.
|
|
|
|
Writing SAX Filters is tremendously easy: all you need to do is
|
|
inherit from this module, and define the events you want to handle. A
|
|
more detailed explanation can be found at
|
|
http://www.xml.com/pub/a/2001/10/10/sax-filters.html.
|
|
|
|
Writing Drivers is equally simple. The one thing you need to pay
|
|
attention to is B<NOT> to call events yourself (this applies to Filters
|
|
as well). For instance:
|
|
|
|
package MyFilter;
|
|
use base qw(XML::SAX::Base);
|
|
|
|
sub start_element {
|
|
my $self = shift;
|
|
my $data = shift;
|
|
# do something
|
|
$self->{Handler}->start_element($data); # BAD
|
|
}
|
|
|
|
The above example works well as precisely that: an example. But it has
|
|
several faults: 1) it doesn't test to see whether the handler defines
|
|
start_element. Perhaps it doesn't want to see that event, in which
|
|
case you shouldn't throw it (otherwise it'll die). 2) it doesn't check
|
|
ContentHandler and then Handler (ie it doesn't look to see that the
|
|
user hasn't requested events on a specific handler, and if not on the
|
|
default one), 3) if it did check all that, not only would the code be
|
|
cumbersome (see this module's source to get an idea) but it would also
|
|
probably have to check for a DocumentHandler (in case this were SAX1)
|
|
and for AUTOLOADs potentially defined in all these packages. As you can
|
|
tell, that would be fairly painful. Instead of going through that,
|
|
simply remember to use code similar to the following instead:
|
|
|
|
package MyFilter;
|
|
use base qw(XML::SAX::Base);
|
|
|
|
sub start_element {
|
|
my $self = shift;
|
|
my $data = shift;
|
|
# do something to filter
|
|
$self->SUPER::start_element($data); # GOOD (and easy) !
|
|
}
|
|
|
|
This way, once you've done your job you hand the ball back to
|
|
XML::SAX::Base and it takes care of all those problems for you!
|
|
|
|
Note that the above example doesn't apply to filters only, drivers
|
|
will benefit from the exact same feature.
|
|
|
|
=head1 METHODS
|
|
|
|
A number of methods are defined within this class for the purpose of
|
|
inheritance. Some probably don't need to be overridden (eg parse_file)
|
|
but some clearly should be (eg parse). Options for these methods are
|
|
described in the PerlSAX2 specification available from
|
|
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/~checkout~/perl-xml/libxml-perl/doc/sax-2.0.html?rev=HEAD&content-type=text/html.
|
|
|
|
=over 4
|
|
|
|
=item * parse
|
|
|
|
The parse method is the main entry point to parsing documents. Internally
|
|
the parse method will detect what type of "thing" you are parsing, and
|
|
call the appropriate method in your implementation class. Here is the
|
|
mapping table of what is in the Source options (see the Perl SAX 2.0
|
|
specification for the meaning of these values):
|
|
|
|
Source Contains parse() calls
|
|
=============== =============
|
|
CharacterStream (*) _parse_characterstream($stream, $options)
|
|
ByteStream _parse_bytestream($stream, $options)
|
|
String _parse_string($string, $options)
|
|
SystemId _parse_systemid($string, $options)
|
|
|
|
However note that these methods may not be sensible if your driver class
|
|
is not for parsing XML. An example might be a DBI driver that generates
|
|
XML/SAX from a database table. If that is the case, you likely want to
|
|
write your own parse() method.
|
|
|
|
Also note that the Source may contain both a PublicId entry, and an
|
|
Encoding entry. To get at these, examine $options->{Source} as passed
|
|
to your method.
|
|
|
|
(*) A CharacterStream is a filehandle that does not need any encoding
|
|
translation done on it. This is implemented as a regular filehandle
|
|
and only works under Perl 5.7.2 or higher using PerlIO. To get a single
|
|
character, or number of characters from it, use the perl core read()
|
|
function. To get a single byte from it (or number of bytes), you can
|
|
use sysread(). The encoding of the stream should be in the Encoding
|
|
entry for the Source.
|
|
|
|
=item * parse_file, parse_uri, parse_string
|
|
|
|
These are all convenience variations on parse(), and in fact simply
|
|
set up the options before calling it. You probably don't need to
|
|
override these.
|
|
|
|
=item * get_options
|
|
|
|
This is a convenience method to get options in SAX2 style, or more
|
|
generically either as hashes or as hashrefs (it returns a hashref).
|
|
You will probably want to use this method in your own implementations
|
|
of parse() and of new().
|
|
|
|
=item * get_feature, set_feature
|
|
|
|
These simply get and set features, and throw the
|
|
appropriate exceptions defined in the specification if need be.
|
|
|
|
If your subclass defines features not defined in this one,
|
|
then you should override these methods in such a way that they check for
|
|
your features first, and then call the base class's methods
|
|
for features not defined by your class. An example would be:
|
|
|
|
sub get_feature {
|
|
my $self = shift;
|
|
my $feat = shift;
|
|
if (exists $MY_FEATURES{$feat}) {
|
|
# handle the feature in various ways
|
|
}
|
|
else {
|
|
return $self->SUPER::get_feature($feat);
|
|
}
|
|
}
|
|
|
|
Currently this part is unimplemented.
|
|
|
|
|
|
=item * set_handler
|
|
|
|
This method takes a handler type (Handler, ContentHandler, etc.) and a
|
|
handler object as arguments, and changes the current handler for that
|
|
handler type, while taking care of resetting the internal state that
|
|
needs to be reset. This allows one to change a handler during parse
|
|
without running into problems (changing it on the parser object
|
|
directly will most likely cause trouble).
|
|
|
|
=item * set_document_handler, set_content_handler, set_dtd_handler, set_lexical_handler, set_decl_handler, set_error_handler, set_entity_resolver
|
|
|
|
These are just simple wrappers around the former method, and take a
|
|
handler object as their argument. Internally they simply call
|
|
set_handler with the correct arguments.
|
|
|
|
=item * get_handler
|
|
|
|
The inverse of set_handler, this method takes a an optional string containing a handler type (DTDHandler,
|
|
ContentHandler, etc. 'Handler' is used if no type is passed). It returns a reference to the object that implements
|
|
that class, or undef if that handler type is not set for the current driver/filter.
|
|
|
|
=item * get_document_handler, get_content_handler, get_dtd_handler, get_lexical_handler, get_decl_handler,
|
|
get_error_handler, get_entity_resolver
|
|
|
|
These are just simple wrappers around the get_handler() method, and take no arguments. Internally
|
|
they simply call get_handler with the correct handler type name.
|
|
|
|
=back
|
|
|
|
It would be rather useless to describe all the methods that this
|
|
module implements here. They are all the methods supported in SAX1 and
|
|
SAX2. In case your memory is a little short, here is a list. The
|
|
apparent duplicates are there so that both versions of SAX can be
|
|
supported.
|
|
|
|
=over 4
|
|
|
|
=item * start_document
|
|
|
|
=item * end_document
|
|
|
|
=item * start_element
|
|
|
|
=item * start_document
|
|
|
|
=item * end_document
|
|
|
|
=item * start_element
|
|
|
|
=item * end_element
|
|
|
|
=item * characters
|
|
|
|
=item * processing_instruction
|
|
|
|
=item * ignorable_whitespace
|
|
|
|
=item * set_document_locator
|
|
|
|
=item * start_prefix_mapping
|
|
|
|
=item * end_prefix_mapping
|
|
|
|
=item * skipped_entity
|
|
|
|
=item * start_cdata
|
|
|
|
=item * end_cdata
|
|
|
|
=item * comment
|
|
|
|
=item * entity_reference
|
|
|
|
=item * notation_decl
|
|
|
|
=item * unparsed_entity_decl
|
|
|
|
=item * element_decl
|
|
|
|
=item * attlist_decl
|
|
|
|
=item * doctype_decl
|
|
|
|
=item * xml_decl
|
|
|
|
=item * entity_decl
|
|
|
|
=item * attribute_decl
|
|
|
|
=item * internal_entity_decl
|
|
|
|
=item * external_entity_decl
|
|
|
|
=item * resolve_entity
|
|
|
|
=item * start_dtd
|
|
|
|
=item * end_dtd
|
|
|
|
=item * start_entity
|
|
|
|
=item * end_entity
|
|
|
|
=item * warning
|
|
|
|
=item * error
|
|
|
|
=item * fatal_error
|
|
|
|
=back
|
|
|
|
=head1 TODO
|
|
|
|
- more tests
|
|
- conform to the "SAX Filters" and "Java and DOM compatibility"
|
|
sections of the SAX2 document.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Kip Hampton (khampton@totalcinema.com) did most of the work, after porting
|
|
it from XML::Filter::Base.
|
|
|
|
Robin Berjon (robin@knowscape.com) pitched in with patches to make it
|
|
usable as a base for drivers as well as filters, along with other patches.
|
|
|
|
Matt Sergeant (matt@sergeant.org) wrote the original XML::Filter::Base,
|
|
and patched a few things here and there, and imported it into
|
|
the XML::SAX distribution.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<XML::SAX>
|
|
|
|
=cut
|
|
|
|
FOOTER
|
|
|
|
|
|
return $code;
|
|
}
|
|
|
|
|
|
sub write_xml_sax_base {
|
|
confirm_forced_update();
|
|
|
|
my $path = File::Spec->catfile("lib", "XML", "SAX", "Base.pm");
|
|
save_original_xml_sax_base($path);
|
|
|
|
my $code = build_xml_sax_base();
|
|
$code = add_version_stanzas($code);
|
|
|
|
open my $fh, ">", $path or die "Cannot write $path: $!";
|
|
print $fh $code;
|
|
close $fh or die "Error writing $path: $!";
|
|
print "Wrote $path\n";
|
|
}
|
|
|
|
|
|
sub confirm_forced_update {
|
|
return if grep { $_ eq '--force' } @ARGV;
|
|
|
|
print <<'EOF';
|
|
*** WARNING ***
|
|
|
|
The BuildSAXBase.pl script is used to generate the lib/XML/SAX/Base.pm file.
|
|
However a pre-generated version of Base.pm is included in the distribution
|
|
so you do not need to run this script unless you intend to modify the code.
|
|
|
|
You must use the --force option to deliberately overwrite the distributed
|
|
version of lib/XML/SAX/Base.pm
|
|
|
|
EOF
|
|
|
|
exit;
|
|
}
|
|
|
|
|
|
sub save_original_xml_sax_base {
|
|
my($path) = @_;
|
|
|
|
return unless -e $path;
|
|
(my $save_path = $path) =~ s{Base}{Base-orig};
|
|
return if -e $save_path;
|
|
print "Saving $path to $save_path\n";
|
|
rename($path, $save_path);
|
|
}
|
|
|
|
|
|
sub add_version_stanzas {
|
|
my($code) = @_;
|
|
|
|
my $version = get_xml_sax_base_version();
|
|
$code =~ s<^(package\s+(\w[:\w]+).*?\n)>
|
|
<${1}BEGIN {\n \$${2}::VERSION = '$version';\n}\n>mg;
|
|
return $code;
|
|
}
|
|
|
|
|
|
sub get_xml_sax_base_version {
|
|
open my $fh, '<', 'dist.ini' or die "open(<dist.ini): $!";
|
|
while(<$fh>) {
|
|
m{^\s*version\s*=\s*(\S+)} && return $1;
|
|
}
|
|
die "Failed to find version in dist.ini";
|
|
}
|
|
|