Import Upstream version 2.46
This commit is contained in:
commit
062d493759
|
@ -0,0 +1,544 @@
|
|||
Revision history for Perl extension XML::Parser.
|
||||
|
||||
2.46 2019-09-24 (by Todd Rinaldo)
|
||||
- use foreach not for for loops
|
||||
- produce README.md so travis will show up on github
|
||||
- remove use vars and switch to our.
|
||||
- travis-ci testing from 5.8..5.28
|
||||
- Convert XML::Parser to use 3 arg opens with no barewords.
|
||||
- Migrate tracker to github
|
||||
- Switch to XSLoader
|
||||
- Fix a buffer overwrite in parse_stream()
|
||||
|
||||
2.44 2015-01-12 (by Todd Rinaldo)
|
||||
- RT 99098 - Revert "Add more useful error message on parse to Expat". It breaks
|
||||
XML::Twig. Calling code will need to do this if it's needed.
|
||||
- RT 100959 - Add use FileHandle to t/astress.t - Make perl 5.10.0 happy.
|
||||
|
||||
2.43 2014-12-11 (by Todd Rinaldo)
|
||||
- POD patch to man from Debian via Nicholas Bamber
|
||||
- POD patch from Debian via gregor herrmann.
|
||||
- Add more useful error message on parse to Expat
|
||||
- Fix LWP dependency to be LWP::Useragent
|
||||
- Bump to 2.43 for overdue release to CPAN.
|
||||
|
||||
2.42_01 2013-07-12 (by Todd Rinaldo)
|
||||
- Added instructions to README for OSX
|
||||
- XS changes: stop using SvPV(string, PL_na)
|
||||
- Fix documentation typos
|
||||
|
||||
2.41 2011-06-01 (by Todd Rinaldo)
|
||||
- Tests are cleaned. promoting to stable. No changes since 2.40_02
|
||||
|
||||
2.40_02 2011-05-31 (by Todd Rinaldo)
|
||||
- TODO some tests which fail in Free BSD due to improper expat CVE patch
|
||||
http://www.freebsd.org/cgi/query-pr.cgi?pr=157469
|
||||
|
||||
2.40_01 2011-05-24 (by Todd Rinaldo)
|
||||
- better installation instructions
|
||||
- Small spelling patches from Debian package - Thanks Nicholas Bamber
|
||||
- RT 68399 - Upgrade Devel::CheckLib to 0.93 to make it
|
||||
perl 5.14 compliant - qw()
|
||||
- RT 67207 - Stop doing tied on globs - Thanks sprout
|
||||
- RT 31319 - Fix doc links in POD for XML/Parser.pm
|
||||
|
||||
2.40 2010-09-16 (by Alexandr Ciornii)
|
||||
- Add windows-1251.enc, ibm866.enc, koi8-r.enc (Russian)
|
||||
- Add windows-1255.enc (Hebrew)
|
||||
- Update iso-8859-7.enc (RT#40712)
|
||||
- Use Devel::CheckLib
|
||||
- Better description of expat packages
|
||||
- Better Perl style in both code and docs
|
||||
|
||||
2.36
|
||||
- Fix for Carp::Heavy bugs
|
||||
|
||||
2.35 (mostly by Alexandr Ciornii)
|
||||
- Works in 5.10 (Andreas J. Koenig)
|
||||
- Added license in Makefile.PL (Alexandr Ciornii)
|
||||
- Makefile.PL also searches for expat in C:/lib/Expat-2.0.0 (Alexandr Ciornii)
|
||||
- No longer uses variable named 'namespace' in Expat.xs (Jeff Hunter)
|
||||
|
||||
2.33
|
||||
- Fixed Tree style (grantm)
|
||||
- Fixed some non-utf8 stuff in DTDs (patch in XML::DOM tarball)
|
||||
|
||||
2.32
|
||||
- Memory leak fix (Juerd Waalboer).
|
||||
- Added windows-1252 encoding
|
||||
- Styles moved to separate .pm files to make loading faster and
|
||||
ease maintainence
|
||||
- Don't load IO::Handle unless we really need to
|
||||
|
||||
2.31 Tue Apr 2 13:39:51 EST 2002
|
||||
- Ilya Zakharevich <ilya@math.ohio-state.edu> and
|
||||
Dave Mitchell <davem@fdgroup.com> both provided patches to
|
||||
fix problems module had with 5.8.0
|
||||
- Dave Mitchell also made some UTF-8 related fixes to the test suite.
|
||||
2.30 Thu Oct 5 12:47:36 EDT 2000
|
||||
- Get rid of ContentStash global. Not that big a deal looking it up
|
||||
everytime and gets rid of a potential threading problem.
|
||||
- Switch to shareable library version of expat from sourceforge
|
||||
(i.e. no longer include expat source and require that libexpat
|
||||
be installed)
|
||||
- Bob Tribit <btribit@traffic.com> demonstrated a fix for problems
|
||||
in compiling under perl 5.6.0 with 5.005 threading.
|
||||
- Matt Sergeant <matt@sergeant.org> discovered a typo ('IO::Handler'
|
||||
instead of 'IO::Handle') in Expat.pm that caused IO::Handle objects
|
||||
to be treated as strings instead of handles.
|
||||
- Matt Sergeant also provided a patch to allow tied handles to work
|
||||
properly in calls to parse.
|
||||
- Eric Bohlman <ebohlman@netcom.com> reported a failure when
|
||||
incremental parsing and external parsing were used together.
|
||||
Need to give explicit package when calling Do_External_Parse
|
||||
from externalEntityRef otherwise fails when called through ExpatNB.
|
||||
2.29 Sun May 21 21:19:45 EDT 2000
|
||||
- In expat, notation declaration handler registration wasn't
|
||||
surviving through external entity references.
|
||||
- Chase Tingley <tingley@sundell.net> discovered that text
|
||||
accumulation in the Stream style wasn't working across processing
|
||||
instructions and recommended the appropriate fix.
|
||||
- Jochen Wiedmann <jochen.wiedmann@softwareag.com>, noted that
|
||||
you couldn't use ExpatNB directly because it wasn't setting
|
||||
the protective _State_ variable. Now doing this in the
|
||||
parse_more method of ExpatNB.
|
||||
- At the suggestion of Grant Hopwood <hopwoodg@valero.com>, now
|
||||
calling the env_proxy method on the LWP::UserAgent in the LWP
|
||||
external entity handler when it's created to set any proxies
|
||||
from environment variables.
|
||||
- Grant McLean, Matt Sergeant (& others I may have missed) noted that
|
||||
loading the LWP & URI modules slowed startup of the module, even
|
||||
if the application didn't need it. The default LWP handler is now
|
||||
dynamicly loaded (along with LWP & URI modules) the first time an
|
||||
external entity is referenced. Also provided a NoLWP option to
|
||||
XML::Parser that forces the file based external entity handler.
|
||||
- Fixed allocation errors in element declaration patches in expat
|
||||
- The Expat base method now works, even before expat starts parsing.
|
||||
- Changed the canonical script to take an optional file argument.
|
||||
- Enno Derksen <enno@att.com> reported that the attlist handler
|
||||
was not returning NOTATION type attlist information.
|
||||
- Michel Rodriguez <mrodrigu@ieee.org>, noted that the constructor
|
||||
for XML::Parser objects no longer checked for the existence of
|
||||
applications installed external entity handlers before installing
|
||||
the default ones.
|
||||
- Burkhard Meier <burkhard.meier@ixos.de> sent in a fix for
|
||||
compiler directives in Expat/Makefile.PL for Win32 machines.
|
||||
A change in 5.6.0 caused the old conditional to fail.
|
||||
- Forgot to document changes to the Entity declaration handler:
|
||||
there is an additional "IsParam" argument that indicates whether
|
||||
or not the entity is a parameter entity. This information is
|
||||
no longer passed on in the name.
|
||||
- Ben Low <ben@snrc.uow.edu.au> reported an undefined macro with
|
||||
version 5.004_04.
|
||||
2.28 Mon Mar 27 21:21:50 EST 2000
|
||||
- Junked local (Expat.xs) declaration parsing and patched expat to
|
||||
handle XML declarations, element declarations, attlist declarations,
|
||||
and all entity declarations. By eliminating both shadow buffers and
|
||||
local declaration parsing in Expat.xs, I've eliminated the two most
|
||||
common sources of serious bugs in the expat interface.
|
||||
o thus fixed the segfault and parse position bugs reported by
|
||||
Ivan Kurmanov <iku@fnmail.com>
|
||||
o and the doctype bug reported by Kevin Lund
|
||||
<Kevin.Lund@westgroup.com>
|
||||
o The element declaration handler no longer receives a string,
|
||||
but an XML::Parser::ContentModel object that represents the
|
||||
parsed model, but still looks like a string if referred to as
|
||||
a string. This class is documented in the XML::Parser::Expat
|
||||
pod under "XML::Parser::ContentModel Methods".
|
||||
o The doctype declaration handler no longer receives the internal
|
||||
subset as a string, but in its place a true or undef value
|
||||
indicating whether or not there is an internal subset. Also,
|
||||
it's called prior to processing either the internal or external
|
||||
DTD subset (as suggested by Enno Derksen <enno@att.com>.)
|
||||
o There is a new DoctypeFin handler that's called after finishing
|
||||
parsing all of the DOCTYPE declaration, including any internal
|
||||
or external DTD declarations.
|
||||
o One bit of lossage is that recognized_string, original_string,
|
||||
and default_current no longer work inside declaration handlers.
|
||||
- Added a handler that gets called after parsing external entities:
|
||||
ExternEntFin. Suggested by Jeff Horner <jhorner@netcentral.net>.
|
||||
- parsefile, file_ext_ent_handler, & lwp_ext_ent_handler now all
|
||||
set the base path. This problem has been raised more than once
|
||||
and I'm not sure to whom credit should be given.
|
||||
- The file_ext_ent_handler now opens a file handle instead of
|
||||
reading the entire entity at once.
|
||||
- Merged patches supplied by Larry Wall to (for perl 5.6 and beyond)
|
||||
tag generated strings as UTF-8, where appropriate.
|
||||
- Fixed a bug in xml_escape reported by Jerry Geiger <jgeiger@rios.de>.
|
||||
It failed when requesting escaping of perl regex meta-characters.
|
||||
- Laurent Caprani <caprani@pop.multimania.com> reported a bug in the
|
||||
Proc handler for the Debug style.
|
||||
- <chocolateboy@usa.net> sent in a patch for the element index
|
||||
mechanism. I was popping the stack too soon in the endElement fcn.
|
||||
- Jim Miner <jfm@winternet.com> sent in a patch to fix a warning in
|
||||
Expat.pm.
|
||||
- Kurt Starsinic pointed out that the eval used to check for string
|
||||
versus IO handle was leaving $@ dirty, thereby foiling higher
|
||||
level exception handlers
|
||||
- An expat question by Paul Prescod <paul@prescod.net> helped me
|
||||
see that exeptions in the parse call bypass the Expat release method,
|
||||
causing memory leaks.
|
||||
- Mark D. Anderson <mda@discerning.com> noted that calling
|
||||
recognized_string from the Final method caused a dump. There are
|
||||
a bunch of methods that should not be called after parsing has
|
||||
finished. These now have protective if statements around them.
|
||||
- Updated canonical utility to conform to newer version of Canonical
|
||||
XML working draft.
|
||||
2.27 Sat Sep 25 18:26:44 EDT 1999
|
||||
- Corrected documentation in Parser.pm
|
||||
- Deal with XML_NS and XML_BYTE_ORDER macros in Expat/Makefile.PL
|
||||
- Chris Thorman <chris@thorman.com> noted that "require 'URI::URL.pm'"
|
||||
in Parser.pm was in error (should be "require 'URI/URL.pm'")
|
||||
- Andrew McNaughton <andrew@scoop.co.nz> noted "use English" and
|
||||
use of '$&' slowed down regex handling for whole application, so
|
||||
they were excised from XML::Parser::Expat.
|
||||
- Work around "modification of read-only value" bug in perl 5.004
|
||||
- Enno Derksen <enno@att.com> reported that the Doctype handler
|
||||
wasn't being called when ParseParamEnt was set.
|
||||
- Now using Version 19990728 of expat, with local patches.
|
||||
- Got rid of shadow buffer
|
||||
o thus fixed the error reported by Ashley Sanders
|
||||
<a.sanders@mcc.ac.uk>
|
||||
o and removed ExpatNB limitations that Peter Billam
|
||||
<music@pjb.com.au> noted.
|
||||
- Vadim Konovalov <vkonovalov@lucent.com> had a problem compiling
|
||||
for multi-threading that was fixed by changing Perl_sv_setsv to
|
||||
sv_setsv.
|
||||
- Added new Expat method: skip_until(index)
|
||||
- Backward incompatible change to method xml_escape: to get former
|
||||
behavior use $xp->xml_escape($string, '>', ...)
|
||||
- Added utility, canonical, to samples
|
||||
2.26 Sun Jul 25 19:06:41 EDT 1999
|
||||
- Ken Beesley <ken.beesley@xrce.xerox.com> discovered that
|
||||
declarations in the external subset are not sent to registered
|
||||
handlers when there is no internal subset.
|
||||
- Fixed parse_dtd to work when entity values or attribute defaults
|
||||
are so large that they might be broken across multiple calls to
|
||||
the default handler.
|
||||
- For lwp_ext_ent_handler, use URI::URL instead of URI so that old
|
||||
5.004 installations will work with it.
|
||||
2.25 Fri Jul 23 06:23:43 EDT 1999
|
||||
- Now using Version 1990709 of expat. No local patches.
|
||||
- Numerous people reported a SEGV problem when running t/cdata
|
||||
on various platforms and versions of perl. The problem was
|
||||
introduced with the setHandlers change. In some cases an
|
||||
un-initialized value was being returned.
|
||||
- Added an additional external entity handler, lwp_ext_ent_handler,
|
||||
that deals with general URIs. It is installed instead of the
|
||||
"file only" handler if the LWP package is installed.
|
||||
2.24 Thu Jul 8 23:05:50 EDT 1999
|
||||
- KangChan Lee <dolphin@comeng.chungnam.ac.kr> supplied the
|
||||
EUC-KR encoding map.
|
||||
- Enno Derksen <enno@att.com> forwarded reports by Jon Eisenzopf
|
||||
<eisen@pobox.com> and Stefaan Onderbeke <onderbes@bec.bel.alcatel.be>
|
||||
about a core dump using XML::DOM. This was due to a bug in the
|
||||
prolog parsing part of XML::Parser.
|
||||
- Loic Dachary <loic@ceic.com> discovered that changing G_DISCARD to
|
||||
G_VOID introduced a small memory leak. Changed G_VOID back to
|
||||
G_DISCARD.
|
||||
- As suggested by Ben Holzman <bholzman@earthlink.net>, the
|
||||
setHandlers methods of both Parser and Expat now return lists that
|
||||
consist of type, handler pairs that correspond to the input, but
|
||||
the handlers returned are the ones that were in effect prior to
|
||||
the call.
|
||||
- Now using Version 19990626 of expat with a local patch (provided
|
||||
by James Clark.)
|
||||
- Added option ParseParamEnt. When set to a true value, parameter
|
||||
entities are parsed and the external DTD is read (unless standalone
|
||||
set to "Yes" in document).
|
||||
2.23 Mon Apr 26 21:30:28 EDT 1999
|
||||
- Fixed a bug in the ExpatNB class reported by Gabe Beged-Dov
|
||||
<begeddov@jfinity.com>. The ErrorMessage attribute wasn't
|
||||
being initialized for ExpatNB. This should have been done in
|
||||
the Expat constructor.
|
||||
- Applied patch provided by Nathan Kurz <nate@valleytel.net> to
|
||||
fix more perl stack manipulation errors in Expat.xs.
|
||||
- Applied another patch by Nathan to change perl_call_sv flag
|
||||
from G_DISCARD to G_VOID for callbacks, which helps performance.
|
||||
- Murata Makoto <murata@apsdc.ksp.fujixerox.co.jp> reported a
|
||||
problem on Win32 platforms that only showed up when UTF-16 was
|
||||
being used. The needed call to binmode was added to the parsefile
|
||||
methods.
|
||||
- Added documentation for release method that was added in release
|
||||
2.20 to Expat pod. (Point raised by <mookie@undef.com>)
|
||||
- Now using Version 19990425 of expat. No local patches.
|
||||
- Added specified_attr method and made ineffective the is_defaulted
|
||||
method.
|
||||
2.22 Sun Apr 4 11:47:25 EDT 1999
|
||||
- Loic Dachary <loic@ceic.com> reported a core dump with a small
|
||||
file with a comment that wasn't properly closed. Fixed in expat
|
||||
by updating positionPtr properly in final call of XML_Parse.
|
||||
(Reported to & acknowledged by James Clark.)
|
||||
- Made more fixes to Expat.xs position calculation.
|
||||
- Loic Dachary <loic@ceic.com> provided patches for fixing a
|
||||
memory growth problem with large documents. (Garbage collection
|
||||
wasn't happening frequently enough.)
|
||||
- As suggested by Gabe Beged-Dov <begeddov@jfinity.com>, added
|
||||
a non-blocking parse mechanism:
|
||||
- Added parse_start method to XML::Parser, which returns a
|
||||
XML::Parser::ExpatNB object.
|
||||
- Added XML::Parser::ExpatNB class, which is a subclass of
|
||||
Expat and has the additional methods parse_more & parse_done
|
||||
- Made some performance tweaks as suggested by performance thread
|
||||
on perl-xml discussion list. [With negligible results]
|
||||
- Tried to clarify Tree style structure in Parser pod
|
||||
2.21 Sun Mar 21 17:42:04 EST 1999
|
||||
- Warren Vik <wvik@whitebarn.com> provided patches for a bug
|
||||
introduced with the is_defaulted method. It manifested itself
|
||||
by bogusly reporting duplicate attributes.
|
||||
- Now using latest expat from ftp://ftp.jclark.com/pub/test/expat.zip,
|
||||
Version 19990307. (Plus any patches in Expat/expat.patches.)
|
||||
- As suggested by Tim Bray, added an xml_escape method to
|
||||
Expat.
|
||||
- Murray Nesbitt <murray@activestate.com> had build problems
|
||||
on Win32 that were solved by swapping 2 include files in
|
||||
Expat.xs
|
||||
- Added following Expat namespace methods:
|
||||
new_ns_prefixes
|
||||
expand_ns_prefix
|
||||
current_ns_prefixes
|
||||
- Fixed memory handling in recognized_string method to get rid
|
||||
of "Attempt to free unreferenced scalar" bug.
|
||||
2.20 Sun Feb 28 15:35:52 EST 1999
|
||||
- Fixed miscellaneous bugs in xmlfilter.
|
||||
- In the default external entity handler, prepend the base only
|
||||
for relative URLs.
|
||||
- Chris Nandor <pudge@pobox.com> provided patches for building
|
||||
on Macintosh.
|
||||
- As suggested by Matt Sergeant <Matthew.Sergeant@eml.ericsson.se>,
|
||||
added the finish method to Expat.
|
||||
- Matt also provided a fix to a bug he discovered in the Streams
|
||||
style.
|
||||
- Fixed a parse position bug reported by Enno Derksen <enno@att.com>
|
||||
that was affecting both original_string and position_in_context.
|
||||
- Fixed a gross memory leak reported by David Megginson,
|
||||
<david@megginson.com>: there was a circular reference to the Expat
|
||||
object and the internal end handler for context was not freeing
|
||||
element names after they were removed from the context stack.
|
||||
- Now using expat Version 19990109
|
||||
(Plus any patches in Expat/expat.patches)
|
||||
- Added is_defaulted method to Expat to tell if an attribute
|
||||
was defaulted. (Requested by Enno Derksen for XML::DOM.)
|
||||
- Matt Sergeant <Matthew.Sergeant@eml.ericcson.se> reported that
|
||||
the XML::Parser parse methods weren't propagating array context
|
||||
to the Final handler. Now they are.
|
||||
- Fixed more memory leaks (again reported by David Megginson).
|
||||
The SVs pointing to the handlers weren't being reclaimed when
|
||||
the callback vector was freed.
|
||||
- Added the element_index method to Expat.
|
||||
2.19 Sun Jan 3 11:23:45 EST 1999
|
||||
- When the recognized string is long enough, expat uses multiple
|
||||
calls to reportDefault. Fixed recString handler in Expat.xs to
|
||||
deal with this properly.
|
||||
- Added original_string method to Expat. This returns the untranslated
|
||||
string (i.e. original encoding) that caused current event.
|
||||
- Alberto Accomazzi <alberto@cfa0.harvard.edu> sent in more patches
|
||||
for perl5.005_54 incompatibilities.
|
||||
- Alberto also fingered a nasty memory bug in Expat.xs that arose
|
||||
sometimes when you registered a declaration handler but no
|
||||
default handler. It would give you a "Not a CODE reference"
|
||||
error in a place that wasn't using any CODE references.
|
||||
- <schinder@pobox.com> reported a problem with compiling expat
|
||||
on a Sun 4 due to non-exsitance of memmove on that OS. Provided
|
||||
a workaround in Makefile.PL
|
||||
- Now using expat Version 19981231 from James Clark's test directory.
|
||||
- Made patch to this version in order to support original_string
|
||||
(see Expat/expat.patches.)
|
||||
- Added CdataStart and CdataEnd handlers to expat.
|
||||
2.18 Sun Dec 27 07:39:23 EST 1998
|
||||
- Alberto Accomazzi <alberto@cfa0.harvard.edu> pointed out that
|
||||
the DESTROY sub in the new XML::Parser::Encinfo package was
|
||||
pointing to the wrong package for calling FreeEncoding.
|
||||
- Tarang Kumar Patel <mombasa@ptolemy.arc.nasa.gov> reported
|
||||
the mis-declaration of an integer as unsigned in the
|
||||
convert_to_unicode function in Expat.xs.
|
||||
- Glenn R. Kronschnabl <grk@arlut.utexas.edu> reported a problem
|
||||
with ExternEnt handlers when using parsefile. Turned out to be
|
||||
an unmatched ENTER; SAVETMPS pair that screwed up the Perl stack.
|
||||
- Tom Hughes <tom@compton.demon.co.uk> reported that the fix I put
|
||||
in for the swith to PL_sv.. names failed with 5.0005_54, since
|
||||
these became real variables instead of macros. Switched to just
|
||||
checking the PATCHLEVEL macro.
|
||||
- Yoshida Masato <yoshidam@inse.co.jp> provided the EUC-JP encodings
|
||||
(the corresponding XML files are in XML::Encoding 1.01 or later.)
|
||||
- With the advice of MURATA Makoto <murata@apsdc.ksp.fujixerox.co.jp>,
|
||||
removed the Shift_JIS encoding and replaced it with 4 variations
|
||||
he provided. He also provided an explanatory message.
|
||||
- Added the recognized_string method to Expat, deprecating
|
||||
default_current.
|
||||
- Now using expat Version 19981122 from James Clark's test directory
|
||||
(this fixes another bug with external entity reference handlers)
|
||||
- Added a default external entity handler that only accesses file:
|
||||
based URLs.
|
||||
2.17 Sun Dec 13 17:39:58 EST 1998
|
||||
- Replaced uses of malloc, realloc, and free with New, Renew,
|
||||
and Safefree respectively
|
||||
- In Expat.pm, fixed methods in_element and within_element to
|
||||
work correctly with namespaces.
|
||||
- xmlfilter - Substitute quoted equivalents for special characters
|
||||
in attribute values.
|
||||
- position_in_context was off by one line when position was at
|
||||
the end of line.
|
||||
- For the context methods in Expat.pm, do the right thing when
|
||||
the context list is empty.
|
||||
- Added methods xpcroak and xpcarp to Expat.
|
||||
- Alberto Accomazzi <alberto@cfa0.harvard.edu> noted that perl
|
||||
releases 5.005_5* (the pre 5.006 development versions) won't
|
||||
accept sv_undef (and related constants) anymore and we have
|
||||
to switch to PL_sv_...
|
||||
- Alberto also reported a warning in the newer versions of
|
||||
IO::Handle about input_record_separator not being treated on
|
||||
a per-handle basis.
|
||||
- Fixed bug that Jon Udell <udell@top.monad.net> reported in
|
||||
Stream style: Text handler most of the time didn't see proper
|
||||
context.
|
||||
- Added XML::Parser::Expat::load_encoding function and support
|
||||
for external encodings.
|
||||
2.16 Tue Oct 27 22:27:33 EST 1998
|
||||
- Fixed bug reported by Enno Derksen <enno@att.com>:
|
||||
Now treats parameter entity declarations correctly. The entity
|
||||
handler sees the name beginning with '%' if it's a parameter
|
||||
entity declaration.
|
||||
- Nigel Hutchison <nwoh@software-ag.de> pointed out that stream.t
|
||||
wasn't portable off Unix systems. Replaced with portable version.
|
||||
- Fixed bug reported by Enno Derksen <enno@att.com>:
|
||||
XML Declaration was firing off both XMLDecl handler *and* Default
|
||||
handler.
|
||||
- Added option NoExpand to Expat to turn off expansion of entity
|
||||
references when a default handler is set.
|
||||
2.15 Tue Oct 20 14:50:11 EDT 1998
|
||||
- In Expat's parse method, account for undefined previous
|
||||
record separators.
|
||||
- Simplify a couple of Expat methods.
|
||||
- Re-ordered Changes entries to put latest changes first.
|
||||
- In XML::Parser::new, set Handlers if not already set
|
||||
- New Handler (XMLDecl) for handling XML declarations
|
||||
- New Handler (Doctype) for handling DOCTYPE declarations
|
||||
- New Handler (Entity) for handling ENTITY declarations in
|
||||
the internal subset.
|
||||
- New Handler (Element) for handling ELEMENT declarations in
|
||||
the internal subset.
|
||||
- New Handler (Attlist) for handling ATTLIST declarations in
|
||||
the internal subset.
|
||||
- Documented new handlers
|
||||
- Added t/decl.t to test new handlers
|
||||
2.14 Sun Oct 11 22:17:15 EDT 1998
|
||||
- Always use method calls for streams.
|
||||
- Use perl's input_record_separator to find delimiter (i.e. each
|
||||
"line" is an entire XML doc with delimiter appended)
|
||||
- Deal with line being longer than buffer.
|
||||
2.13 Thu Oct 8 16:58:39 EDT 1998
|
||||
- Fixed a major oops in Expat.xs where I was trying to decrement
|
||||
a refcnt on an unallocated SV, leading to a segment violation.
|
||||
(Why did this show up on HPUX but not Linux?)
|
||||
2.12 Thu Oct 8 00:05:10 EDT 1998
|
||||
- Incorporated fix to t/astress.t from <fletch@phydeaux.org> (Mike
|
||||
Fletcher).
|
||||
- Change to xmlstats from <dblack@candle.superlink.net> (David
|
||||
Alan Black)
|
||||
- Access Handlers_Setters in Expat and Handler_Types in Parser
|
||||
through object reference (following admonition in perltoot
|
||||
about class data.)
|
||||
- Added Stream_Delimiter option to Expat.
|
||||
- In the parse_stream function in Expat.xs, if we either have a
|
||||
Stream_Delimiter or if there's no file descriptor, use method
|
||||
calls instead. For Stream_Delimiter in particular, the function
|
||||
now uses the getline method so it can check for the delimiter
|
||||
without consuming stuff past the delimiter from the stream.
|
||||
2.11 Sun Oct 4 22:15:53 EDT 1998
|
||||
- Swapped out local patch for expat and swapped in James Clark's
|
||||
patch.
|
||||
- Pass on all Parser attributes (other than those excluded by
|
||||
Non_Expat_Options) to the instance of Expat created at parse time.
|
||||
- New method for Expat: generate_ns_name
|
||||
- Split test.pl into t/*.t and change Makefile.PL so we don't do a
|
||||
useless descent into Expat subdir for testing.
|
||||
- Stop the numeric warning for eq_name and namespace method.
|
||||
2.10 Fri Sep 25 18:36:46 EDT 1998
|
||||
- Uses expat Version 19980924
|
||||
(with local patch - see Expat/expat/xmlparse/xmlparse.c.diff)
|
||||
- Use newSVpvn when PERL_VERSION >= 5.005
|
||||
- Completed xmlfilter
|
||||
- Added support for namespace processing:
|
||||
o Namespaces option to XML::Parser and XML::Parser::Expat
|
||||
o Two new methods in Expat:
|
||||
namespace - to return namespace associated with name
|
||||
eq_name - compare 2 names for equality across namespaces.
|
||||
- Use expat's new SetDefaultHandlerExpand instead of SetDefaultHandler
|
||||
so that entity expansion may continue even if the default handler
|
||||
is set.
|
||||
- Moved test.pl back up main level and changed to work with XML::Parser
|
||||
- Added tests for namespaces
|
||||
2.09 Fri Sep 18 10:33:38 EDT 1998
|
||||
- Fixed errors that caused -w to fret in XML::Parser.
|
||||
- Fixed depth method in XML::Parser::Expat
|
||||
- There were a few places in Expat.xs where garbage strings may
|
||||
have been returned due to the expat library giving us zero-length
|
||||
strings. Fixed by using a local version of newSVpv where length
|
||||
means length, even when zero.
|
||||
- The default handler setter in Expat.xs, was inappropriately setting
|
||||
cbv->dflt_sv when there was a null handler.
|
||||
2.08 Thu Sep 17 11:47:13 EDT 1998
|
||||
- Make XML::Parser higher-level re-usable parser objects. Old object
|
||||
now becomes XML::Parser::Expat.
|
||||
- The XML::Parser object now supports the style mechanism very close
|
||||
to that in the 1.0 version.
|
||||
2.07 Wed Sep 9 11:03:43 EDT 1998
|
||||
- Added some samples (xmlcomments & xmlstats)
|
||||
- Now requires 5.004 (due to sv_catpvf)
|
||||
- Changed Makefile.PL to allow automatic manification
|
||||
- Added a test that reads xml spec (to check buffer boundary errors)
|
||||
2.06 Tue Sep 1 10:40:41 EDT 1998
|
||||
- Fixed the methods current_line, current_byte, and current_column
|
||||
- Added some tests
|
||||
2.05 Mon Aug 31 15:29:42 EDT 1998
|
||||
- Made Makefile.PL changes suggested by Murray Nesbitt
|
||||
<murray@ActiveState.com> to support building on Win32
|
||||
and for making PPM binaries.
|
||||
- Added method parse
|
||||
- Changed parsestring and parsefile to use new parse method
|
||||
- Deprecated parsestring method
|
||||
- Improved error handling in the ExternEnt handler
|
||||
2.04 Wed Aug 26 13:25:01 EDT 1998
|
||||
- Uses expat Version 1.0 of August 14, 1998
|
||||
- Some document changes
|
||||
- Changed dist section in Makefile.PL
|
||||
- Added ExternEnt handler
|
||||
- Added tests for ExternEnt
|
||||
2.03 Fri Aug 21 17:19:26 EDT 1998
|
||||
- Changed InitEncoding to ProtocolEncoding. Default to none.
|
||||
Pass null string to expat's ParserCreate when there is no
|
||||
ProtocolEncoding.
|
||||
- Fixed bug in parsefile & parsestring where they were referring
|
||||
to an ErrorContext *method* instead of a field.
|
||||
- Fixed position_in_context bugs:
|
||||
-- 'last' in do {} while ();
|
||||
-- insert newline before pointer when no following newline
|
||||
in buffer.
|
||||
- Added some additional tests
|
||||
2.02 Thu Aug 20 14:05:08 EDT 1998
|
||||
- Fixed parsefile problem reported by
|
||||
"Robert Hanson" <robertha@zenweb.com>, using a modification of
|
||||
his suggested fix.
|
||||
- Responded to problem reported by
|
||||
Bart Schuller <schuller+perl-xml@lunatech.com>
|
||||
by pre-expanding parts of the XML_UPD macro to avoid confusing
|
||||
some versions of gcc.
|
||||
- Changed the constructor to take the option InitEncoding, which
|
||||
gets passed to the ParserCreate call. When not given, defaults
|
||||
to UTF-8.
|
||||
- Added method position_in_context
|
||||
- Added Constructor option ErrorContext and added reporting of
|
||||
errors in context.
|
||||
2.01 Wed Aug 19 11:42:42 EDT 1998
|
||||
- Added methods:
|
||||
default_current, base, current_line, current_column,
|
||||
current_byte, context
|
||||
- Added some tests
|
||||
- parsestring and parsefile now croak if they're re-used
|
||||
- Filled in some documentation
|
||||
2.00 Mon Aug 17 12:01:33 EDT 1998
|
||||
- repackaged with James Clark's most recent expat
|
||||
- changed to an API closer to expat
|
||||
1.00 March 1998
|
||||
- Larry Wall's original version
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,29 @@
|
|||
use ExtUtils::MakeMaker;
|
||||
use Config;
|
||||
use English;
|
||||
|
||||
my $libs = "-lexpat";
|
||||
my @extras = ();
|
||||
|
||||
push(@extras, INC => "-I$expat_incpath") if $expat_incpath;
|
||||
|
||||
$libs = "-L$expat_libpath $libs" if $expat_libpath;
|
||||
|
||||
push(@extras, CAPI => 'TRUE')
|
||||
if (($PERL_VERSION >= 5.005) and ($OSNAME eq 'MSWin32')
|
||||
and ($Config{archname} =~ /-object\b/i));
|
||||
|
||||
push(@extras,
|
||||
ABSTRACT => "Lowlevel access to James Clark's expat XML parser",
|
||||
AUTHOR => 'Matt Sergeant (matt@sergeant.org)')
|
||||
if ($ExtUtils::MakeMaker::VERSION >= 5.4301);
|
||||
|
||||
WriteMakefile(
|
||||
NAME => 'XML::Parser::Expat',
|
||||
C => ['Expat.c'],
|
||||
LIBS => $libs,
|
||||
XSPROTOARG => '-noprototypes',
|
||||
VERSION_FROM => 'Expat.pm',
|
||||
@extras
|
||||
);
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
/*****************************************************************
|
||||
** encoding.h
|
||||
**
|
||||
** Copyright 1998 Clark Cooper
|
||||
** All rights reserved.
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or
|
||||
** modify it under the same terms as Perl itself.
|
||||
*/
|
||||
|
||||
#ifndef ENCODING_H
|
||||
#define ENCODING_H 1
|
||||
|
||||
#define ENCMAP_MAGIC 0xfeebface
|
||||
|
||||
typedef struct prefixmap {
|
||||
unsigned char min;
|
||||
unsigned char len; /* 0 => 256 */
|
||||
unsigned short bmap_start;
|
||||
unsigned char ispfx[32];
|
||||
unsigned char ischar[32];
|
||||
} PrefixMap;
|
||||
|
||||
typedef struct encinf
|
||||
{
|
||||
unsigned short prefixes_size;
|
||||
unsigned short bytemap_size;
|
||||
int firstmap[256];
|
||||
PrefixMap *prefixes;
|
||||
unsigned short *bytemap;
|
||||
} Encinfo;
|
||||
|
||||
typedef struct encmaphdr
|
||||
{
|
||||
unsigned int magic;
|
||||
char name[40];
|
||||
unsigned short pfsize;
|
||||
unsigned short bmsize;
|
||||
int map[256];
|
||||
} Encmap_Header;
|
||||
|
||||
/*================================================================
|
||||
** Structure of Encoding map binary encoding
|
||||
**
|
||||
** Note that all shorts and ints are in network order,
|
||||
** so when packing or unpacking with perl, use 'n' and 'N' respectively.
|
||||
** In C, use the htonl family of functions.
|
||||
**
|
||||
** The basic structure is:
|
||||
**
|
||||
** _______________________
|
||||
** |Header (including map expat needs for 1st byte)
|
||||
** |PrefixMap * pfsize
|
||||
** | This section isn't included for single-byte encodings.
|
||||
** | For multiple byte encodings, when a byte represents a prefix
|
||||
** | then it indexes into this vector instead of mapping to a
|
||||
** | Unicode character. The PrefixMap type is declared above. The
|
||||
** | ispfx and ischar fields are bitvectors indicating whether
|
||||
** | the byte being mapped is a prefix or character respectively.
|
||||
** | If neither is set, then the character is not mapped to Unicode.
|
||||
** |
|
||||
** | The min field is the 1st byte mapped for this prefix; the
|
||||
** | len field is the number of bytes mapped; and bmap_start is
|
||||
** | the starting index of the map for this prefix in the overall
|
||||
** | map (next section).
|
||||
** |unsigned short * bmsize
|
||||
** | This section also is omitted for single-byte encodings.
|
||||
** | Each short is either a Unicode scalar or an index into the
|
||||
** | PrefixMap vector.
|
||||
**
|
||||
** The header for these files is declared above as the Encmap_Header type.
|
||||
** The magic field is a magic number which should match the ENCMAP_MAGIC
|
||||
** macro above. The next 40 bytes stores IANA registered name for the
|
||||
** encoding. The pfsize field holds the number of PrefixMaps, which should
|
||||
** be zero for single byte encodings. The bmsize field holds the number of
|
||||
** shorts used for the overall map.
|
||||
**
|
||||
** The map field contains either the Unicode scalar encoded by the 1st byte
|
||||
** or -n where n is the number of bytes that such a 1st byte implies (Expat
|
||||
** requires that the number of bytes to encode a character is indicated by
|
||||
** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
|
||||
**
|
||||
** If the encoding is a multiple byte encoding, then there will be PrefixMap
|
||||
** and character map sections. The 1st PrefixMap (index 0), covers a range
|
||||
** of bytes that includes all 1st byte prefixes.
|
||||
**
|
||||
** Look at convert_to_unicode in Expat.xs to see how this data structure
|
||||
** is used.
|
||||
*/
|
||||
|
||||
#endif /* ndef ENCODING_H */
|
|
@ -0,0 +1,24 @@
|
|||
#
|
||||
##### XML::Parser::Expat typemap
|
||||
#
|
||||
|
||||
XML_Parser T_PTR
|
||||
Encinfo * T_ENCOBJ
|
||||
|
||||
################################################################
|
||||
INPUT
|
||||
T_ENCOBJ
|
||||
if (sv_derived_from($arg, \"XML::Parser::Encinfo\")) {
|
||||
IV tmp = SvIV((SV*)SvRV($arg));
|
||||
$var = ($type) tmp;
|
||||
}
|
||||
else
|
||||
croak(\"$var is not of type XML::Parser::Encinfo\")
|
||||
################################################################
|
||||
OUTPUT
|
||||
T_ENCOBJ
|
||||
if ($var) {
|
||||
sv_setref_pv($arg, \"XML::Parser::Encinfo\", (void*)$var);
|
||||
}
|
||||
else
|
||||
$arg = &PL_sv_undef;
|
|
@ -0,0 +1,68 @@
|
|||
inc/Devel/CheckLib.pm
|
||||
Changes Change log
|
||||
Expat/Expat.pm XML::Parser::Expat module
|
||||
Expat/Expat.xs Extension library
|
||||
Expat/Makefile.PL perl MakeMaker script for XML::Parser::Expat
|
||||
Expat/encoding.h Header file; describes *.enc structure
|
||||
Expat/typemap XS typemap
|
||||
MANIFEST This file
|
||||
Makefile.PL perl MakeMaker script for XML::Parser
|
||||
Parser.pm XML::Parser module
|
||||
Parser/LWPExternEnt.pl LWP based external entity handler
|
||||
Parser/Encodings/Japanese_Encodings.msg Message about Japanese encodings.
|
||||
Parser/Encodings/README Info about encoding maps
|
||||
Parser/Encodings/big5.enc Big5 binary encoding map
|
||||
Parser/Encodings/euc-kr.enc EUC-KR binary encoding map
|
||||
Parser/Encodings/iso-8859-2.enc ISO-8859-2 binary encoding map
|
||||
Parser/Encodings/iso-8859-3.enc ISO-8859-3 binary encoding map
|
||||
Parser/Encodings/iso-8859-4.enc ISO-8859-4 binary encoding map
|
||||
Parser/Encodings/iso-8859-5.enc ISO-8859-5 binary encoding map
|
||||
Parser/Encodings/iso-8859-7.enc ISO-8859-7 binary encoding map
|
||||
Parser/Encodings/iso-8859-8.enc ISO-8859-8 binary encoding map
|
||||
Parser/Encodings/iso-8859-9.enc ISO-8859-9 binary encoding map
|
||||
Parser/Encodings/iso-8859-15.enc ISO-8859-15 binary encoding map
|
||||
Parser/Encodings/windows-1250.enc cp1250-WinLatin2 binary encoding map
|
||||
Parser/Encodings/windows-1251.enc cp1251-Russian binary encoding map
|
||||
Parser/Encodings/windows-1252.enc cp1252-WinLatin1 binary encoding map
|
||||
Parser/Encodings/windows-1255.enc hebrew
|
||||
Parser/Encodings/x-euc-jp-jisx0221.enc X-euc-jp-jisx0221 encoding map
|
||||
Parser/Encodings/x-euc-jp-unicode.enc X-euc-jp-unicde encoding map
|
||||
Parser/Encodings/x-sjis-cp932.enc x-sjis-cp932 encoding map
|
||||
Parser/Encodings/x-sjis-jdk117.enc x-sjis-jdk117 encoding map
|
||||
Parser/Encodings/x-sjis-jisx0221.enc x-sjis-jisx0221 encoding map
|
||||
Parser/Encodings/x-sjis-unicode.enc x-sjis-unicode encoding map
|
||||
Parser/Encodings/ibm866.enc
|
||||
Parser/Encodings/koi8-r.enc
|
||||
Parser/Style/Debug.pm Debug style parser
|
||||
Parser/Style/Objects.pm Objects style parser
|
||||
Parser/Style/Stream.pm Stream style parser
|
||||
Parser/Style/Subs.pm Subs style parser
|
||||
Parser/Style/Tree.pm Tree style parser
|
||||
README Short explanation
|
||||
samples/canonical A utility to generate canonical XML
|
||||
samples/canontst.xml An xml document to demonstrate canonical
|
||||
samples/ctest.dtd An external DTD used by canontst.xml
|
||||
samples/REC-xml-19980210.xml The XML spec in xml form
|
||||
samples/xmlcomments A utility to extract comments
|
||||
samples/xmlfilter A utility to filter elements
|
||||
samples/xmlstats A utility to report on element statistics
|
||||
t/astress.t Test script
|
||||
t/cdata.t Test script
|
||||
t/decl.t Test script
|
||||
t/defaulted.t Test script
|
||||
t/encoding.t Test script
|
||||
t/external_ent.t Test script
|
||||
t/file.t Test script
|
||||
t/file_open_scalar.t Test script
|
||||
t/finish.t Test script
|
||||
t/ext.ent External entity for parament.t test
|
||||
t/ext2.ent External entity for parament.t test
|
||||
t/foo.dtd External DTD for parament.t test
|
||||
t/namespaces.t Test script
|
||||
t/parament.t Test script
|
||||
t/partial.t Test script
|
||||
t/skip.t Test script
|
||||
t/stream.t Test script
|
||||
t/styles.t Test script
|
||||
META.yml Module YAML meta-data (added by MakeMaker)
|
||||
META.json Module JSON meta-data (added by MakeMaker)
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"abstract" : "A perl module for parsing XML documents",
|
||||
"author" : [
|
||||
"Clark Cooper (coopercc@netheaven.com)"
|
||||
],
|
||||
"dynamic_config" : 1,
|
||||
"generated_by" : "ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010",
|
||||
"license" : [
|
||||
"perl_5"
|
||||
],
|
||||
"meta-spec" : {
|
||||
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
|
||||
"version" : 2
|
||||
},
|
||||
"name" : "XML-Parser",
|
||||
"no_index" : {
|
||||
"directory" : [
|
||||
"t",
|
||||
"inc"
|
||||
]
|
||||
},
|
||||
"prereqs" : {
|
||||
"build" : {
|
||||
"requires" : {
|
||||
"ExtUtils::MakeMaker" : "0"
|
||||
}
|
||||
},
|
||||
"configure" : {
|
||||
"requires" : {
|
||||
"ExtUtils::MakeMaker" : "0"
|
||||
}
|
||||
},
|
||||
"runtime" : {
|
||||
"requires" : {
|
||||
"LWP::UserAgent" : "0",
|
||||
"perl" : "5.00405"
|
||||
}
|
||||
},
|
||||
"test" : {
|
||||
"requires" : {
|
||||
"Test::More" : "0",
|
||||
"warnings" : "0"
|
||||
}
|
||||
}
|
||||
},
|
||||
"release_status" : "stable",
|
||||
"resources" : {
|
||||
"bugtracker" : {
|
||||
"web" : "https://github.com/toddr/XML-Parser/issues"
|
||||
},
|
||||
"repository" : {
|
||||
"url" : "http://github.com/toddr/XML-Parser"
|
||||
}
|
||||
},
|
||||
"version" : "2.46",
|
||||
"x_serialization_backend" : "JSON::PP version 2.97001"
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
---
|
||||
abstract: 'A perl module for parsing XML documents'
|
||||
author:
|
||||
- 'Clark Cooper (coopercc@netheaven.com)'
|
||||
build_requires:
|
||||
ExtUtils::MakeMaker: '0'
|
||||
Test::More: '0'
|
||||
warnings: '0'
|
||||
configure_requires:
|
||||
ExtUtils::MakeMaker: '0'
|
||||
dynamic_config: 1
|
||||
generated_by: 'ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010'
|
||||
license: perl
|
||||
meta-spec:
|
||||
url: http://module-build.sourceforge.net/META-spec-v1.4.html
|
||||
version: '1.4'
|
||||
name: XML-Parser
|
||||
no_index:
|
||||
directory:
|
||||
- t
|
||||
- inc
|
||||
requires:
|
||||
LWP::UserAgent: '0'
|
||||
perl: '5.00405'
|
||||
resources:
|
||||
bugtracker: https://github.com/toddr/XML-Parser/issues
|
||||
repository: http://github.com/toddr/XML-Parser
|
||||
version: '2.46'
|
||||
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
|
|
@ -0,0 +1,159 @@
|
|||
use 5.004005; #Devel::CheckLib
|
||||
use ExtUtils::MakeMaker;
|
||||
use lib qw(inc);
|
||||
use Devel::CheckLib;
|
||||
|
||||
use Config;
|
||||
|
||||
$expat_libpath = $ENV{EXPATLIBPATH} || '';
|
||||
$expat_incpath = $ENV{EXPATINCPATH} || '';
|
||||
|
||||
my @replacement_args;
|
||||
|
||||
foreach (@ARGV) {
|
||||
if (/^EXPAT(LIB|INC)PATH=(.+)/) {
|
||||
if ( $1 eq 'LIB' ) {
|
||||
$expat_libpath = $2;
|
||||
}
|
||||
else {
|
||||
$expat_incpath = $2;
|
||||
}
|
||||
|
||||
#push(@replacement_args, "$1=$2");
|
||||
}
|
||||
else {
|
||||
push( @replacement_args, $_ );
|
||||
}
|
||||
}
|
||||
|
||||
@ARGV = @replacement_args;
|
||||
|
||||
unless (
|
||||
check_lib( # fill in what you prompted the user for here
|
||||
lib => [qw(expat)],
|
||||
header => ['expat.h'],
|
||||
incpath => $expat_incpath,
|
||||
( $expat_libpath ? ( libpath => $expat_libpath ) : () ),
|
||||
)
|
||||
) {
|
||||
warn <<'Expat_Not_Installed;';
|
||||
|
||||
Expat must be installed prior to building XML::Parser and I can't find
|
||||
it in the standard library directories. Install 'expat-devel' (or
|
||||
'libexpat1-dev') package with your OS package manager. See 'README'.
|
||||
|
||||
Or you can download expat from:
|
||||
|
||||
http://sourceforge.net/projects/expat/
|
||||
|
||||
If expat is installed, but in a non-standard directory, then use the
|
||||
following options to Makefile.PL:
|
||||
|
||||
EXPATLIBPATH=... To set the directory in which to find libexpat
|
||||
|
||||
EXPATINCPATH=... To set the directory in which to find expat.h
|
||||
|
||||
For example:
|
||||
|
||||
perl Makefile.PL EXPATLIBPATH=/home/me/lib EXPATINCPATH=/home/me/include
|
||||
|
||||
Note that if you build against a shareable library in a non-standard location
|
||||
you may (on some platforms) also have to set your LD_LIBRARY_PATH environment
|
||||
variable at run time for perl to find the library.
|
||||
|
||||
Expat_Not_Installed;
|
||||
|
||||
# exiting before Makefile generation silences CPANTesters reports
|
||||
# when expat is not available.
|
||||
exit 0;
|
||||
}
|
||||
|
||||
if ( not $expat_libpath and $] >= 5.006001 and $^O ne 'MSWin32' ) {
|
||||
require ExtUtils::Liblist; # Buggy before this
|
||||
($expat_libpath) = ExtUtils::Liblist->ext('-lexpat');
|
||||
}
|
||||
|
||||
# Don't try to descend into Expat directory for testing
|
||||
|
||||
sub MY::test {
|
||||
my $self = shift;
|
||||
|
||||
my $hold = delete $self->{DIR};
|
||||
my $ret = $self->MM::test(@_);
|
||||
$self->{DIR} = $hold if defined($hold);
|
||||
$ret;
|
||||
}
|
||||
|
||||
my @extras = ();
|
||||
|
||||
push(
|
||||
@extras,
|
||||
CAPI => 'TRUE'
|
||||
)
|
||||
if ( $PERL_VERSION >= 5.005
|
||||
and $OSNAME eq 'MSWin32'
|
||||
and $Config{archname} =~ /-object\b/i );
|
||||
|
||||
WriteMakefile1(
|
||||
ABSTRACT_FROM => 'Parser.pm',
|
||||
AUTHOR => 'Clark Cooper (coopercc@netheaven.com)',
|
||||
LICENSE => 'perl',
|
||||
MIN_PERL_VERSION => '5.00405',
|
||||
META_MERGE => {
|
||||
resources => {
|
||||
bugtracker => 'https://github.com/toddr/XML-Parser/issues',
|
||||
repository => 'http://github.com/toddr/XML-Parser',
|
||||
},
|
||||
},
|
||||
TEST_REQUIRES => {
|
||||
'Test::More' => 0,
|
||||
'warnings' => 0,
|
||||
},
|
||||
|
||||
NAME => 'XML::Parser',
|
||||
DIR => [qw(Expat)],
|
||||
dist => { COMPRESS => 'gzip', SUFFIX => '.gz' },
|
||||
VERSION_FROM => 'Parser.pm',
|
||||
PREREQ_PM => {
|
||||
'LWP::UserAgent' => 0, #for tests
|
||||
},
|
||||
$^O =~ /win/i
|
||||
? (
|
||||
dist => {
|
||||
TAR => 'ptar',
|
||||
TARFLAGS => '-c -C -f',
|
||||
},
|
||||
)
|
||||
: (),
|
||||
@extras
|
||||
);
|
||||
|
||||
sub WriteMakefile1 { #Compatibility code for old versions of EU::MM. Written by Alexandr Ciornii, version 0.23. Added by eumm-upgrade.
|
||||
my %params = @_;
|
||||
my $eumm_version = $ExtUtils::MakeMaker::VERSION;
|
||||
$eumm_version = eval $eumm_version;
|
||||
die "EXTRA_META is deprecated" if exists $params{EXTRA_META};
|
||||
die "License not specified" if not exists $params{LICENSE};
|
||||
if ( $params{AUTHOR} and ref( $params{AUTHOR} ) eq 'ARRAY' and $eumm_version < 6.5705 ) {
|
||||
$params{META_ADD}->{author} = $params{AUTHOR};
|
||||
$params{AUTHOR} = join( ', ', @{ $params{AUTHOR} } );
|
||||
}
|
||||
if ( $params{TEST_REQUIRES} and $eumm_version < 6.64 ) {
|
||||
$params{BUILD_REQUIRES} = { %{ $params{BUILD_REQUIRES} || {} }, %{ $params{TEST_REQUIRES} } };
|
||||
delete $params{TEST_REQUIRES};
|
||||
}
|
||||
if ( $params{BUILD_REQUIRES} and $eumm_version < 6.5503 ) {
|
||||
|
||||
#EUMM 6.5502 has problems with BUILD_REQUIRES
|
||||
$params{PREREQ_PM} = { %{ $params{PREREQ_PM} || {} }, %{ $params{BUILD_REQUIRES} } };
|
||||
delete $params{BUILD_REQUIRES};
|
||||
}
|
||||
delete $params{CONFIGURE_REQUIRES} if $eumm_version < 6.52;
|
||||
delete $params{MIN_PERL_VERSION} if $eumm_version < 6.48;
|
||||
delete $params{META_MERGE} if $eumm_version < 6.46;
|
||||
delete $params{META_ADD} if $eumm_version < 6.46;
|
||||
delete $params{LICENSE} if $eumm_version < 6.31;
|
||||
|
||||
WriteMakefile(%params);
|
||||
}
|
||||
|
|
@ -0,0 +1,832 @@
|
|||
# XML::Parser
|
||||
#
|
||||
# Copyright (c) 1998-2000 Larry Wall and Clark Cooper
|
||||
# All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the same terms as Perl itself.
|
||||
|
||||
package XML::Parser;
|
||||
|
||||
use strict;
|
||||
|
||||
our ( $VERSION, $LWP_load_failed );
|
||||
|
||||
use Carp;
|
||||
|
||||
BEGIN {
|
||||
require XML::Parser::Expat;
|
||||
$VERSION = '2.46';
|
||||
die "Parser.pm and Expat.pm versions don't match"
|
||||
unless $VERSION eq $XML::Parser::Expat::VERSION;
|
||||
}
|
||||
|
||||
$LWP_load_failed = 0;
|
||||
|
||||
sub new {
|
||||
my ( $class, %args ) = @_;
|
||||
my $style = $args{Style};
|
||||
|
||||
my $nonexopt = $args{Non_Expat_Options} ||= {};
|
||||
|
||||
$nonexopt->{Style} = 1;
|
||||
$nonexopt->{Non_Expat_Options} = 1;
|
||||
$nonexopt->{Handlers} = 1;
|
||||
$nonexopt->{_HNDL_TYPES} = 1;
|
||||
$nonexopt->{NoLWP} = 1;
|
||||
|
||||
$args{_HNDL_TYPES} = {%XML::Parser::Expat::Handler_Setters};
|
||||
$args{_HNDL_TYPES}->{Init} = 1;
|
||||
$args{_HNDL_TYPES}->{Final} = 1;
|
||||
|
||||
$args{Handlers} ||= {};
|
||||
my $handlers = $args{Handlers};
|
||||
|
||||
if ( defined($style) ) {
|
||||
my $stylepkg = $style;
|
||||
|
||||
if ( $stylepkg !~ /::/ ) {
|
||||
$stylepkg = "\u$style";
|
||||
|
||||
eval {
|
||||
my $fullpkg = "XML::Parser::Style::$stylepkg";
|
||||
my $stylefile = $fullpkg;
|
||||
$stylefile =~ s/::/\//g;
|
||||
require "$stylefile.pm";
|
||||
$stylepkg = $fullpkg;
|
||||
};
|
||||
if ($@) {
|
||||
|
||||
# fallback to old behaviour
|
||||
$stylepkg = "XML::Parser::$stylepkg";
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $htype ( keys %{ $args{_HNDL_TYPES} } ) {
|
||||
|
||||
# Handlers explicitly given override
|
||||
# handlers from the Style package
|
||||
unless ( defined( $handlers->{$htype} ) ) {
|
||||
|
||||
# A handler in the style package must either have
|
||||
# exactly the right case as the type name or a
|
||||
# completely lower case version of it.
|
||||
|
||||
my $hname = "${stylepkg}::$htype";
|
||||
if ( defined(&$hname) ) {
|
||||
$handlers->{$htype} = \&$hname;
|
||||
next;
|
||||
}
|
||||
|
||||
$hname = "${stylepkg}::\L$htype";
|
||||
if ( defined(&$hname) ) {
|
||||
$handlers->{$htype} = \&$hname;
|
||||
next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unless ( defined( $handlers->{ExternEnt} )
|
||||
or defined( $handlers->{ExternEntFin} ) ) {
|
||||
|
||||
if ( $args{NoLWP} or $LWP_load_failed ) {
|
||||
$handlers->{ExternEnt} = \&file_ext_ent_handler;
|
||||
$handlers->{ExternEntFin} = \&file_ext_ent_cleanup;
|
||||
}
|
||||
else {
|
||||
# The following just bootstraps the real LWP external entity
|
||||
# handler
|
||||
|
||||
$handlers->{ExternEnt} = \&initial_ext_ent_handler;
|
||||
|
||||
# No cleanup function available until LWPExternEnt.pl loaded
|
||||
}
|
||||
}
|
||||
|
||||
$args{Pkg} ||= caller;
|
||||
bless \%args, $class;
|
||||
} # End of new
|
||||
|
||||
sub setHandlers {
|
||||
my ( $self, @handler_pairs ) = @_;
|
||||
|
||||
croak('Uneven number of arguments to setHandlers method')
|
||||
if ( int(@handler_pairs) & 1 );
|
||||
|
||||
my @ret;
|
||||
while (@handler_pairs) {
|
||||
my $type = shift @handler_pairs;
|
||||
my $handler = shift @handler_pairs;
|
||||
unless ( defined( $self->{_HNDL_TYPES}->{$type} ) ) {
|
||||
my @types = sort keys %{ $self->{_HNDL_TYPES} };
|
||||
|
||||
croak("Unknown Parser handler type: $type\n Valid types: @types");
|
||||
}
|
||||
push( @ret, $type, $self->{Handlers}->{$type} );
|
||||
$self->{Handlers}->{$type} = $handler;
|
||||
}
|
||||
|
||||
return @ret;
|
||||
}
|
||||
|
||||
sub parse_start {
|
||||
my $self = shift;
|
||||
my @expat_options = ();
|
||||
|
||||
my ( $key, $val );
|
||||
while ( ( $key, $val ) = each %{$self} ) {
|
||||
push( @expat_options, $key, $val )
|
||||
unless exists $self->{Non_Expat_Options}->{$key};
|
||||
}
|
||||
|
||||
my %handlers = %{ $self->{Handlers} };
|
||||
my $init = delete $handlers{Init};
|
||||
my $final = delete $handlers{Final};
|
||||
|
||||
my $expatnb = XML::Parser::ExpatNB->new( @expat_options, @_ );
|
||||
$expatnb->setHandlers(%handlers);
|
||||
|
||||
&$init($expatnb)
|
||||
if defined($init);
|
||||
|
||||
$expatnb->{_State_} = 1;
|
||||
|
||||
$expatnb->{FinalHandler} = $final
|
||||
if defined($final);
|
||||
|
||||
return $expatnb;
|
||||
}
|
||||
|
||||
sub parse {
|
||||
my $self = shift;
|
||||
my $arg = shift;
|
||||
my @expat_options = ();
|
||||
my ( $key, $val );
|
||||
while ( ( $key, $val ) = each %{$self} ) {
|
||||
push( @expat_options, $key, $val )
|
||||
unless exists $self->{Non_Expat_Options}->{$key};
|
||||
}
|
||||
|
||||
my $expat = XML::Parser::Expat->new( @expat_options, @_ );
|
||||
my %handlers = %{ $self->{Handlers} };
|
||||
my $init = delete $handlers{Init};
|
||||
my $final = delete $handlers{Final};
|
||||
|
||||
$expat->setHandlers(%handlers);
|
||||
|
||||
if ( $self->{Base} ) {
|
||||
$expat->base( $self->{Base} );
|
||||
}
|
||||
|
||||
&$init($expat)
|
||||
if defined($init);
|
||||
|
||||
my @result = ();
|
||||
my $result;
|
||||
eval { $result = $expat->parse($arg); };
|
||||
my $err = $@;
|
||||
if ($err) {
|
||||
$expat->release;
|
||||
die $err;
|
||||
}
|
||||
|
||||
if ( $result and defined($final) ) {
|
||||
if (wantarray) {
|
||||
@result = &$final($expat);
|
||||
}
|
||||
else {
|
||||
$result = &$final($expat);
|
||||
}
|
||||
}
|
||||
|
||||
$expat->release;
|
||||
|
||||
return unless defined wantarray;
|
||||
return wantarray ? @result : $result;
|
||||
}
|
||||
|
||||
sub parsestring {
|
||||
my $self = shift;
|
||||
$self->parse(@_);
|
||||
}
|
||||
|
||||
sub parsefile {
|
||||
my $self = shift;
|
||||
my $file = shift;
|
||||
|
||||
open( my $fh, '<', $file ) or croak "Couldn't open $file:\n$!";
|
||||
binmode($fh);
|
||||
my @ret;
|
||||
my $ret;
|
||||
|
||||
$self->{Base} = $file;
|
||||
|
||||
if (wantarray) {
|
||||
eval { @ret = $self->parse( $fh, @_ ); };
|
||||
}
|
||||
else {
|
||||
eval { $ret = $self->parse( $fh, @_ ); };
|
||||
}
|
||||
my $err = $@;
|
||||
close($fh);
|
||||
die $err if $err;
|
||||
|
||||
return unless defined wantarray;
|
||||
return wantarray ? @ret : $ret;
|
||||
}
|
||||
|
||||
sub initial_ext_ent_handler {
|
||||
|
||||
# This just bootstraps in the real lwp_ext_ent_handler which
|
||||
# also loads the URI and LWP modules.
|
||||
|
||||
unless ($LWP_load_failed) {
|
||||
local ($^W) = 0;
|
||||
|
||||
my $stat = eval { require('XML/Parser/LWPExternEnt.pl'); };
|
||||
|
||||
if ($stat) {
|
||||
$_[0]->setHandlers(
|
||||
ExternEnt => \&lwp_ext_ent_handler,
|
||||
ExternEntFin => \&lwp_ext_ent_cleanup
|
||||
);
|
||||
|
||||
goto &lwp_ext_ent_handler;
|
||||
}
|
||||
|
||||
# Failed to load lwp handler, act as if NoLWP
|
||||
|
||||
$LWP_load_failed = 1;
|
||||
|
||||
my $cmsg = "Couldn't load LWP based external entity handler\n" . "Switching to file-based external entity handler\n" . " (To avoid this message, use NoLWP option to XML::Parser)\n";
|
||||
warn($cmsg);
|
||||
}
|
||||
|
||||
$_[0]->setHandlers(
|
||||
ExternEnt => \&file_ext_ent_handler,
|
||||
ExternEntFin => \&file_ext_ent_cleanup
|
||||
);
|
||||
goto &file_ext_ent_handler;
|
||||
|
||||
}
|
||||
|
||||
sub file_ext_ent_handler {
|
||||
my ( $xp, $base, $path ) = @_;
|
||||
|
||||
# Prepend base only for relative paths
|
||||
|
||||
if ( defined($base)
|
||||
and not( $path =~ m!^(?:[\\/]|\w+:)! ) ) {
|
||||
my $newpath = $base;
|
||||
$newpath =~ s![^\\/:]*$!$path!;
|
||||
$path = $newpath;
|
||||
}
|
||||
|
||||
if ( $path =~ /^\s*[|>+]/
|
||||
or $path =~ /\|\s*$/ ) {
|
||||
$xp->{ErrorMessage} .= "System ID ($path) contains Perl IO control characters";
|
||||
return undef;
|
||||
}
|
||||
|
||||
require IO::File;
|
||||
my $fh = IO::File->new($path);
|
||||
unless ( defined $fh ) {
|
||||
$xp->{ErrorMessage} .= "Failed to open $path:\n$!";
|
||||
return undef;
|
||||
}
|
||||
|
||||
$xp->{_BaseStack} ||= [];
|
||||
$xp->{_FhStack} ||= [];
|
||||
|
||||
push( @{ $xp->{_BaseStack} }, $base );
|
||||
push( @{ $xp->{_FhStack} }, $fh );
|
||||
|
||||
$xp->base($path);
|
||||
|
||||
return $fh;
|
||||
}
|
||||
|
||||
sub file_ext_ent_cleanup {
|
||||
my ($xp) = @_;
|
||||
|
||||
my $fh = pop( @{ $xp->{_FhStack} } );
|
||||
$fh->close;
|
||||
|
||||
my $base = pop( @{ $xp->{_BaseStack} } );
|
||||
$xp->base($base);
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser - A perl module for parsing XML documents
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Parser;
|
||||
|
||||
$p1 = XML::Parser->new(Style => 'Debug');
|
||||
$p1->parsefile('REC-xml-19980210.xml');
|
||||
$p1->parse('<foo id="me">Hello World</foo>');
|
||||
|
||||
# Alternative
|
||||
$p2 = XML::Parser->new(Handlers => {Start => \&handle_start,
|
||||
End => \&handle_end,
|
||||
Char => \&handle_char});
|
||||
$p2->parse($socket);
|
||||
|
||||
# Another alternative
|
||||
$p3 = XML::Parser->new(ErrorContext => 2);
|
||||
|
||||
$p3->setHandlers(Char => \&text,
|
||||
Default => \&other);
|
||||
|
||||
open(my $fh, 'xmlgenerator |');
|
||||
$p3->parse($foo, ProtocolEncoding => 'ISO-8859-1');
|
||||
close($foo);
|
||||
|
||||
$p3->parsefile('junk.xml', ErrorContext => 3);
|
||||
|
||||
=begin man
|
||||
.ds PI
|
||||
|
||||
=end man
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides ways to parse XML documents. It is built on top of
|
||||
L<XML::Parser::Expat>, which is a lower level interface to James Clark's
|
||||
expat library. Each call to one of the parsing methods creates a new
|
||||
instance of XML::Parser::Expat which is then used to parse the document.
|
||||
Expat options may be provided when the XML::Parser object is created.
|
||||
These options are then passed on to the Expat object on each parse call.
|
||||
They can also be given as extra arguments to the parse methods, in which
|
||||
case they override options given at XML::Parser creation time.
|
||||
|
||||
The behavior of the parser is controlled either by C<L</STYLES>> and/or
|
||||
C<L</HANDLERS>> options, or by L</setHandlers> method. These all provide
|
||||
mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat.
|
||||
If neither C<Style> nor C<Handlers> are specified, then parsing just
|
||||
checks the document for being well-formed.
|
||||
|
||||
When underlying handlers get called, they receive as their first parameter
|
||||
the I<Expat> object, not the Parser object.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
=over 4
|
||||
|
||||
=item new
|
||||
|
||||
This is a class method, the constructor for XML::Parser. Options are passed
|
||||
as keyword value pairs. Recognized options are:
|
||||
|
||||
=over 4
|
||||
|
||||
=item * Style
|
||||
|
||||
This option provides an easy way to create a given style of parser. The
|
||||
built in styles are: L<"Debug">, L<"Subs">, L<"Tree">, L<"Objects">,
|
||||
and L<"Stream">. These are all defined in separate packages under
|
||||
C<XML::Parser::Style::*>, and you can find further documentation for
|
||||
each style both below, and in those packages.
|
||||
|
||||
Custom styles can be provided by giving a full package name containing
|
||||
at least one '::'. This package should then have subs defined for each
|
||||
handler it wishes to have installed. See L<"STYLES"> below
|
||||
for a discussion of each built in style.
|
||||
|
||||
=item * Handlers
|
||||
|
||||
When provided, this option should be an anonymous hash containing as
|
||||
keys the type of handler and as values a sub reference to handle that
|
||||
type of event. All the handlers get passed as their 1st parameter the
|
||||
instance of expat that is parsing the document. Further details on
|
||||
handlers can be found in L<"HANDLERS">. Any handler set here
|
||||
overrides the corresponding handler set with the Style option.
|
||||
|
||||
=item * Pkg
|
||||
|
||||
Some styles will refer to subs defined in this package. If not provided,
|
||||
it defaults to the package which called the constructor.
|
||||
|
||||
=item * ErrorContext
|
||||
|
||||
This is an Expat option. When this option is defined, errors are reported
|
||||
in context. The value should be the number of lines to show on either side
|
||||
of the line in which the error occurred.
|
||||
|
||||
=item * ProtocolEncoding
|
||||
|
||||
This is an Expat option. This sets the protocol encoding name. It defaults
|
||||
to none. The built-in encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and
|
||||
C<US-ASCII>. Other encodings may be used if they have encoding maps in one
|
||||
of the directories in the @Encoding_Path list. Check L<"ENCODINGS"> for
|
||||
more information on encoding maps. Setting the protocol encoding overrides
|
||||
any encoding in the XML declaration.
|
||||
|
||||
=item * Namespaces
|
||||
|
||||
This is an Expat option. If this is set to a true value, then namespace
|
||||
processing is done during the parse. See L<XML::Parser::Expat/"Namespaces">
|
||||
for further discussion of namespace processing.
|
||||
|
||||
=item * NoExpand
|
||||
|
||||
This is an Expat option. Normally, the parser will try to expand references
|
||||
to entities defined in the internal subset. If this option is set to a true
|
||||
value, and a default handler is also set, then the default handler will be
|
||||
called when an entity reference is seen in text. This has no effect if a
|
||||
default handler has not been registered, and it has no effect on the expansion
|
||||
of entity references inside attribute values.
|
||||
|
||||
=item * Stream_Delimiter
|
||||
|
||||
This is an Expat option. It takes a string value. When this string is found
|
||||
alone on a line while parsing from a stream, then the parse is ended as if it
|
||||
saw an end of file. The intended use is with a stream of xml documents in a
|
||||
MIME multipart format. The string should not contain a trailing newline.
|
||||
|
||||
=item * ParseParamEnt
|
||||
|
||||
This is an Expat option. Unless standalone is set to "yes" in the XML
|
||||
declaration, setting this to a true value allows the external DTD to be read,
|
||||
and parameter entities to be parsed and expanded.
|
||||
|
||||
=item * NoLWP
|
||||
|
||||
This option has no effect if the ExternEnt or ExternEntFin handlers are
|
||||
directly set. Otherwise, if true, it forces the use of a file based external
|
||||
entity handler.
|
||||
|
||||
=item * Non_Expat_Options
|
||||
|
||||
If provided, this should be an anonymous hash whose keys are options that
|
||||
shouldn't be passed to Expat. This should only be of concern to those
|
||||
subclassing XML::Parser.
|
||||
|
||||
=back
|
||||
|
||||
=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
|
||||
|
||||
This method registers handlers for various parser events. It overrides any
|
||||
previous handlers registered through the Style or Handler options or through
|
||||
earlier calls to setHandlers. By providing a false or undefined value as
|
||||
the handler, the existing handler can be unset.
|
||||
|
||||
This method returns a list of type, handler pairs corresponding to the
|
||||
input. The handlers returned are the ones that were in effect prior to
|
||||
the call.
|
||||
|
||||
See a description of the handler types in L<"HANDLERS">.
|
||||
|
||||
=item parse(SOURCE [, OPT => OPT_VALUE [...]])
|
||||
|
||||
The SOURCE parameter should either be a string containing the whole XML
|
||||
document, or it should be an open IO::Handle. Constructor options to
|
||||
XML::Parser::Expat given as keyword-value pairs may follow the SOURCE
|
||||
parameter. These override, for this call, any options or attributes passed
|
||||
through from the XML::Parser instance.
|
||||
|
||||
A die call is thrown if a parse error occurs. Otherwise it will return 1
|
||||
or whatever is returned from the B<Final> handler, if one is installed.
|
||||
In other words, what parse may return depends on the style.
|
||||
|
||||
=item parsestring
|
||||
|
||||
This is just an alias for parse for backwards compatibility.
|
||||
|
||||
=item parsefile(FILE [, OPT => OPT_VALUE [...]])
|
||||
|
||||
Open FILE for reading, then call parse with the open handle. The file
|
||||
is closed no matter how parse returns. Returns what parse returns.
|
||||
|
||||
=item parse_start([ OPT => OPT_VALUE [...]])
|
||||
|
||||
Create and return a new instance of XML::Parser::ExpatNB. Constructor
|
||||
options may be provided. If an init handler has been provided, it is
|
||||
called before returning the ExpatNB object. Documents are parsed by
|
||||
making incremental calls to the parse_more method of this object, which
|
||||
takes a string. A single call to the parse_done method of this object,
|
||||
which takes no arguments, indicates that the document is finished.
|
||||
|
||||
If there is a final handler installed, it is executed by the parse_done
|
||||
method before returning and the parse_done method returns whatever is
|
||||
returned by the final handler.
|
||||
|
||||
=back
|
||||
|
||||
=head1 HANDLERS
|
||||
|
||||
Expat is an event based parser. As the parser recognizes parts of the
|
||||
document (say the start or end tag for an XML element), then any handlers
|
||||
registered for that type of an event are called with suitable parameters.
|
||||
All handlers receive an instance of XML::Parser::Expat as their first
|
||||
argument. See L<XML::Parser::Expat/"METHODS"> for a discussion of the
|
||||
methods that can be called on this object.
|
||||
|
||||
=head2 Init (Expat)
|
||||
|
||||
This is called just before the parsing of the document starts.
|
||||
|
||||
=head2 Final (Expat)
|
||||
|
||||
This is called just after parsing has finished, but only if no errors
|
||||
occurred during the parse. Parse returns what this returns.
|
||||
|
||||
=head2 Start (Expat, Element [, Attr, Val [,...]])
|
||||
|
||||
This event is generated when an XML start tag is recognized. Element is the
|
||||
name of the XML element type that is opened with the start tag. The Attr &
|
||||
Val pairs are generated for each attribute in the start tag.
|
||||
|
||||
=head2 End (Expat, Element)
|
||||
|
||||
This event is generated when an XML end tag is recognized. Note that
|
||||
an XML empty tag (<foo/>) generates both a start and an end event.
|
||||
|
||||
=head2 Char (Expat, String)
|
||||
|
||||
This event is generated when non-markup is recognized. The non-markup
|
||||
sequence of characters is in String. A single non-markup sequence of
|
||||
characters may generate multiple calls to this handler. Whatever the
|
||||
encoding of the string in the original document, this is given to the
|
||||
handler in UTF-8.
|
||||
|
||||
=head2 Proc (Expat, Target, Data)
|
||||
|
||||
This event is generated when a processing instruction is recognized.
|
||||
|
||||
=head2 Comment (Expat, Data)
|
||||
|
||||
This event is generated when a comment is recognized.
|
||||
|
||||
=head2 CdataStart (Expat)
|
||||
|
||||
This is called at the start of a CDATA section.
|
||||
|
||||
=head2 CdataEnd (Expat)
|
||||
|
||||
This is called at the end of a CDATA section.
|
||||
|
||||
=head2 Default (Expat, String)
|
||||
|
||||
This is called for any characters that don't have a registered handler.
|
||||
This includes both characters that are part of markup for which no
|
||||
events are generated (markup declarations) and characters that
|
||||
could generate events, but for which no handler has been registered.
|
||||
|
||||
Whatever the encoding in the original document, the string is returned to
|
||||
the handler in UTF-8.
|
||||
|
||||
=head2 Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation)
|
||||
|
||||
This is called for a declaration of an unparsed entity. Entity is the name
|
||||
of the entity. Base is the base to be used for resolving a relative URI.
|
||||
Sysid is the system id. Pubid is the public id. Notation is the notation
|
||||
name. Base and Pubid may be undefined.
|
||||
|
||||
=head2 Notation (Expat, Notation, Base, Sysid, Pubid)
|
||||
|
||||
This is called for a declaration of notation. Notation is the notation name.
|
||||
Base is the base to be used for resolving a relative URI. Sysid is the system
|
||||
id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
|
||||
|
||||
=head2 ExternEnt (Expat, Base, Sysid, Pubid)
|
||||
|
||||
This is called when an external entity is referenced. Base is the base to be
|
||||
used for resolving a relative URI. Sysid is the system id. Pubid is the public
|
||||
id. Base, and Pubid may be undefined.
|
||||
|
||||
This handler should either return a string, which represents the contents of
|
||||
the external entity, or return an open filehandle that can be read to obtain
|
||||
the contents of the external entity, or return undef, which indicates the
|
||||
external entity couldn't be found and will generate a parse error.
|
||||
|
||||
If an open filehandle is returned, it must be returned as either a glob
|
||||
(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
|
||||
|
||||
A default handler is installed for this event. The default handler is
|
||||
XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided with
|
||||
a true value, otherwise XML::Parser::file_ext_ent_handler is the default
|
||||
handler for external entities. Even without the NoLWP option, if the
|
||||
URI or LWP modules are missing, the file based handler ends up being used
|
||||
after giving a warning on the first external entity reference.
|
||||
|
||||
The LWP external entity handler will use proxies defined in the environment
|
||||
(http_proxy, ftp_proxy, etc.).
|
||||
|
||||
Please note that the LWP external entity handler reads the entire
|
||||
entity into a string and returns it, where as the file handler opens a
|
||||
filehandle.
|
||||
|
||||
Also note that the file external entity handler will likely choke on
|
||||
absolute URIs or file names that don't fit the conventions of the local
|
||||
operating system.
|
||||
|
||||
The expat base method can be used to set a basename for
|
||||
relative pathnames. If no basename is given, or if the basename is itself
|
||||
a relative name, then it is relative to the current working directory.
|
||||
|
||||
=head2 ExternEntFin (Expat)
|
||||
|
||||
This is called after parsing an external entity. It's not called unless
|
||||
an ExternEnt handler is also set. There is a default handler installed
|
||||
that pairs with the default ExternEnt handler.
|
||||
|
||||
If you're going to install your own ExternEnt handler, then you should
|
||||
set (or unset) this handler too.
|
||||
|
||||
=head2 Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)
|
||||
|
||||
This is called when an entity is declared. For internal entities, the Val
|
||||
parameter will contain the value and the remaining three parameters will be
|
||||
undefined. For external entities, the Val parameter will be undefined, the
|
||||
Sysid parameter will have the system id, the Pubid parameter will have the
|
||||
public id if it was provided (it will be undefined otherwise), the Ndata
|
||||
parameter will contain the notation for unparsed entities. If this is a
|
||||
parameter entity declaration, then the IsParam parameter is true.
|
||||
|
||||
Note that this handler and the Unparsed handler above overlap. If both are
|
||||
set, then this handler will not be called for unparsed entities.
|
||||
|
||||
=head2 Element (Expat, Name, Model)
|
||||
|
||||
The element handler is called when an element declaration is found. Name
|
||||
is the element name, and Model is the content model as an XML::Parser::Content
|
||||
object. See L<XML::Parser::Expat/"XML::Parser::ContentModel Methods">
|
||||
for methods available for this class.
|
||||
|
||||
=head2 Attlist (Expat, Elname, Attname, Type, Default, Fixed)
|
||||
|
||||
This handler is called for each attribute in an ATTLIST declaration.
|
||||
So an ATTLIST declaration that has multiple attributes will generate multiple
|
||||
calls to this handler. The Elname parameter is the name of the element with
|
||||
which the attribute is being associated. The Attname parameter is the name
|
||||
of the attribute. Type is the attribute type, given as a string. Default is
|
||||
the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted
|
||||
string (i.e. the returned string will begin and end with a quote character).
|
||||
If Fixed is true, then this is a fixed attribute.
|
||||
|
||||
=head2 Doctype (Expat, Name, Sysid, Pubid, Internal)
|
||||
|
||||
This handler is called for DOCTYPE declarations. Name is the document type
|
||||
name. Sysid is the system id of the document type, if it was provided,
|
||||
otherwise it's undefined. Pubid is the public id of the document type,
|
||||
which will be undefined if no public id was given. Internal is the internal
|
||||
subset, given as a string. If there was no internal subset, it will be
|
||||
undefined. Internal will contain all whitespace, comments, processing
|
||||
instructions, and declarations seen in the internal subset. The declarations
|
||||
will be there whether or not they have been processed by another handler
|
||||
(except for unparsed entities processed by the Unparsed handler). However,
|
||||
comments and processing instructions will not appear if they've been processed
|
||||
by their respective handlers.
|
||||
|
||||
=head2 * DoctypeFin (Parser)
|
||||
|
||||
This handler is called after parsing of the DOCTYPE declaration has finished,
|
||||
including any internal or external DTD declarations.
|
||||
|
||||
=head2 XMLDecl (Expat, Version, Encoding, Standalone)
|
||||
|
||||
This handler is called for xml declarations. Version is a string containing
|
||||
the version. Encoding is either undefined or contains an encoding string.
|
||||
Standalone will be either true, false, or undefined if the standalone attribute
|
||||
is yes, no, or not made respectively.
|
||||
|
||||
=head1 STYLES
|
||||
|
||||
=head2 Debug
|
||||
|
||||
This just prints out the document in outline form. Nothing special is
|
||||
returned by parse.
|
||||
|
||||
=head2 Subs
|
||||
|
||||
Each time an element starts, a sub by that name in the package specified
|
||||
by the Pkg option is called with the same parameters that the Start
|
||||
handler gets called with.
|
||||
|
||||
Each time an element ends, a sub with that name appended with an underscore
|
||||
("_"), is called with the same parameters that the End handler gets called
|
||||
with.
|
||||
|
||||
Nothing special is returned by parse.
|
||||
|
||||
=head2 Tree
|
||||
|
||||
Parse will return a parse tree for the document. Each node in the tree
|
||||
takes the form of a tag, content pair. Text nodes are represented with
|
||||
a pseudo-tag of "0" and the string that is their content. For elements,
|
||||
the content is an array reference. The first item in the array is a
|
||||
(possibly empty) hash reference containing attributes. The remainder of
|
||||
the array is a sequence of tag-content pairs representing the content
|
||||
of the element.
|
||||
|
||||
So for example the result of parsing:
|
||||
|
||||
<foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
|
||||
|
||||
would be:
|
||||
|
||||
Tag Content
|
||||
==================================================================
|
||||
[foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]],
|
||||
bar, [ {}, 0, "Howdy", ref, [{}]],
|
||||
0, "do"
|
||||
]
|
||||
]
|
||||
|
||||
The root document "foo", has 3 children: a "head" element, a "bar"
|
||||
element and the text "do". After the empty attribute hash, these are
|
||||
represented in it's contents by 3 tag-content pairs.
|
||||
|
||||
=head2 Objects
|
||||
|
||||
This is similar to the Tree style, except that a hash object is created for
|
||||
each element. The corresponding object will be in the class whose name
|
||||
is created by appending "::" and the element name to the package set with
|
||||
the Pkg option. Non-markup text will be in the ::Characters class. The
|
||||
contents of the corresponding object will be in an anonymous array that
|
||||
is the value of the Kids property for that object.
|
||||
|
||||
=head2 Stream
|
||||
|
||||
This style also uses the Pkg package. If none of the subs that this
|
||||
style looks for is there, then the effect of parsing with this style is
|
||||
to print a canonical copy of the document without comments or declarations.
|
||||
All the subs receive as their 1st parameter the Expat instance for the
|
||||
document they're parsing.
|
||||
|
||||
It looks for the following routines:
|
||||
|
||||
=over 4
|
||||
|
||||
=item * StartDocument
|
||||
|
||||
Called at the start of the parse .
|
||||
|
||||
=item * StartTag
|
||||
|
||||
Called for every start tag with a second parameter of the element type. The $_
|
||||
variable will contain a copy of the tag and the %_ variable will contain
|
||||
attribute values supplied for that element.
|
||||
|
||||
=item * EndTag
|
||||
|
||||
Called for every end tag with a second parameter of the element type. The $_
|
||||
variable will contain a copy of the end tag.
|
||||
|
||||
=item * Text
|
||||
|
||||
Called just before start or end tags with accumulated non-markup text in
|
||||
the $_ variable.
|
||||
|
||||
=item * PI
|
||||
|
||||
Called for processing instructions. The $_ variable will contain a copy of
|
||||
the PI and the target and data are sent as 2nd and 3rd parameters
|
||||
respectively.
|
||||
|
||||
=item * EndDocument
|
||||
|
||||
Called at conclusion of the parse.
|
||||
|
||||
=back
|
||||
|
||||
=head1 ENCODINGS
|
||||
|
||||
XML documents may be encoded in character sets other than Unicode as
|
||||
long as they may be mapped into the Unicode character set. Expat has
|
||||
further restrictions on encodings. Read the xmlparse.h header file in
|
||||
the expat distribution to see details on these restrictions.
|
||||
|
||||
Expat has built-in encodings for: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and
|
||||
C<US-ASCII>. Encodings are set either through the XML declaration
|
||||
encoding attribute or through the ProtocolEncoding option to XML::Parser
|
||||
or XML::Parser::Expat.
|
||||
|
||||
For encodings other than the built-ins, expat calls the function
|
||||
load_encoding in the Expat package with the encoding name. This function
|
||||
looks for a file in the path list @XML::Parser::Expat::Encoding_Path, that
|
||||
matches the lower-cased name with a '.enc' extension. The first one it
|
||||
finds, it loads.
|
||||
|
||||
If you wish to build your own encoding maps, check out the XML::Encoding
|
||||
module from CPAN.
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
Larry Wall <F<larry@wall.org>> wrote version 1.0.
|
||||
|
||||
Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API
|
||||
for this version (2.x), provided documentation,
|
||||
and added some standard package features.
|
||||
|
||||
Matt Sergeant <F<matt@sergeant.org>> is now maintaining XML::Parser
|
||||
|
||||
=cut
|
|
@ -0,0 +1,117 @@
|
|||
Mapping files for Japanese encodings
|
||||
|
||||
1998 12/25
|
||||
|
||||
Fuji Xerox Information Systems
|
||||
MURATA Makoto
|
||||
|
||||
1. Overview
|
||||
|
||||
This version of XML::Parser and XML::Encoding does not come with map files for
|
||||
the charset "Shift_JIS" and the charset "euc-jp". Unfortunately, each of these
|
||||
charsets has more than one mapping. None of these mappings are
|
||||
considered as authoritative.
|
||||
|
||||
Therefore, we have come to believe that it is dangerous to provide map files
|
||||
for these charsets. Rather, we introduce several private charsets and map
|
||||
files for these private charsets. If IANA, Unicode Consoritum, and JIS
|
||||
eventually reach a consensus, we will be able to provide map files for
|
||||
"Shift_JIS" and "euc-jp".
|
||||
|
||||
2. Different mappings from existing charsets to Unicode
|
||||
|
||||
1) Different mappings in JIS X0221 and Unicode
|
||||
|
||||
The mapping between JIS X0208:1990 and Unicode 1.1 and the mapping
|
||||
between JIS X0212:1990 and Unicode 1.1 are published from Unicode
|
||||
consortium. They are available at
|
||||
ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0208.TXT and
|
||||
ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0212.TXT,
|
||||
respectively.) These mapping files have a note as below:
|
||||
|
||||
# The kanji mappings are a normative part of ISO/IEC 10646. The
|
||||
# non-kanji mappings are provisional, pending definition of
|
||||
# official mappings by Japanese standards bodies.
|
||||
|
||||
Unfortunately, the non-kanji mappings in the Japanese standard for ISO 10646/1,
|
||||
namely JIS X 0221:1995, is different from the Unicode Consortium mapping since
|
||||
0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather than U+2015
|
||||
(horizontal bar). Furthermore, JIS X 0221 clearly says that the mapping is
|
||||
informational and non-normative. As a result, some companies (e.g., Microsoft and
|
||||
Apple) have introduced slightly different mappings. Therefore, neither the
|
||||
Unicode consortium mapping nor the JIS X 0221 mapping are considered as
|
||||
authoritative.
|
||||
|
||||
2) Shift-JIS
|
||||
|
||||
This charset is especially problematic, since its definition has been unclear
|
||||
since its inception.
|
||||
|
||||
The current registration of the charset "Shift_JIS" is as below:
|
||||
|
||||
>Name: Shift_JIS (preferred MIME name)
|
||||
>MIBenum: 17
|
||||
>Source: A Microsoft code that extends csHalfWidthKatakana to include
|
||||
> kanji by adding a second byte when the value of the first
|
||||
> byte is in the ranges 81-9F or E0-EF.
|
||||
>Alias: MS_Kanji
|
||||
>Alias: csShiftJIS
|
||||
|
||||
First, this does not reference to the mapping "Shift-JIS to Unicode"
|
||||
published by the Unicode consortium (available at
|
||||
ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/SHIFTJIS.TXT).
|
||||
|
||||
Second, "kanji" in this registration can be interepreted in different ways.
|
||||
Does this "kanji" reference to JIS X0208:1978, JIS X0208:1983, or JIS
|
||||
X0208:1990(== JIS X0208:1997)? These three standards are *incompatible* with
|
||||
each other. Moreover, we can even argue that "kanji" refers to JIS X0212 or
|
||||
ideographic characters in other countries.
|
||||
|
||||
Third, each company has extended Shift JIS. For example, Microsoft introduced
|
||||
OEM extensions (NEC extensionsand IBM extensions).
|
||||
|
||||
Forth, Shift JIS uses JIS X0201, which is almost upper-compatible with US-ASCII
|
||||
but is not quite. 5C and 7E of JIS X 0201 are different from backslash and
|
||||
tilde, respectively. However, many programming languages (e.g., Java)
|
||||
ignore this difference and assumes that 5C and 7E of Shift JIS are backslash
|
||||
and tilde.
|
||||
|
||||
|
||||
3. Proposed charsets and mappings
|
||||
|
||||
As a tentative solution, we introduce two private charsets for EUC-JP and four
|
||||
priviate charsets for Shift JIS.
|
||||
|
||||
1) EUC-JP
|
||||
|
||||
We have two charsets, namely "x-eucjp-unicode" and "x-eucjp-jisx0221". Their
|
||||
difference is only one code point. The mapping for the former is based
|
||||
on the Unicode Consortium mapping, while the latter is based on the JIS X0221
|
||||
mapping.
|
||||
|
||||
2) Shift JIS
|
||||
|
||||
We have four charsets, namely x-sjis-unicode, x-sjis-jisx0221,
|
||||
x-sjis-jdk117, and x-sjis-cp932.
|
||||
|
||||
The mapping for the charset x-sjis-unicode is the one published by the Unicode
|
||||
consortium. The mapping for x-sjis-jisx0221 is almost equivalent to
|
||||
x-sjis-unicode, but 0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather
|
||||
than U+2015. The charset x-sjis-jdk117 is again almost equivalent to
|
||||
x-sjis-unicode, but 0x5C and 0x7E of JIS X0201 are mapped to backslash and
|
||||
tilde.
|
||||
|
||||
The charset x-sjis-cp932 is used by Microsoft Windows, and its mapping is
|
||||
published from the Unicode Consortium (available at:
|
||||
ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.txt). The
|
||||
coded character set for this charset includes NEC-extensions and
|
||||
IBM-extensions. 0x5C and 0x7E of JIS X0201 are mapped to backslash and tilde;
|
||||
0x213D is mapped to U+2015; and 0x2140, 0x2141, 0x2142, and 0x215E of JIS X
|
||||
0208 are mapped to compatibility characters.
|
||||
|
||||
Makoto
|
||||
|
||||
Fuji Xerox Information Systems
|
||||
|
||||
Tel: +81-44-812-7230 Fax: +81-44-812-7231
|
||||
E-mail: murata@apsdc.ksp.fujixerox.co.jp
|
|
@ -0,0 +1,51 @@
|
|||
This directory contains binary encoding maps for some selected encodings.
|
||||
If they are placed in a directory listed in @XML::Parser::Expat::Encoding_Path,
|
||||
then they are automatically loaded by the XML::Parser::Expat::load_encoding
|
||||
function as needed. Otherwise you may load what you need directly by
|
||||
explicitly calling this function.
|
||||
|
||||
These maps were generated by a perl script that comes with the module
|
||||
XML::Encoding, compile_encoding, from XML formatted encoding maps that
|
||||
are distributed with that module. These XML encoding maps were generated
|
||||
in turn with a different script, domap, from mapping information contained
|
||||
on the Unicode version 2.0 CD-ROM. This CD-ROM comes with the Unicode
|
||||
Standard reference manual and can be ordered from the Unicode Consortium
|
||||
at http://www.unicode.org. The identical information is available on the
|
||||
internet at ftp://ftp.unicode.org/Public/MAPPINGS.
|
||||
|
||||
See the encoding.h header in the Expat sub-directory for a description of
|
||||
the structure of these files.
|
||||
|
||||
Clark Cooper
|
||||
December 12, 1998
|
||||
|
||||
================================================================
|
||||
|
||||
Contributed maps
|
||||
|
||||
This distribution contains four contributed encodings from MURATA Makoto
|
||||
<murata@apsdc.ksp.fujixerox.co.jp> that are variations on the encoding
|
||||
commonly called Shift_JIS:
|
||||
|
||||
x-sjis-cp932.enc
|
||||
x-sjis-jdk117.enc
|
||||
x-sjis-jisx0221.enc
|
||||
x-sjis-unicode.enc (This is the same encoding as the shift_jis.enc that
|
||||
was distributed with this module in version 2.17)
|
||||
|
||||
Please read his message (Japanese_Encodings.msg) about why these are here
|
||||
and why I've removed the shift_jis.enc encoding.
|
||||
|
||||
We also have two contributed encodings that are variations of the EUC-JP
|
||||
encoding from Yoshida Masato <yoshidam@inse.co.jp>:
|
||||
|
||||
x-euc-jp-jisx0221.enc
|
||||
x-euc-jp-unicode.enc
|
||||
|
||||
The comments that MURATA Makoto made in his message apply to these
|
||||
encodings too.
|
||||
|
||||
KangChan Lee <dolphin@comeng.chungnam.ac.kr> supplied the euc-kr encoding.
|
||||
|
||||
Clark Cooper
|
||||
December 26, 1998
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,71 @@
|
|||
# LWPExternEnt.pl
|
||||
#
|
||||
# Copyright (c) 2000 Clark Cooper
|
||||
# All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the same terms as Perl itself.
|
||||
|
||||
package XML::Parser;
|
||||
|
||||
use URI;
|
||||
use URI::file;
|
||||
use LWP::UserAgent;
|
||||
|
||||
##
|
||||
## Note that this external entity handler reads the entire entity into
|
||||
## memory, so it will choke on huge ones. It would be really nice if
|
||||
## LWP::UserAgent optionally returned us an IO::Handle.
|
||||
##
|
||||
|
||||
sub lwp_ext_ent_handler {
|
||||
my ($xp, $base, $sys) = @_; # We don't use public id
|
||||
|
||||
my $uri;
|
||||
|
||||
if (defined $base) {
|
||||
# Base may have been set by parsefile, which is agnostic about
|
||||
# whether its a file or URI.
|
||||
my $base_uri = new URI($base);
|
||||
unless (defined $base_uri->scheme) {
|
||||
$base_uri = URI->new_abs($base_uri, URI::file->cwd);
|
||||
}
|
||||
|
||||
$uri = URI->new_abs($sys, $base_uri);
|
||||
}
|
||||
else {
|
||||
$uri = new URI($sys);
|
||||
unless (defined $uri->scheme) {
|
||||
$uri = URI->new_abs($uri, URI::file->cwd);
|
||||
}
|
||||
}
|
||||
|
||||
my $ua = $xp->{_lwpagent};
|
||||
unless (defined $ua) {
|
||||
$ua = $xp->{_lwpagent} = new LWP::UserAgent();
|
||||
$ua->env_proxy();
|
||||
}
|
||||
|
||||
my $req = new HTTP::Request('GET', $uri);
|
||||
|
||||
my $res = $ua->request($req);
|
||||
if ($res->is_error) {
|
||||
$xp->{ErrorMessage} .= "\n" . $res->status_line . " $uri";
|
||||
return undef;
|
||||
}
|
||||
|
||||
$xp->{_BaseStack} ||= [];
|
||||
push(@{$xp->{_BaseStack}}, $base);
|
||||
|
||||
$xp->base($uri);
|
||||
|
||||
return $res->content;
|
||||
} # End lwp_ext_ent_handler
|
||||
|
||||
sub lwp_ext_ent_cleanup {
|
||||
my ($xp) = @_;
|
||||
|
||||
$xp->base(pop(@{$xp->{_BaseStack}}));
|
||||
} # End lwp_ext_ent_cleanup
|
||||
|
||||
1;
|
|
@ -0,0 +1,52 @@
|
|||
# $Id: Debug.pm,v 1.1 2003-07-27 16:07:49 matt Exp $
|
||||
|
||||
package XML::Parser::Style::Debug;
|
||||
use strict;
|
||||
|
||||
sub Start {
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
print STDERR "@{$expat->{Context}} \\\\ (@_)\n";
|
||||
}
|
||||
|
||||
sub End {
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
print STDERR "@{$expat->{Context}} //\n";
|
||||
}
|
||||
|
||||
sub Char {
|
||||
my $expat = shift;
|
||||
my $text = shift;
|
||||
$text =~ s/([\x80-\xff])/sprintf "#x%X;", ord $1/eg;
|
||||
$text =~ s/([\t\n])/sprintf "#%d;", ord $1/eg;
|
||||
print STDERR "@{$expat->{Context}} || $text\n";
|
||||
}
|
||||
|
||||
sub Proc {
|
||||
my $expat = shift;
|
||||
my $target = shift;
|
||||
my $text = shift;
|
||||
my @foo = @{ $expat->{Context} };
|
||||
print STDERR "@foo $target($text)\n";
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser::Style::Debug - Debug style for XML::Parser
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Parser;
|
||||
my $p = XML::Parser->new(Style => 'Debug');
|
||||
$p->parsefile('foo.xml');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This just prints out the document in outline form to STDERR. Nothing special is
|
||||
returned by parse.
|
||||
|
||||
=cut
|
|
@ -0,0 +1,79 @@
|
|||
# $Id: Objects.pm,v 1.1 2003-08-18 20:20:51 matt Exp $
|
||||
|
||||
package XML::Parser::Style::Objects;
|
||||
use strict;
|
||||
|
||||
sub Init {
|
||||
my $expat = shift;
|
||||
$expat->{Lists} = [];
|
||||
$expat->{Curlist} = $expat->{Tree} = [];
|
||||
}
|
||||
|
||||
sub Start {
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
my $newlist = [];
|
||||
my $class = "${$expat}{Pkg}::$tag";
|
||||
my $newobj = bless { @_, Kids => $newlist }, $class;
|
||||
push @{ $expat->{Lists} }, $expat->{Curlist};
|
||||
push @{ $expat->{Curlist} }, $newobj;
|
||||
$expat->{Curlist} = $newlist;
|
||||
}
|
||||
|
||||
sub End {
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
$expat->{Curlist} = pop @{ $expat->{Lists} };
|
||||
}
|
||||
|
||||
sub Char {
|
||||
my $expat = shift;
|
||||
my $text = shift;
|
||||
my $class = "${$expat}{Pkg}::Characters";
|
||||
my $clist = $expat->{Curlist};
|
||||
my $pos = $#$clist;
|
||||
|
||||
if ( $pos >= 0 and ref( $clist->[$pos] ) eq $class ) {
|
||||
$clist->[$pos]->{Text} .= $text;
|
||||
}
|
||||
else {
|
||||
push @$clist, bless { Text => $text }, $class;
|
||||
}
|
||||
}
|
||||
|
||||
sub Final {
|
||||
my $expat = shift;
|
||||
delete $expat->{Curlist};
|
||||
delete $expat->{Lists};
|
||||
$expat->{Tree};
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser::Style::Objects - Objects styler parser
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Parser;
|
||||
my $p = XML::Parser->new(Style => 'Objects', Pkg => 'MyNode');
|
||||
my $tree = $p->parsefile('foo.xml');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module implements XML::Parser's Objects style parser.
|
||||
|
||||
This is similar to the Tree style, except that a hash object is created for
|
||||
each element. The corresponding object will be in the class whose name
|
||||
is created by appending "::" and the element name to the package set with
|
||||
the Pkg option. Non-markup text will be in the ::Characters class. The
|
||||
contents of the corresponding object will be in an anonymous array that
|
||||
is the value of the Kids property for that object.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<XML::Parser::Style::Tree>
|
||||
|
||||
=cut
|
|
@ -0,0 +1,188 @@
|
|||
# $Id: Stream.pm,v 1.1 2003-07-27 16:07:49 matt Exp $
|
||||
|
||||
package XML::Parser::Style::Stream;
|
||||
use strict;
|
||||
|
||||
# This style invented by Tim Bray <tbray@textuality.com>
|
||||
|
||||
sub Init {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
$expat->{Text} = '';
|
||||
my $sub = $expat->{Pkg} . "::StartDocument";
|
||||
&$sub($expat)
|
||||
if defined(&$sub);
|
||||
}
|
||||
|
||||
sub Start {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
my $type = shift;
|
||||
|
||||
doText($expat);
|
||||
$_ = "<$type";
|
||||
|
||||
%_ = @_;
|
||||
while (@_) {
|
||||
$_ .= ' ' . shift() . '="' . shift() . '"';
|
||||
}
|
||||
$_ .= '>';
|
||||
|
||||
my $sub = $expat->{Pkg} . "::StartTag";
|
||||
if ( defined(&$sub) ) {
|
||||
&$sub( $expat, $type );
|
||||
}
|
||||
else {
|
||||
print;
|
||||
}
|
||||
}
|
||||
|
||||
sub End {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
my $type = shift;
|
||||
|
||||
# Set right context for Text handler
|
||||
push( @{ $expat->{Context} }, $type );
|
||||
doText($expat);
|
||||
pop( @{ $expat->{Context} } );
|
||||
|
||||
$_ = "</$type>";
|
||||
|
||||
my $sub = $expat->{Pkg} . "::EndTag";
|
||||
if ( defined(&$sub) ) {
|
||||
&$sub( $expat, $type );
|
||||
}
|
||||
else {
|
||||
print;
|
||||
}
|
||||
}
|
||||
|
||||
sub Char {
|
||||
my $expat = shift;
|
||||
$expat->{Text} .= shift;
|
||||
}
|
||||
|
||||
sub Proc {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
my $target = shift;
|
||||
my $text = shift;
|
||||
|
||||
doText($expat);
|
||||
|
||||
$_ = "<?$target $text?>";
|
||||
|
||||
my $sub = $expat->{Pkg} . "::PI";
|
||||
if ( defined(&$sub) ) {
|
||||
&$sub( $expat, $target, $text );
|
||||
}
|
||||
else {
|
||||
print;
|
||||
}
|
||||
}
|
||||
|
||||
sub Final {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
my $sub = $expat->{Pkg} . "::EndDocument";
|
||||
&$sub($expat)
|
||||
if defined(&$sub);
|
||||
}
|
||||
|
||||
sub doText {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
$_ = $expat->{Text};
|
||||
|
||||
if ( length($_) ) {
|
||||
my $sub = $expat->{Pkg} . "::Text";
|
||||
if ( defined(&$sub) ) {
|
||||
&$sub($expat);
|
||||
}
|
||||
else {
|
||||
print;
|
||||
}
|
||||
|
||||
$expat->{Text} = '';
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser::Style::Stream - Stream style for XML::Parser
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Parser;
|
||||
my $p = XML::Parser->new(Style => 'Stream', Pkg => 'MySubs');
|
||||
$p->parsefile('foo.xml');
|
||||
|
||||
{
|
||||
package MySubs;
|
||||
|
||||
sub StartTag {
|
||||
my ($e, $name) = @_;
|
||||
# do something with start tags
|
||||
}
|
||||
|
||||
sub EndTag {
|
||||
my ($e, $name) = @_;
|
||||
# do something with end tags
|
||||
}
|
||||
|
||||
sub Characters {
|
||||
my ($e, $data) = @_;
|
||||
# do something with text nodes
|
||||
}
|
||||
}
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This style uses the Pkg option to find subs in a given package to call for each event.
|
||||
If none of the subs that this
|
||||
style looks for is there, then the effect of parsing with this style is
|
||||
to print a canonical copy of the document without comments or declarations.
|
||||
All the subs receive as their 1st parameter the Expat instance for the
|
||||
document they're parsing.
|
||||
|
||||
It looks for the following routines:
|
||||
|
||||
=over 4
|
||||
|
||||
=item * StartDocument
|
||||
|
||||
Called at the start of the parse .
|
||||
|
||||
=item * StartTag
|
||||
|
||||
Called for every start tag with a second parameter of the element type. The $_
|
||||
variable will contain a copy of the tag and the %_ variable will contain
|
||||
attribute values supplied for that element.
|
||||
|
||||
=item * EndTag
|
||||
|
||||
Called for every end tag with a second parameter of the element type. The $_
|
||||
variable will contain a copy of the end tag.
|
||||
|
||||
=item * Text
|
||||
|
||||
Called just before start or end tags with accumulated non-markup text in
|
||||
the $_ variable.
|
||||
|
||||
=item * PI
|
||||
|
||||
Called for processing instructions. The $_ variable will contain a copy of
|
||||
the PI and the target and data are sent as 2nd and 3rd parameters
|
||||
respectively.
|
||||
|
||||
=item * EndDocument
|
||||
|
||||
Called at conclusion of the parse.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
|
@ -0,0 +1,58 @@
|
|||
# $Id: Subs.pm,v 1.1 2003-07-27 16:07:49 matt Exp $
|
||||
|
||||
package XML::Parser::Style::Subs;
|
||||
|
||||
sub Start {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
my $sub = $expat->{Pkg} . "::$tag";
|
||||
eval { &$sub( $expat, $tag, @_ ) };
|
||||
}
|
||||
|
||||
sub End {
|
||||
no strict 'refs';
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
my $sub = $expat->{Pkg} . "::${tag}_";
|
||||
eval { &$sub( $expat, $tag ) };
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser::Style::Subs - glue for handling element callbacks
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Parser;
|
||||
my $p = XML::Parser->new(Style => 'Subs', Pkg => 'MySubs');
|
||||
$p->parsefile('foo.xml');
|
||||
|
||||
{
|
||||
package MySubs;
|
||||
|
||||
sub foo {
|
||||
# start of foo tag
|
||||
}
|
||||
|
||||
sub foo_ {
|
||||
# end of foo tag
|
||||
}
|
||||
}
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Each time an element starts, a sub by that name in the package specified
|
||||
by the Pkg option is called with the same parameters that the Start
|
||||
handler gets called with.
|
||||
|
||||
Each time an element ends, a sub with that name appended with an underscore
|
||||
("_"), is called with the same parameters that the End handler gets called
|
||||
with.
|
||||
|
||||
Nothing special is returned by parse.
|
||||
|
||||
=cut
|
|
@ -0,0 +1,91 @@
|
|||
# $Id: Tree.pm,v 1.2 2003-07-31 07:54:51 matt Exp $
|
||||
|
||||
package XML::Parser::Style::Tree;
|
||||
$XML::Parser::Built_In_Styles{Tree} = 1;
|
||||
|
||||
sub Init {
|
||||
my $expat = shift;
|
||||
$expat->{Lists} = [];
|
||||
$expat->{Curlist} = $expat->{Tree} = [];
|
||||
}
|
||||
|
||||
sub Start {
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
my $newlist = [ {@_} ];
|
||||
push @{ $expat->{Lists} }, $expat->{Curlist};
|
||||
push @{ $expat->{Curlist} }, $tag => $newlist;
|
||||
$expat->{Curlist} = $newlist;
|
||||
}
|
||||
|
||||
sub End {
|
||||
my $expat = shift;
|
||||
my $tag = shift;
|
||||
$expat->{Curlist} = pop @{ $expat->{Lists} };
|
||||
}
|
||||
|
||||
sub Char {
|
||||
my $expat = shift;
|
||||
my $text = shift;
|
||||
my $clist = $expat->{Curlist};
|
||||
my $pos = $#$clist;
|
||||
|
||||
if ( $pos > 0 and $clist->[ $pos - 1 ] eq '0' ) {
|
||||
$clist->[$pos] .= $text;
|
||||
}
|
||||
else {
|
||||
push @$clist, 0 => $text;
|
||||
}
|
||||
}
|
||||
|
||||
sub Final {
|
||||
my $expat = shift;
|
||||
delete $expat->{Curlist};
|
||||
delete $expat->{Lists};
|
||||
$expat->{Tree};
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser::Style::Tree - Tree style parser
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Parser;
|
||||
my $p = XML::Parser->new(Style => 'Tree');
|
||||
my $tree = $p->parsefile('foo.xml');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module implements XML::Parser's Tree style parser.
|
||||
|
||||
When parsing a document, C<parse()> will return a parse tree for the
|
||||
document. Each node in the tree
|
||||
takes the form of a tag, content pair. Text nodes are represented with
|
||||
a pseudo-tag of "0" and the string that is their content. For elements,
|
||||
the content is an array reference. The first item in the array is a
|
||||
(possibly empty) hash reference containing attributes. The remainder of
|
||||
the array is a sequence of tag-content pairs representing the content
|
||||
of the element.
|
||||
|
||||
So for example the result of parsing:
|
||||
|
||||
<foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
|
||||
|
||||
would be:
|
||||
Tag Content
|
||||
==================================================================
|
||||
[foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]],
|
||||
bar, [ {}, 0, "Howdy", ref, [{}]],
|
||||
0, "do"
|
||||
]
|
||||
]
|
||||
|
||||
The root document "foo", has 3 children: a "head" element, a "bar"
|
||||
element and the text "do". After the empty attribute hash, these are
|
||||
represented in it's contents by 3 tag-content pairs.
|
||||
|
||||
=cut
|
|
@ -0,0 +1,468 @@
|
|||
NAME
|
||||
XML::Parser - A perl module for parsing XML documents
|
||||
|
||||
SYNOPSIS
|
||||
use XML::Parser;
|
||||
|
||||
$p1 = XML::Parser->new(Style => 'Debug');
|
||||
$p1->parsefile('REC-xml-19980210.xml');
|
||||
$p1->parse('<foo id="me">Hello World</foo>');
|
||||
|
||||
# Alternative
|
||||
$p2 = XML::Parser->new(Handlers => {Start => \&handle_start,
|
||||
End => \&handle_end,
|
||||
Char => \&handle_char});
|
||||
$p2->parse($socket);
|
||||
|
||||
# Another alternative
|
||||
$p3 = XML::Parser->new(ErrorContext => 2);
|
||||
|
||||
$p3->setHandlers(Char => \&text,
|
||||
Default => \&other);
|
||||
|
||||
open(my $fh, 'xmlgenerator |');
|
||||
$p3->parse($foo, ProtocolEncoding => 'ISO-8859-1');
|
||||
close($foo);
|
||||
|
||||
$p3->parsefile('junk.xml', ErrorContext => 3);
|
||||
|
||||
DESCRIPTION
|
||||
This module provides ways to parse XML documents. It is built on top of
|
||||
XML::Parser::Expat, which is a lower level interface to James Clark's
|
||||
expat library. Each call to one of the parsing methods creates a new
|
||||
instance of XML::Parser::Expat which is then used to parse the document.
|
||||
Expat options may be provided when the XML::Parser object is created.
|
||||
These options are then passed on to the Expat object on each parse call.
|
||||
They can also be given as extra arguments to the parse methods, in which
|
||||
case they override options given at XML::Parser creation time.
|
||||
|
||||
The behavior of the parser is controlled either by "STYLES" and/or
|
||||
"HANDLERS" options, or by "setHandlers" method. These all provide
|
||||
mechanisms for XML::Parser to set the handlers needed by
|
||||
XML::Parser::Expat. If neither "Style" nor "Handlers" are specified,
|
||||
then parsing just checks the document for being well-formed.
|
||||
|
||||
When underlying handlers get called, they receive as their first
|
||||
parameter the *Expat* object, not the Parser object.
|
||||
|
||||
METHODS
|
||||
new This is a class method, the constructor for XML::Parser. Options are
|
||||
passed as keyword value pairs. Recognized options are:
|
||||
|
||||
* Style
|
||||
|
||||
This option provides an easy way to create a given style of
|
||||
parser. The built in styles are: "Debug", "Subs", "Tree",
|
||||
"Objects", and "Stream". These are all defined in separate
|
||||
packages under "XML::Parser::Style::*", and you can find further
|
||||
documentation for each style both below, and in those packages.
|
||||
|
||||
Custom styles can be provided by giving a full package name
|
||||
containing at least one '::'. This package should then have subs
|
||||
defined for each handler it wishes to have installed. See
|
||||
"STYLES" below for a discussion of each built in style.
|
||||
|
||||
* Handlers
|
||||
|
||||
When provided, this option should be an anonymous hash
|
||||
containing as keys the type of handler and as values a sub
|
||||
reference to handle that type of event. All the handlers get
|
||||
passed as their 1st parameter the instance of expat that is
|
||||
parsing the document. Further details on handlers can be found
|
||||
in "HANDLERS". Any handler set here overrides the corresponding
|
||||
handler set with the Style option.
|
||||
|
||||
* Pkg
|
||||
|
||||
Some styles will refer to subs defined in this package. If not
|
||||
provided, it defaults to the package which called the
|
||||
constructor.
|
||||
|
||||
* ErrorContext
|
||||
|
||||
This is an Expat option. When this option is defined, errors are
|
||||
reported in context. The value should be the number of lines to
|
||||
show on either side of the line in which the error occurred.
|
||||
|
||||
* ProtocolEncoding
|
||||
|
||||
This is an Expat option. This sets the protocol encoding name.
|
||||
It defaults to none. The built-in encodings are: "UTF-8",
|
||||
"ISO-8859-1", "UTF-16", and "US-ASCII". Other encodings may be
|
||||
used if they have encoding maps in one of the directories in the
|
||||
@Encoding_Path list. Check "ENCODINGS" for more information on
|
||||
encoding maps. Setting the protocol encoding overrides any
|
||||
encoding in the XML declaration.
|
||||
|
||||
* Namespaces
|
||||
|
||||
This is an Expat option. If this is set to a true value, then
|
||||
namespace processing is done during the parse. See "Namespaces"
|
||||
in XML::Parser::Expat for further discussion of namespace
|
||||
processing.
|
||||
|
||||
* NoExpand
|
||||
|
||||
This is an Expat option. Normally, the parser will try to expand
|
||||
references to entities defined in the internal subset. If this
|
||||
option is set to a true value, and a default handler is also
|
||||
set, then the default handler will be called when an entity
|
||||
reference is seen in text. This has no effect if a default
|
||||
handler has not been registered, and it has no effect on the
|
||||
expansion of entity references inside attribute values.
|
||||
|
||||
* Stream_Delimiter
|
||||
|
||||
This is an Expat option. It takes a string value. When this
|
||||
string is found alone on a line while parsing from a stream,
|
||||
then the parse is ended as if it saw an end of file. The
|
||||
intended use is with a stream of xml documents in a MIME
|
||||
multipart format. The string should not contain a trailing
|
||||
newline.
|
||||
|
||||
* ParseParamEnt
|
||||
|
||||
This is an Expat option. Unless standalone is set to "yes" in
|
||||
the XML declaration, setting this to a true value allows the
|
||||
external DTD to be read, and parameter entities to be parsed and
|
||||
expanded.
|
||||
|
||||
* NoLWP
|
||||
|
||||
This option has no effect if the ExternEnt or ExternEntFin
|
||||
handlers are directly set. Otherwise, if true, it forces the use
|
||||
of a file based external entity handler.
|
||||
|
||||
* Non_Expat_Options
|
||||
|
||||
If provided, this should be an anonymous hash whose keys are
|
||||
options that shouldn't be passed to Expat. This should only be
|
||||
of concern to those subclassing XML::Parser.
|
||||
|
||||
setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
|
||||
This method registers handlers for various parser events. It
|
||||
overrides any previous handlers registered through the Style or
|
||||
Handler options or through earlier calls to setHandlers. By
|
||||
providing a false or undefined value as the handler, the existing
|
||||
handler can be unset.
|
||||
|
||||
This method returns a list of type, handler pairs corresponding to
|
||||
the input. The handlers returned are the ones that were in effect
|
||||
prior to the call.
|
||||
|
||||
See a description of the handler types in "HANDLERS".
|
||||
|
||||
parse(SOURCE [, OPT => OPT_VALUE [...]])
|
||||
The SOURCE parameter should either be a string containing the whole
|
||||
XML document, or it should be an open IO::Handle. Constructor
|
||||
options to XML::Parser::Expat given as keyword-value pairs may
|
||||
follow the SOURCE parameter. These override, for this call, any
|
||||
options or attributes passed through from the XML::Parser instance.
|
||||
|
||||
A die call is thrown if a parse error occurs. Otherwise it will
|
||||
return 1 or whatever is returned from the Final handler, if one is
|
||||
installed. In other words, what parse may return depends on the
|
||||
style.
|
||||
|
||||
parsestring
|
||||
This is just an alias for parse for backwards compatibility.
|
||||
|
||||
parsefile(FILE [, OPT => OPT_VALUE [...]])
|
||||
Open FILE for reading, then call parse with the open handle. The
|
||||
file is closed no matter how parse returns. Returns what parse
|
||||
returns.
|
||||
|
||||
parse_start([ OPT => OPT_VALUE [...]])
|
||||
Create and return a new instance of XML::Parser::ExpatNB.
|
||||
Constructor options may be provided. If an init handler has been
|
||||
provided, it is called before returning the ExpatNB object.
|
||||
Documents are parsed by making incremental calls to the parse_more
|
||||
method of this object, which takes a string. A single call to the
|
||||
parse_done method of this object, which takes no arguments,
|
||||
indicates that the document is finished.
|
||||
|
||||
If there is a final handler installed, it is executed by the
|
||||
parse_done method before returning and the parse_done method returns
|
||||
whatever is returned by the final handler.
|
||||
|
||||
HANDLERS
|
||||
Expat is an event based parser. As the parser recognizes parts of the
|
||||
document (say the start or end tag for an XML element), then any
|
||||
handlers registered for that type of an event are called with suitable
|
||||
parameters. All handlers receive an instance of XML::Parser::Expat as
|
||||
their first argument. See "METHODS" in XML::Parser::Expat for a
|
||||
discussion of the methods that can be called on this object.
|
||||
|
||||
Init (Expat)
|
||||
This is called just before the parsing of the document starts.
|
||||
|
||||
Final (Expat)
|
||||
This is called just after parsing has finished, but only if no errors
|
||||
occurred during the parse. Parse returns what this returns.
|
||||
|
||||
Start (Expat, Element [, Attr, Val [,...]])
|
||||
This event is generated when an XML start tag is recognized. Element is
|
||||
the name of the XML element type that is opened with the start tag. The
|
||||
Attr & Val pairs are generated for each attribute in the start tag.
|
||||
|
||||
End (Expat, Element)
|
||||
This event is generated when an XML end tag is recognized. Note that an
|
||||
XML empty tag (<foo/>) generates both a start and an end event.
|
||||
|
||||
Char (Expat, String)
|
||||
This event is generated when non-markup is recognized. The non-markup
|
||||
sequence of characters is in String. A single non-markup sequence of
|
||||
characters may generate multiple calls to this handler. Whatever the
|
||||
encoding of the string in the original document, this is given to the
|
||||
handler in UTF-8.
|
||||
|
||||
Proc (Expat, Target, Data)
|
||||
This event is generated when a processing instruction is recognized.
|
||||
|
||||
Comment (Expat, Data)
|
||||
This event is generated when a comment is recognized.
|
||||
|
||||
CdataStart (Expat)
|
||||
This is called at the start of a CDATA section.
|
||||
|
||||
CdataEnd (Expat)
|
||||
This is called at the end of a CDATA section.
|
||||
|
||||
Default (Expat, String)
|
||||
This is called for any characters that don't have a registered handler.
|
||||
This includes both characters that are part of markup for which no
|
||||
events are generated (markup declarations) and characters that could
|
||||
generate events, but for which no handler has been registered.
|
||||
|
||||
Whatever the encoding in the original document, the string is returned
|
||||
to the handler in UTF-8.
|
||||
|
||||
Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation)
|
||||
This is called for a declaration of an unparsed entity. Entity is the
|
||||
name of the entity. Base is the base to be used for resolving a relative
|
||||
URI. Sysid is the system id. Pubid is the public id. Notation is the
|
||||
notation name. Base and Pubid may be undefined.
|
||||
|
||||
Notation (Expat, Notation, Base, Sysid, Pubid)
|
||||
This is called for a declaration of notation. Notation is the notation
|
||||
name. Base is the base to be used for resolving a relative URI. Sysid is
|
||||
the system id. Pubid is the public id. Base, Sysid, and Pubid may all be
|
||||
undefined.
|
||||
|
||||
ExternEnt (Expat, Base, Sysid, Pubid)
|
||||
This is called when an external entity is referenced. Base is the base
|
||||
to be used for resolving a relative URI. Sysid is the system id. Pubid
|
||||
is the public id. Base, and Pubid may be undefined.
|
||||
|
||||
This handler should either return a string, which represents the
|
||||
contents of the external entity, or return an open filehandle that can
|
||||
be read to obtain the contents of the external entity, or return undef,
|
||||
which indicates the external entity couldn't be found and will generate
|
||||
a parse error.
|
||||
|
||||
If an open filehandle is returned, it must be returned as either a glob
|
||||
(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
|
||||
|
||||
A default handler is installed for this event. The default handler is
|
||||
XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided
|
||||
with a true value, otherwise XML::Parser::file_ext_ent_handler is the
|
||||
default handler for external entities. Even without the NoLWP option, if
|
||||
the URI or LWP modules are missing, the file based handler ends up being
|
||||
used after giving a warning on the first external entity reference.
|
||||
|
||||
The LWP external entity handler will use proxies defined in the
|
||||
environment (http_proxy, ftp_proxy, etc.).
|
||||
|
||||
Please note that the LWP external entity handler reads the entire entity
|
||||
into a string and returns it, where as the file handler opens a
|
||||
filehandle.
|
||||
|
||||
Also note that the file external entity handler will likely choke on
|
||||
absolute URIs or file names that don't fit the conventions of the local
|
||||
operating system.
|
||||
|
||||
The expat base method can be used to set a basename for relative
|
||||
pathnames. If no basename is given, or if the basename is itself a
|
||||
relative name, then it is relative to the current working directory.
|
||||
|
||||
ExternEntFin (Expat)
|
||||
This is called after parsing an external entity. It's not called unless
|
||||
an ExternEnt handler is also set. There is a default handler installed
|
||||
that pairs with the default ExternEnt handler.
|
||||
|
||||
If you're going to install your own ExternEnt handler, then you should
|
||||
set (or unset) this handler too.
|
||||
|
||||
Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)
|
||||
This is called when an entity is declared. For internal entities, the
|
||||
Val parameter will contain the value and the remaining three parameters
|
||||
will be undefined. For external entities, the Val parameter will be
|
||||
undefined, the Sysid parameter will have the system id, the Pubid
|
||||
parameter will have the public id if it was provided (it will be
|
||||
undefined otherwise), the Ndata parameter will contain the notation for
|
||||
unparsed entities. If this is a parameter entity declaration, then the
|
||||
IsParam parameter is true.
|
||||
|
||||
Note that this handler and the Unparsed handler above overlap. If both
|
||||
are set, then this handler will not be called for unparsed entities.
|
||||
|
||||
Element (Expat, Name, Model)
|
||||
The element handler is called when an element declaration is found. Name
|
||||
is the element name, and Model is the content model as an
|
||||
XML::Parser::Content object. See "XML::Parser::ContentModel Methods" in
|
||||
XML::Parser::Expat for methods available for this class.
|
||||
|
||||
Attlist (Expat, Elname, Attname, Type, Default, Fixed)
|
||||
This handler is called for each attribute in an ATTLIST declaration. So
|
||||
an ATTLIST declaration that has multiple attributes will generate
|
||||
multiple calls to this handler. The Elname parameter is the name of the
|
||||
element with which the attribute is being associated. The Attname
|
||||
parameter is the name of the attribute. Type is the attribute type,
|
||||
given as a string. Default is the default value, which will either be
|
||||
"#REQUIRED", "#IMPLIED" or a quoted string (i.e. the returned string
|
||||
will begin and end with a quote character). If Fixed is true, then this
|
||||
is a fixed attribute.
|
||||
|
||||
Doctype (Expat, Name, Sysid, Pubid, Internal)
|
||||
This handler is called for DOCTYPE declarations. Name is the document
|
||||
type name. Sysid is the system id of the document type, if it was
|
||||
provided, otherwise it's undefined. Pubid is the public id of the
|
||||
document type, which will be undefined if no public id was given.
|
||||
Internal is the internal subset, given as a string. If there was no
|
||||
internal subset, it will be undefined. Internal will contain all
|
||||
whitespace, comments, processing instructions, and declarations seen in
|
||||
the internal subset. The declarations will be there whether or not they
|
||||
have been processed by another handler (except for unparsed entities
|
||||
processed by the Unparsed handler). However, comments and processing
|
||||
instructions will not appear if they've been processed by their
|
||||
respective handlers.
|
||||
|
||||
* DoctypeFin (Parser)
|
||||
This handler is called after parsing of the DOCTYPE declaration has
|
||||
finished, including any internal or external DTD declarations.
|
||||
|
||||
XMLDecl (Expat, Version, Encoding, Standalone)
|
||||
This handler is called for xml declarations. Version is a string
|
||||
containing the version. Encoding is either undefined or contains an
|
||||
encoding string. Standalone will be either true, false, or undefined if
|
||||
the standalone attribute is yes, no, or not made respectively.
|
||||
|
||||
STYLES
|
||||
Debug
|
||||
This just prints out the document in outline form. Nothing special is
|
||||
returned by parse.
|
||||
|
||||
Subs
|
||||
Each time an element starts, a sub by that name in the package specified
|
||||
by the Pkg option is called with the same parameters that the Start
|
||||
handler gets called with.
|
||||
|
||||
Each time an element ends, a sub with that name appended with an
|
||||
underscore ("_"), is called with the same parameters that the End
|
||||
handler gets called with.
|
||||
|
||||
Nothing special is returned by parse.
|
||||
|
||||
Tree
|
||||
Parse will return a parse tree for the document. Each node in the tree
|
||||
takes the form of a tag, content pair. Text nodes are represented with a
|
||||
pseudo-tag of "0" and the string that is their content. For elements,
|
||||
the content is an array reference. The first item in the array is a
|
||||
(possibly empty) hash reference containing attributes. The remainder of
|
||||
the array is a sequence of tag-content pairs representing the content of
|
||||
the element.
|
||||
|
||||
So for example the result of parsing:
|
||||
|
||||
<foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
|
||||
|
||||
would be:
|
||||
|
||||
Tag Content
|
||||
==================================================================
|
||||
[foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]],
|
||||
bar, [ {}, 0, "Howdy", ref, [{}]],
|
||||
0, "do"
|
||||
]
|
||||
]
|
||||
|
||||
The root document "foo", has 3 children: a "head" element, a "bar"
|
||||
element and the text "do". After the empty attribute hash, these are
|
||||
represented in it's contents by 3 tag-content pairs.
|
||||
|
||||
Objects
|
||||
This is similar to the Tree style, except that a hash object is created
|
||||
for each element. The corresponding object will be in the class whose
|
||||
name is created by appending "::" and the element name to the package
|
||||
set with the Pkg option. Non-markup text will be in the ::Characters
|
||||
class. The contents of the corresponding object will be in an anonymous
|
||||
array that is the value of the Kids property for that object.
|
||||
|
||||
Stream
|
||||
This style also uses the Pkg package. If none of the subs that this
|
||||
style looks for is there, then the effect of parsing with this style is
|
||||
to print a canonical copy of the document without comments or
|
||||
declarations. All the subs receive as their 1st parameter the Expat
|
||||
instance for the document they're parsing.
|
||||
|
||||
It looks for the following routines:
|
||||
|
||||
* StartDocument
|
||||
|
||||
Called at the start of the parse .
|
||||
|
||||
* StartTag
|
||||
|
||||
Called for every start tag with a second parameter of the element
|
||||
type. The $_ variable will contain a copy of the tag and the %_
|
||||
variable will contain attribute values supplied for that element.
|
||||
|
||||
* EndTag
|
||||
|
||||
Called for every end tag with a second parameter of the element
|
||||
type. The $_ variable will contain a copy of the end tag.
|
||||
|
||||
* Text
|
||||
|
||||
Called just before start or end tags with accumulated non-markup
|
||||
text in the $_ variable.
|
||||
|
||||
* PI
|
||||
|
||||
Called for processing instructions. The $_ variable will contain a
|
||||
copy of the PI and the target and data are sent as 2nd and 3rd
|
||||
parameters respectively.
|
||||
|
||||
* EndDocument
|
||||
|
||||
Called at conclusion of the parse.
|
||||
|
||||
ENCODINGS
|
||||
XML documents may be encoded in character sets other than Unicode as
|
||||
long as they may be mapped into the Unicode character set. Expat has
|
||||
further restrictions on encodings. Read the xmlparse.h header file in
|
||||
the expat distribution to see details on these restrictions.
|
||||
|
||||
Expat has built-in encodings for: "UTF-8", "ISO-8859-1", "UTF-16", and
|
||||
"US-ASCII". Encodings are set either through the XML declaration
|
||||
encoding attribute or through the ProtocolEncoding option to XML::Parser
|
||||
or XML::Parser::Expat.
|
||||
|
||||
For encodings other than the built-ins, expat calls the function
|
||||
load_encoding in the Expat package with the encoding name. This function
|
||||
looks for a file in the path list @XML::Parser::Expat::Encoding_Path,
|
||||
that matches the lower-cased name with a '.enc' extension. The first one
|
||||
it finds, it loads.
|
||||
|
||||
If you wish to build your own encoding maps, check out the XML::Encoding
|
||||
module from CPAN.
|
||||
|
||||
AUTHORS
|
||||
Larry Wall <larry@wall.org> wrote version 1.0.
|
||||
|
||||
Clark Cooper <coopercc@netheaven.com> picked up support, changed the API
|
||||
for this version (2.x), provided documentation, and added some standard
|
||||
package features.
|
||||
|
||||
Matt Sergeant <matt@sergeant.org> is now maintaining XML::Parser
|
||||
|
|
@ -0,0 +1,495 @@
|
|||
# $Id: CheckLib.pm,v 1.25 2008/10/27 12:16:23 drhyde Exp $
|
||||
|
||||
package Devel::CheckLib;
|
||||
|
||||
use 5.00405; #postfix foreach
|
||||
use strict;
|
||||
use vars qw($VERSION @ISA @EXPORT);
|
||||
$VERSION = '0.99';
|
||||
use Config qw(%Config);
|
||||
use Text::ParseWords 'quotewords';
|
||||
|
||||
use File::Spec;
|
||||
use File::Temp;
|
||||
|
||||
require Exporter;
|
||||
@ISA = qw(Exporter);
|
||||
@EXPORT = qw(assert_lib check_lib_or_exit check_lib);
|
||||
|
||||
# localising prevents the warningness leaking out of this module
|
||||
local $^W = 1; # use warnings is a 5.6-ism
|
||||
|
||||
_findcc(); # bomb out early if there's no compiler
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Devel::CheckLib - check that a library is available
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Devel::CheckLib is a perl module that checks whether a particular C
|
||||
library and its headers are available.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Devel::CheckLib;
|
||||
|
||||
check_lib_or_exit( lib => 'jpeg', header => 'jpeglib.h' );
|
||||
check_lib_or_exit( lib => [ 'iconv', 'jpeg' ] );
|
||||
|
||||
# or prompt for path to library and then do this:
|
||||
check_lib_or_exit( lib => 'jpeg', libpath => $additional_path );
|
||||
|
||||
=head1 USING IT IN Makefile.PL or Build.PL
|
||||
|
||||
If you want to use this from Makefile.PL or Build.PL, do
|
||||
not simply copy the module into your distribution as this may cause
|
||||
problems when PAUSE and search.cpan.org index the distro. Instead, use
|
||||
the use-devel-checklib script.
|
||||
|
||||
=head1 HOW IT WORKS
|
||||
|
||||
You pass named parameters to a function, describing to it how to build
|
||||
and link to the libraries.
|
||||
|
||||
It works by trying to compile some code - which defaults to this:
|
||||
|
||||
int main(void) { return 0; }
|
||||
|
||||
and linking it to the specified libraries. If something pops out the end
|
||||
which looks executable, it gets executed, and if main() returns 0 we know
|
||||
that it worked. That tiny program is
|
||||
built once for each library that you specify, and (without linking) once
|
||||
for each header file.
|
||||
|
||||
If you want to check for the presence of particular functions in a
|
||||
library, or even that those functions return particular results, then
|
||||
you can pass your own function body for main() thus:
|
||||
|
||||
check_lib_or_exit(
|
||||
function => 'foo();if(libversion() > 5) return 0; else return 1;'
|
||||
incpath => ...
|
||||
libpath => ...
|
||||
lib => ...
|
||||
header => ...
|
||||
);
|
||||
|
||||
In that case, it will fail to build if either foo() or libversion() don't
|
||||
exist, and main() will return the wrong value if libversion()'s return
|
||||
value isn't what you want.
|
||||
|
||||
=head1 FUNCTIONS
|
||||
|
||||
All of these take the same named parameters and are exported by default.
|
||||
To avoid exporting them, C<use Devel::CheckLib ()>.
|
||||
|
||||
=head2 assert_lib
|
||||
|
||||
This takes several named parameters, all of which are optional, and dies
|
||||
with an error message if any of the libraries listed can
|
||||
not be found. B<Note>: dying in a Makefile.PL or Build.PL may provoke
|
||||
a 'FAIL' report from CPAN Testers' automated smoke testers. Use
|
||||
C<check_lib_or_exit> instead.
|
||||
|
||||
The named parameters are:
|
||||
|
||||
=over
|
||||
|
||||
=item lib
|
||||
|
||||
Must be either a string with the name of a single
|
||||
library or a reference to an array of strings of library names. Depending
|
||||
on the compiler found, library names will be fed to the compiler either as
|
||||
C<-l> arguments or as C<.lib> file names. (E.g. C<-ljpeg> or C<jpeg.lib>)
|
||||
|
||||
=item libpath
|
||||
|
||||
a string or an array of strings
|
||||
representing additional paths to search for libraries.
|
||||
|
||||
=item LIBS
|
||||
|
||||
a C<ExtUtils::MakeMaker>-style space-seperated list of
|
||||
libraries (each preceded by '-l') and directories (preceded by '-L').
|
||||
|
||||
This can also be supplied on the command-line.
|
||||
|
||||
=item debug
|
||||
|
||||
If true - emit information during processing that can be used for
|
||||
debugging.
|
||||
|
||||
=back
|
||||
|
||||
And libraries are no use without header files, so ...
|
||||
|
||||
=over
|
||||
|
||||
=item header
|
||||
|
||||
Must be either a string with the name of a single
|
||||
header file or a reference to an array of strings of header file names.
|
||||
|
||||
=item incpath
|
||||
|
||||
a string or an array of strings
|
||||
representing additional paths to search for headers.
|
||||
|
||||
=item INC
|
||||
|
||||
a C<ExtUtils::MakeMaker>-style space-seperated list of
|
||||
incpaths, each preceded by '-I'.
|
||||
|
||||
This can also be supplied on the command-line.
|
||||
|
||||
=back
|
||||
|
||||
=head2 check_lib_or_exit
|
||||
|
||||
This behaves exactly the same as C<assert_lib()> except that instead of
|
||||
dieing, it warns (with exactly the same error message) and exits.
|
||||
This is intended for use in Makefile.PL / Build.PL
|
||||
when you might want to prompt the user for various paths and
|
||||
things before checking that what they've told you is sane.
|
||||
|
||||
If any library or header is missing, it exits with an exit value of 0 to avoid
|
||||
causing a CPAN Testers 'FAIL' report. CPAN Testers should ignore this
|
||||
result -- which is what you want if an external library dependency is not
|
||||
available.
|
||||
|
||||
=head2 check_lib
|
||||
|
||||
This behaves exactly the same as C<assert_lib()> except that it is silent,
|
||||
returning false instead of dieing, or true otherwise.
|
||||
|
||||
=cut
|
||||
|
||||
sub check_lib_or_exit {
|
||||
eval 'assert_lib(@_)';
|
||||
if ($@) {
|
||||
warn $@;
|
||||
exit;
|
||||
}
|
||||
}
|
||||
|
||||
sub check_lib {
|
||||
eval 'assert_lib(@_)';
|
||||
return $@ ? 0 : 1;
|
||||
}
|
||||
|
||||
sub assert_lib {
|
||||
my %args = @_;
|
||||
my ( @libs, @libpaths, @headers, @incpaths );
|
||||
|
||||
# FIXME: these four just SCREAM "refactor" at me
|
||||
@libs = ( ref( $args{lib} ) ? @{ $args{lib} } : $args{lib} )
|
||||
if $args{lib};
|
||||
@libpaths = ( ref( $args{libpath} ) ? @{ $args{libpath} } : $args{libpath} )
|
||||
if $args{libpath};
|
||||
@headers = ( ref( $args{header} ) ? @{ $args{header} } : $args{header} )
|
||||
if $args{header};
|
||||
@incpaths = ( ref( $args{incpath} ) ? @{ $args{incpath} } : $args{incpath} )
|
||||
if $args{incpath};
|
||||
|
||||
# work-a-like for Makefile.PL's LIBS and INC arguments
|
||||
# if given as command-line argument, append to %args
|
||||
for my $arg (@ARGV) {
|
||||
for my $mm_attr_key (qw(LIBS INC)) {
|
||||
if ( my ($mm_attr_value) = $arg =~ /\A $mm_attr_key = (.*)/x ) {
|
||||
|
||||
# it is tempting to put some \s* into the expression, but the
|
||||
# MM command-line parser only accepts LIBS etc. followed by =,
|
||||
# so we should not be any more lenient with whitespace than that
|
||||
$args{$mm_attr_key} .= " $mm_attr_value";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# using special form of split to trim whitespace
|
||||
if ( defined( $args{LIBS} ) ) {
|
||||
foreach my $arg ( split( ' ', $args{LIBS} ) ) {
|
||||
die("LIBS argument badly-formed: $arg\n") unless ( $arg =~ /^-[lLR]/ );
|
||||
push @{ $arg =~ /^-l/ ? \@libs : \@libpaths }, substr( $arg, 2 );
|
||||
}
|
||||
}
|
||||
if ( defined( $args{INC} ) ) {
|
||||
foreach my $arg ( split( ' ', $args{INC} ) ) {
|
||||
die("INC argument badly-formed: $arg\n") unless ( $arg =~ /^-I/ );
|
||||
push @incpaths, substr( $arg, 2 );
|
||||
}
|
||||
}
|
||||
|
||||
my ( $cc, $ld ) = _findcc();
|
||||
my @missing;
|
||||
my @wrongresult;
|
||||
my @use_headers;
|
||||
|
||||
# first figure out which headers we can't find ...
|
||||
for my $header (@headers) {
|
||||
push @use_headers, $header;
|
||||
my ( $ch, $cfile ) = File::Temp::tempfile( 'assertlibXXXXXXXX', SUFFIX => '.c' );
|
||||
my $ofile = $cfile;
|
||||
$ofile =~ s/\.c$/$Config{_o}/;
|
||||
print $ch qq{#include <$_>\n} for @use_headers;
|
||||
print $ch qq{int main(void) { return 0; }\n};
|
||||
close($ch);
|
||||
my $exefile = File::Temp::mktemp('assertlibXXXXXXXX') . $Config{_exe};
|
||||
my @sys_cmd;
|
||||
|
||||
# FIXME: re-factor - almost identical code later when linking
|
||||
if ( $Config{cc} eq 'cl' ) { # Microsoft compiler
|
||||
require Win32;
|
||||
@sys_cmd = (
|
||||
@$cc,
|
||||
$cfile,
|
||||
"/Fe$exefile",
|
||||
( map { '/I' . Win32::GetShortPathName($_) } @incpaths ),
|
||||
"/link",
|
||||
@$ld
|
||||
);
|
||||
}
|
||||
elsif ( $Config{cc} =~ /bcc32(\.exe)?/ ) { # Borland
|
||||
@sys_cmd = (
|
||||
@$cc,
|
||||
@$ld,
|
||||
( map { "-I$_" } @incpaths ),
|
||||
"-o$exefile",
|
||||
$cfile
|
||||
);
|
||||
}
|
||||
else { # Unix-ish: gcc, Sun, AIX (gcc, cc), ...
|
||||
@sys_cmd = (
|
||||
@$cc,
|
||||
@$ld,
|
||||
$cfile,
|
||||
( map { "-I$_" } @incpaths ),
|
||||
"-o", "$exefile"
|
||||
);
|
||||
}
|
||||
warn "# @sys_cmd\n" if $args{debug};
|
||||
my $rv = $args{debug} ? system(@sys_cmd) : _quiet_system(@sys_cmd);
|
||||
push @missing, $header if $rv != 0 || !-x $exefile;
|
||||
_cleanup_exe($exefile);
|
||||
unlink $ofile if -e $ofile;
|
||||
unlink $cfile;
|
||||
}
|
||||
|
||||
# now do each library in turn with headers
|
||||
my ( $ch, $cfile ) = File::Temp::tempfile( 'assertlibXXXXXXXX', SUFFIX => '.c' );
|
||||
my $ofile = $cfile;
|
||||
$ofile =~ s/\.c$/$Config{_o}/;
|
||||
print $ch qq{#include <$_>\n} foreach (@headers);
|
||||
print $ch "int main(void) { " . ( $args{function} || 'return 0;' ) . " }\n";
|
||||
close($ch);
|
||||
for my $lib (@libs) {
|
||||
my $exefile = File::Temp::mktemp('assertlibXXXXXXXX') . $Config{_exe};
|
||||
my @sys_cmd;
|
||||
if ( $Config{cc} eq 'cl' ) { # Microsoft compiler
|
||||
require Win32;
|
||||
my @libpath = map { q{/libpath:} . Win32::GetShortPathName($_) } @libpaths;
|
||||
|
||||
# this is horribly sensitive to the order of arguments
|
||||
@sys_cmd = (
|
||||
@$cc,
|
||||
$cfile,
|
||||
"${lib}.lib",
|
||||
"/Fe$exefile",
|
||||
( map { '/I' . Win32::GetShortPathName($_) } @incpaths ),
|
||||
"/link",
|
||||
@$ld,
|
||||
( map { '/libpath:' . Win32::GetShortPathName($_) } @libpaths ),
|
||||
);
|
||||
}
|
||||
elsif ( $Config{cc} eq 'CC/DECC' ) { # VMS
|
||||
}
|
||||
elsif ( $Config{cc} =~ /bcc32(\.exe)?/ ) { # Borland
|
||||
@sys_cmd = (
|
||||
@$cc,
|
||||
@$ld,
|
||||
"-o$exefile",
|
||||
( map { "-I$_" } @incpaths ),
|
||||
( map { "-L$_" } @libpaths ),
|
||||
"-l$lib",
|
||||
$cfile
|
||||
);
|
||||
}
|
||||
else { # Unix-ish
|
||||
# gcc, Sun, AIX (gcc, cc)
|
||||
@sys_cmd = (
|
||||
@$cc,
|
||||
@$ld,
|
||||
$cfile,
|
||||
"-o", "$exefile",
|
||||
( map { "-I$_" } @incpaths ),
|
||||
( map { "-L$_" } @libpaths ),
|
||||
"-l$lib",
|
||||
);
|
||||
}
|
||||
warn "# @sys_cmd\n" if $args{debug};
|
||||
my $rv = $args{debug} ? system(@sys_cmd) : _quiet_system(@sys_cmd);
|
||||
push @missing, $lib if $rv != 0 || !-x $exefile;
|
||||
my $absexefile = File::Spec->rel2abs($exefile);
|
||||
$absexefile = '"' . $absexefile . '"' if $absexefile =~ m/\s/;
|
||||
push @wrongresult, $lib if $rv == 0 && -x $exefile && system($absexefile) != 0;
|
||||
unlink $ofile if -e $ofile;
|
||||
_cleanup_exe($exefile);
|
||||
}
|
||||
unlink $cfile;
|
||||
|
||||
my $miss_string = join( q{, }, map { qq{'$_'} } @missing );
|
||||
die("Can't link/include C library $miss_string, aborting.\n") if @missing;
|
||||
my $wrong_string = join( q{, }, map { qq{'$_'} } @wrongresult );
|
||||
die("wrong result: $wrong_string\n") if @wrongresult;
|
||||
}
|
||||
|
||||
sub _cleanup_exe {
|
||||
my ($exefile) = @_;
|
||||
my $ofile = $exefile;
|
||||
$ofile =~ s/$Config{_exe}$/$Config{_o}/;
|
||||
unlink $exefile if -f $exefile;
|
||||
unlink $ofile if -f $ofile;
|
||||
unlink "$exefile\.manifest" if -f "$exefile\.manifest";
|
||||
if ( $Config{cc} eq 'cl' ) {
|
||||
|
||||
# MSVC also creates foo.ilk and foo.pdb
|
||||
my $ilkfile = $exefile;
|
||||
$ilkfile =~ s/$Config{_exe}$/.ilk/;
|
||||
my $pdbfile = $exefile;
|
||||
$pdbfile =~ s/$Config{_exe}$/.pdb/;
|
||||
unlink $ilkfile if -f $ilkfile;
|
||||
unlink $pdbfile if -f $pdbfile;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
# return ($cc, $ld)
|
||||
# where $cc is an array ref of compiler name, compiler flags
|
||||
# where $ld is an array ref of linker flags
|
||||
sub _findcc {
|
||||
|
||||
# Need to use $keep=1 to work with MSWin32 backslashes and quotes
|
||||
my $Config_ccflags = $Config{ccflags}; # use copy so ASPerl will compile
|
||||
my @Config_ldflags = ();
|
||||
for my $config_val ( @Config{qw(ldflags perllibs)} ) {
|
||||
push @Config_ldflags, $config_val if ( $config_val =~ /\S/ );
|
||||
}
|
||||
my @ccflags = grep { length } quotewords( '\s+', 1, $Config_ccflags || '' );
|
||||
my @ldflags = grep { length } quotewords( '\s+', 1, @Config_ldflags );
|
||||
my @paths = split( /$Config{path_sep}/, $ENV{PATH} );
|
||||
my @cc = split( /\s+/, $Config{cc} );
|
||||
return ( [ @cc, @ccflags ], \@ldflags ) if -x $cc[0];
|
||||
foreach my $path (@paths) {
|
||||
my $compiler = File::Spec->catfile( $path, $cc[0] ) . $Config{_exe};
|
||||
return ( [ $compiler, @cc[ 1 .. $#cc ], @ccflags ], \@ldflags )
|
||||
if -x $compiler;
|
||||
}
|
||||
die("Couldn't find your C compiler\n");
|
||||
}
|
||||
|
||||
# code substantially borrowed from IPC::Run3
|
||||
sub _quiet_system {
|
||||
my (@cmd) = @_;
|
||||
|
||||
# save handles
|
||||
local *STDOUT_SAVE;
|
||||
local *STDERR_SAVE;
|
||||
open STDOUT_SAVE, ">&STDOUT" or die "CheckLib: $! saving STDOUT";
|
||||
open STDERR_SAVE, ">&STDERR" or die "CheckLib: $! saving STDERR";
|
||||
|
||||
# redirect to nowhere
|
||||
local *DEV_NULL;
|
||||
open DEV_NULL, ">" . File::Spec->devnull
|
||||
or die "CheckLib: $! opening handle to null device";
|
||||
open STDOUT, ">&" . fileno DEV_NULL
|
||||
or die "CheckLib: $! redirecting STDOUT to null handle";
|
||||
open STDERR, ">&" . fileno DEV_NULL
|
||||
or die "CheckLib: $! redirecting STDERR to null handle";
|
||||
|
||||
# run system command
|
||||
my $rv = system(@cmd);
|
||||
|
||||
# restore handles
|
||||
open STDOUT, ">&" . fileno STDOUT_SAVE
|
||||
or die "CheckLib: $! restoring STDOUT handle";
|
||||
open STDERR, ">&" . fileno STDERR_SAVE
|
||||
or die "CheckLib: $! restoring STDERR handle";
|
||||
|
||||
return $rv;
|
||||
}
|
||||
|
||||
=head1 PLATFORMS SUPPORTED
|
||||
|
||||
You must have a C compiler installed. We check for C<$Config{cc}>,
|
||||
both literally as it is in Config.pm and also in the $PATH.
|
||||
|
||||
It has been tested with varying degrees on rigourousness on:
|
||||
|
||||
=over
|
||||
|
||||
=item gcc (on Linux, *BSD, Mac OS X, Solaris, Cygwin)
|
||||
|
||||
=item Sun's compiler tools on Solaris
|
||||
|
||||
=item IBM's tools on AIX
|
||||
|
||||
=item SGI's tools on Irix 6.5
|
||||
|
||||
=item Microsoft's tools on Windows
|
||||
|
||||
=item MinGW on Windows (with Strawberry Perl)
|
||||
|
||||
=item Borland's tools on Windows
|
||||
|
||||
=item QNX
|
||||
|
||||
=back
|
||||
|
||||
=head1 WARNINGS, BUGS and FEEDBACK
|
||||
|
||||
This is a very early release intended primarily for feedback from
|
||||
people who have discussed it. The interface may change and it has
|
||||
not been adequately tested.
|
||||
|
||||
Feedback is most welcome, including constructive criticism.
|
||||
Bug reports should be made using L<http://rt.cpan.org/> or by email.
|
||||
|
||||
When submitting a bug report, please include the output from running:
|
||||
|
||||
perl -V
|
||||
perl -MDevel::CheckLib -e0
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Devel::CheckOS>
|
||||
|
||||
L<Probe::Perl>
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
David Cantrell E<lt>david@cantrell.org.ukE<gt>
|
||||
|
||||
David Golden E<lt>dagolden@cpan.orgE<gt>
|
||||
|
||||
Yasuhiro Matsumoto E<lt>mattn@cpan.orgE<gt>
|
||||
|
||||
Thanks to the cpan-testers-discuss mailing list for prompting us to write it
|
||||
in the first place;
|
||||
|
||||
to Chris Williams for help with Borland support;
|
||||
|
||||
to Tony Cook for help with Microsoft compiler command-line options
|
||||
|
||||
=head1 COPYRIGHT and LICENCE
|
||||
|
||||
Copyright 2007 David Cantrell. Portions copyright 2007 David Golden.
|
||||
|
||||
This module is free-as-in-speech software, and may be used, distributed,
|
||||
and modified under the same conditions as perl itself.
|
||||
|
||||
=head1 CONSPIRACY
|
||||
|
||||
This module is also free-as-in-mason software.
|
||||
|
||||
=cut
|
||||
|
||||
1;
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,126 @@
|
|||
#!/usr/local/bin/perl -w
|
||||
#
|
||||
# Copyright 1999 Clark Cooper <coopercc@netheaven.com>
|
||||
# All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the same terms as Perl itself.
|
||||
#
|
||||
# $Revision: 1.1.1.1 $
|
||||
#
|
||||
# $Date: 2003-07-27 11:07:11 $
|
||||
#
|
||||
# This program take an XML document (either on standard input or
|
||||
# from a filename supplied as an argument) and generates corresponding
|
||||
# canonical XML document on the standard output. The definition of
|
||||
# "Canonical XML" that I'm using is taken from the working draft
|
||||
# published by W3C on 19-Jan-2000:
|
||||
#
|
||||
# http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html
|
||||
#
|
||||
# The latest version of this document is at:
|
||||
#
|
||||
# http://www.w3.org/TR/xml-c14n
|
||||
#
|
||||
|
||||
use XML::Parser;
|
||||
|
||||
my $indoctype = 0;
|
||||
my $inroot = 0;
|
||||
my $p = new XML::Parser(
|
||||
ErrorContext => 2,
|
||||
Namespaces => 1,
|
||||
ParseParamEnt => 1,
|
||||
Handlers => {
|
||||
Start => \&sthndl,
|
||||
End => \&endhndl,
|
||||
Char => \&chrhndl,
|
||||
Proc => \&proc,
|
||||
Doctype => sub { $indoctype = 1 },
|
||||
DoctypeFin => sub { $indoctype = 0 }
|
||||
}
|
||||
);
|
||||
|
||||
my $file = shift;
|
||||
if ( defined $file ) {
|
||||
$p->parsefile($file);
|
||||
}
|
||||
else {
|
||||
$p->parse(*STDIN);
|
||||
}
|
||||
|
||||
################
|
||||
## End main
|
||||
################
|
||||
|
||||
sub sthndl {
|
||||
my $xp = shift;
|
||||
my $el = shift;
|
||||
|
||||
$inroot = 1 unless $inroot;
|
||||
my $ns_index = 1;
|
||||
|
||||
my $elns = $xp->namespace($el);
|
||||
if ( defined $elns ) {
|
||||
my $pfx = 'n' . $ns_index++;
|
||||
print "<$pfx:$el xmlns:$pfx=\"$elns\"";
|
||||
}
|
||||
else {
|
||||
print "<$el";
|
||||
}
|
||||
|
||||
if (@_) {
|
||||
for ( my $i = 0; $i < @_; $i += 2 ) {
|
||||
my $nm = $_[$i];
|
||||
my $ns = $xp->namespace($nm);
|
||||
$_[$i] = defined($ns) ? "$ns\01$nm" : "\01$nm";
|
||||
}
|
||||
|
||||
my %atts = @_;
|
||||
my @ids = sort keys %atts;
|
||||
foreach my $id (@ids) {
|
||||
my ( $ns, $nm ) = split( /\01/, $id );
|
||||
my $val = $xp->xml_escape( $atts{$id}, '"', "\x9", "\xA", "\xD" );
|
||||
if ( length($ns) ) {
|
||||
my $pfx = 'n' . $ns_index++;
|
||||
print " $pfx:$nm=\"$val\" xmlns:$pfx=\"$ns\"";
|
||||
}
|
||||
else {
|
||||
print " $nm=\"$val\"";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print '>';
|
||||
} # End sthndl
|
||||
|
||||
sub endhndl {
|
||||
my ( $xp, $el ) = @_;
|
||||
|
||||
my $nm = $xp->namespace($el) ? "n1:$el" : $el;
|
||||
print "</$nm>";
|
||||
if ( $xp->depth == 0 ) {
|
||||
$inroot = 0;
|
||||
print "\n";
|
||||
}
|
||||
} # End endhndl
|
||||
|
||||
sub chrhndl {
|
||||
my ( $xp, $data ) = @_;
|
||||
|
||||
print $xp->xml_escape( $data, '>', "\xD" );
|
||||
} # End chrhndl
|
||||
|
||||
sub proc {
|
||||
my ( $xp, $target, $data ) = @_;
|
||||
|
||||
unless ($indoctype) {
|
||||
print "<?$target $data?>";
|
||||
print "\n" unless $inroot;
|
||||
}
|
||||
}
|
||||
|
||||
# Tell emacs that this is really a perl script
|
||||
#Local Variables:
|
||||
#mode: perl
|
||||
#End:
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<?pre_doctype
|
||||
hello ?>
|
||||
<!DOCTYPE ctest SYSTEM "ctest.dtd"
|
||||
[
|
||||
<?in_doctype there ?>
|
||||
]
|
||||
>
|
||||
<?pre_r1
|
||||
testing?><?pre_r2 one two?>
|
||||
<ctest xmlns:x="urn:1" xmlns:y="urn:2" version='0.5'>
|
||||
<x:e a="a"/>
|
||||
<foo xmlns='urn:3' z="hello" y:b="there" k="world">
|
||||
<bar/>
|
||||
Here is a PI: <?inside
|
||||
more pi testing ?>. Like it?
|
||||
</foo>
|
||||
<la y:b='b' a='tab->( ) qtab->(	) junk '/>
|
||||
</ctest>
|
||||
<?post_root last stuff?>
|
|
@ -0,0 +1,2 @@
|
|||
<!ATTLIST ctest magic CDATA "xyzzy">
|
||||
<!ATTLIST la a NMTOKENS #IMPLIED>
|
|
@ -0,0 +1,44 @@
|
|||
#!/usr/local/bin/perl -w
|
||||
#
|
||||
# $Revision: 1.1.1.1 $
|
||||
#
|
||||
# $Date: 2003-07-27 11:07:11 $
|
||||
|
||||
use XML::Parser;
|
||||
|
||||
my $file = shift;
|
||||
|
||||
die "Can't find file \"$file\""
|
||||
unless -f $file;
|
||||
|
||||
my $count = 0;
|
||||
|
||||
my $parser = new XML::Parser(
|
||||
ErrorContext => 2,
|
||||
ParseParamEnt => 0
|
||||
);
|
||||
|
||||
$parser->setHandlers( Comment => \&comments );
|
||||
|
||||
$parser->parsefile($file);
|
||||
|
||||
print "Found $count comments.\n";
|
||||
|
||||
################
|
||||
## End of main
|
||||
################
|
||||
|
||||
sub comments {
|
||||
my ( $p, $data ) = @_;
|
||||
|
||||
my $line = $p->current_line;
|
||||
$data =~ s/\n/\n\t/g;
|
||||
print "$line:\t<!--$data-->\n";
|
||||
$count++;
|
||||
|
||||
} # End comments
|
||||
|
||||
# Tell Emacs that this is really a perl script
|
||||
# Local Variables:
|
||||
# mode:perl
|
||||
# End:
|
|
@ -0,0 +1,308 @@
|
|||
#!/usr/local/bin/perl -w
|
||||
#
|
||||
# $Revision: 1.1.1.1 $
|
||||
#
|
||||
# $Date: 2003-07-27 11:07:11 $
|
||||
|
||||
use XML::Parser;
|
||||
|
||||
my $Usage = <<'End_of_Usage;';
|
||||
Usage is:
|
||||
xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat]
|
||||
[{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile
|
||||
|
||||
Prints on standard output the result of filtering the given xmlfile
|
||||
for elements according to the switches. A '-' option will drop the
|
||||
element from the output; a '+' will keep it. The output should also
|
||||
be a well-formed XML document.
|
||||
|
||||
-h Print this message
|
||||
|
||||
-nl Emit a newline prior to every start tag.
|
||||
|
||||
[-+]root Drop (or keep) the root element. Defaults to keep.
|
||||
If the root element were named "foo", then -root
|
||||
would be equivalent to -el=foo. Note that even if
|
||||
you're dropping the root element, it's start and
|
||||
end tag are kept in order that the output remains
|
||||
a well-formed XML document.
|
||||
|
||||
[-+]el=elname
|
||||
Drop (or keep) elements of type elname.
|
||||
|
||||
[-+]el:elnamepat
|
||||
Drop (or keep) element whose type name matches elnamepat.
|
||||
|
||||
[-+]att:attname
|
||||
Drop (or keep) elements which have an attribute = attname.
|
||||
|
||||
[-+]att:attname:attvalpat
|
||||
Drop (or keep) elements which have an attribute = attname
|
||||
and for which the attribute value matches attvalpat.
|
||||
End_of_Usage;
|
||||
|
||||
my $pass = 1;
|
||||
my $do_newline = 0;
|
||||
|
||||
my $attcheck = 0;
|
||||
|
||||
my %drop_el;
|
||||
my @drop_elpat;
|
||||
|
||||
my %keep_el;
|
||||
my @keep_elpat;
|
||||
|
||||
my %drop_att;
|
||||
my %keep_att;
|
||||
|
||||
my $always_true = sub { 1; };
|
||||
my $root_element = '';
|
||||
|
||||
my $in_cdata = 0;
|
||||
|
||||
# Process options
|
||||
|
||||
while ( defined( $ARGV[0] ) and $ARGV[0] =~ /^[-+]/ ) {
|
||||
my $opt = shift;
|
||||
|
||||
if ( $opt eq '-root' ) {
|
||||
$pass = 0;
|
||||
}
|
||||
elsif ( $opt eq '+root' ) {
|
||||
$pass = 1;
|
||||
}
|
||||
elsif ( $opt eq '-h' ) {
|
||||
print $Usage;
|
||||
exit;
|
||||
}
|
||||
elsif ( $opt eq '-nl' ) {
|
||||
$do_newline = 1;
|
||||
}
|
||||
elsif ( $opt =~ /^([-+])el([:=])(\S*)/ ) {
|
||||
my ( $disp, $kind, $pattern ) = ( $1, $2, $3 );
|
||||
my ( $hashref, $aref );
|
||||
|
||||
if ( $disp eq '-' ) {
|
||||
$hashref = \%drop_el;
|
||||
$aref = \@drop_elpat;
|
||||
}
|
||||
else {
|
||||
$hashref = \%keep_el;
|
||||
$aref = \@keep_elpat;
|
||||
}
|
||||
|
||||
if ( $kind eq '=' ) {
|
||||
$hashref->{$pattern} = 1;
|
||||
}
|
||||
else {
|
||||
push( @$aref, $pattern );
|
||||
}
|
||||
}
|
||||
elsif ( $opt =~ /^([-+])att:(\w+)(?::(\S*))?/ ) {
|
||||
my ( $disp, $id, $pattern ) = ( $1, $2, $3 );
|
||||
my $ref = ( $disp eq '-' ) ? \%drop_att : \%keep_att;
|
||||
|
||||
if ( defined($pattern) ) {
|
||||
$pattern =~ s!/!\\/!g;
|
||||
my $sub;
|
||||
eval "\$sub = sub {\$_[0] =~ /$pattern/;};";
|
||||
|
||||
$ref->{$id} = $sub;
|
||||
}
|
||||
else {
|
||||
$ref->{$id} = $always_true;
|
||||
}
|
||||
|
||||
$attcheck = 1;
|
||||
}
|
||||
else {
|
||||
die "Unknown option: $opt\n$Usage";
|
||||
}
|
||||
}
|
||||
|
||||
my $drop_el_pattern = join( '|', @drop_elpat );
|
||||
my $keep_el_pattern = join( '|', @keep_elpat );
|
||||
|
||||
my $drop_sub;
|
||||
if ($drop_el_pattern) {
|
||||
eval "\$drop_sub = sub {\$_[0] =~ /$drop_el_pattern/;}";
|
||||
}
|
||||
else {
|
||||
$drop_sub = sub { };
|
||||
}
|
||||
|
||||
my $keep_sub;
|
||||
if ($keep_el_pattern) {
|
||||
eval "\$keep_sub = sub {\$_[0] =~ /$keep_el_pattern/;}";
|
||||
}
|
||||
else {
|
||||
$keep_sub = sub { };
|
||||
}
|
||||
|
||||
my $doc = shift;
|
||||
|
||||
die "No file specified\n$Usage" unless defined($doc);
|
||||
|
||||
my @togglestack = ();
|
||||
|
||||
my $p = new XML::Parser(
|
||||
ErrorContext => 2,
|
||||
Handlers => {
|
||||
Start => \&start_handler,
|
||||
End => \&end_handler
|
||||
}
|
||||
);
|
||||
|
||||
if ($pass) {
|
||||
$p->setHandlers(
|
||||
Char => \&char_handler,
|
||||
CdataStart => \&cdata_start,
|
||||
CdataEnd => \&cdata_end
|
||||
);
|
||||
}
|
||||
|
||||
$p->parsefile($doc);
|
||||
|
||||
print "</$root_element>\n"
|
||||
unless $pass;
|
||||
|
||||
################
|
||||
## End of main
|
||||
################
|
||||
|
||||
sub start_handler {
|
||||
my $xp = shift;
|
||||
my $el = shift;
|
||||
|
||||
unless ($root_element) {
|
||||
$root_element = $el;
|
||||
print "<$el>\n"
|
||||
unless $pass;
|
||||
}
|
||||
|
||||
my ( $elref, $attref, $sub );
|
||||
|
||||
if ($pass) {
|
||||
$elref = \%drop_el;
|
||||
$attref = \%drop_att;
|
||||
$sub = $drop_sub;
|
||||
}
|
||||
else {
|
||||
$elref = \%keep_el;
|
||||
$attref = \%keep_att;
|
||||
$sub = $keep_sub;
|
||||
}
|
||||
|
||||
if ( defined( $elref->{$el} )
|
||||
or &$sub($el)
|
||||
or check_atts( $attref, @_ ) ) {
|
||||
$pass = !$pass;
|
||||
if ($pass) {
|
||||
$xp->setHandlers(
|
||||
Char => \&char_handler,
|
||||
CdataStart => \&cdata_start,
|
||||
CdataEnd => \&cdata_end
|
||||
);
|
||||
}
|
||||
else {
|
||||
$xp->setHandlers(
|
||||
Char => 0,
|
||||
CdataStart => 0,
|
||||
CdataEnd => 0
|
||||
);
|
||||
}
|
||||
push( @togglestack, $xp->depth );
|
||||
}
|
||||
|
||||
if ($pass) {
|
||||
print "\n" if $do_newline;
|
||||
print "<$el";
|
||||
while (@_) {
|
||||
my $id = shift;
|
||||
my $val = shift;
|
||||
|
||||
$val = $xp->xml_escape( $val, "'" );
|
||||
print " $id='$val'";
|
||||
}
|
||||
print ">";
|
||||
}
|
||||
} # End start_handler
|
||||
|
||||
sub end_handler {
|
||||
my $xp = shift;
|
||||
my $el = shift;
|
||||
|
||||
if ($pass) {
|
||||
print "</$el>";
|
||||
}
|
||||
|
||||
if ( @togglestack and $togglestack[-1] == $xp->depth ) {
|
||||
$pass = !$pass;
|
||||
if ($pass) {
|
||||
$xp->setHandlers(
|
||||
Char => \&char_handler,
|
||||
CdataStart => \&cdata_start,
|
||||
CdataEnd => \&cdata_end
|
||||
);
|
||||
}
|
||||
else {
|
||||
$xp->setHandlers(
|
||||
Char => 0,
|
||||
CdataStart => 0,
|
||||
CdataEnd => 0
|
||||
);
|
||||
}
|
||||
|
||||
pop(@togglestack);
|
||||
}
|
||||
|
||||
} # End end_handler
|
||||
|
||||
sub char_handler {
|
||||
my ( $xp, $text ) = @_;
|
||||
|
||||
if ( length($text) ) {
|
||||
|
||||
$text = $xp->xml_escape( $text, '>' )
|
||||
unless $in_cdata;
|
||||
|
||||
print $text;
|
||||
}
|
||||
} # End char_handler
|
||||
|
||||
sub cdata_start {
|
||||
my $xp = shift;
|
||||
|
||||
print '<![CDATA[';
|
||||
$in_cdata = 1;
|
||||
}
|
||||
|
||||
sub cdata_end {
|
||||
my $xp = shift;
|
||||
|
||||
print ']]>';
|
||||
$in_cdata = 0;
|
||||
}
|
||||
|
||||
sub check_atts {
|
||||
return $attcheck unless $attcheck;
|
||||
|
||||
my $ref = shift;
|
||||
|
||||
while (@_) {
|
||||
my $id = shift;
|
||||
my $val = shift;
|
||||
|
||||
if ( defined( $ref->{$id} ) ) {
|
||||
my $ret = &{ $ref->{$id} }($val);
|
||||
return $ret if $ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
} # End check_atts
|
||||
|
||||
# Tell Emacs that this is really a perl script
|
||||
# Local Variables:
|
||||
# mode:perl
|
||||
# End:
|
|
@ -0,0 +1,171 @@
|
|||
#!/usr/local/bin/perl -w
|
||||
#
|
||||
# $Revision: 1.1.1.1 $
|
||||
#
|
||||
# $Date: 2003-07-27 11:07:11 $
|
||||
|
||||
package Elinfo;
|
||||
|
||||
sub new {
|
||||
bless {
|
||||
COUNT => 0,
|
||||
MINLEV => undef,
|
||||
SEEN => 0,
|
||||
CHARS => 0,
|
||||
EMPTY => 1,
|
||||
PTAB => {},
|
||||
KTAB => {},
|
||||
ATAB => {}
|
||||
},
|
||||
shift;
|
||||
}
|
||||
|
||||
package main;
|
||||
|
||||
use English;
|
||||
use XML::Parser;
|
||||
|
||||
my %elements;
|
||||
my $seen = 0;
|
||||
my $root;
|
||||
|
||||
my $file = shift;
|
||||
|
||||
my $subform = ' @<<<<<<<<<<<<<<< @>>>>';
|
||||
die "Can't find file \"$file\""
|
||||
unless -f $file;
|
||||
|
||||
my $parser = new XML::Parser( ErrorContext => 2 );
|
||||
$parser->setHandlers(
|
||||
Start => \&start_handler,
|
||||
Char => \&char_handler
|
||||
);
|
||||
|
||||
$parser->parsefile($file);
|
||||
|
||||
set_minlev( $root, 0 );
|
||||
|
||||
my $el;
|
||||
|
||||
foreach $el ( sort bystruct keys %elements ) {
|
||||
my $ref = $elements{$el};
|
||||
print "\n================\n$el: ", $ref->{COUNT}, "\n";
|
||||
print "Had ", $ref->{CHARS}, " bytes of character data\n"
|
||||
if $ref->{CHARS};
|
||||
print "Always empty\n"
|
||||
if $ref->{EMPTY};
|
||||
|
||||
showtab( 'Parents', $ref->{PTAB}, 0 );
|
||||
showtab( 'Children', $ref->{KTAB}, 1 );
|
||||
showtab( 'Attributes', $ref->{ATAB}, 0 );
|
||||
}
|
||||
|
||||
################
|
||||
## End of main
|
||||
################
|
||||
|
||||
sub start_handler {
|
||||
my $p = shift;
|
||||
my $el = shift;
|
||||
|
||||
my $elinf = $elements{$el};
|
||||
|
||||
if ( not defined($elinf) ) {
|
||||
$elements{$el} = $elinf = new Elinfo;
|
||||
$elinf->{SEEN} = $seen++;
|
||||
}
|
||||
|
||||
$elinf->{COUNT}++;
|
||||
|
||||
my $partab = $elinf->{PTAB};
|
||||
|
||||
my $parent = $p->current_element;
|
||||
if ( defined($parent) ) {
|
||||
$partab->{$parent}++;
|
||||
my $pinf = $elements{$parent};
|
||||
|
||||
# Increment our slot in parent's child table
|
||||
$pinf->{KTAB}->{$el}++;
|
||||
$pinf->{EMPTY} = 0;
|
||||
}
|
||||
else {
|
||||
$root = $el;
|
||||
}
|
||||
|
||||
# Deal with attributes
|
||||
|
||||
my $atab = $elinf->{ATAB};
|
||||
|
||||
while (@_) {
|
||||
my $att = shift;
|
||||
|
||||
$atab->{$att}++;
|
||||
shift; # Throw away value
|
||||
}
|
||||
|
||||
} # End start_handler
|
||||
|
||||
sub char_handler {
|
||||
my ( $p, $data ) = @_;
|
||||
my $inf = $elements{ $p->current_element };
|
||||
|
||||
$inf->{EMPTY} = 0;
|
||||
if ( $data =~ /\S/ ) {
|
||||
$inf->{CHARS} += length($data);
|
||||
}
|
||||
} # End char_handler
|
||||
|
||||
sub set_minlev {
|
||||
my ( $el, $lev ) = @_;
|
||||
|
||||
my $elinfo = $elements{$el};
|
||||
if ( !defined( $elinfo->{MINLEV} ) or $elinfo->{MINLEV} > $lev ) {
|
||||
my $newlev = $lev + 1;
|
||||
|
||||
$elinfo->{MINLEV} = $lev;
|
||||
foreach ( keys %{ $elinfo->{KTAB} } ) {
|
||||
set_minlev( $_, $newlev );
|
||||
}
|
||||
}
|
||||
} # End set_minlev
|
||||
|
||||
sub bystruct {
|
||||
my $refa = $elements{$a};
|
||||
my $refb = $elements{$b};
|
||||
|
||||
$refa->{MINLEV} <=> $refb->{MINLEV}
|
||||
or $refa->{SEEN} <=> $refb->{SEEN};
|
||||
} # End bystruct
|
||||
|
||||
sub showtab {
|
||||
my ( $title, $table, $dosum ) = @_;
|
||||
|
||||
my @list = sort keys %{$table};
|
||||
|
||||
if (@list) {
|
||||
print "\n $title:\n";
|
||||
|
||||
my $item;
|
||||
my $sum = 0;
|
||||
foreach $item (@list) {
|
||||
my $cnt = $table->{$item};
|
||||
$sum += $cnt;
|
||||
formline( $subform, $item, $cnt );
|
||||
print $ACCUMULATOR, "\n";
|
||||
$ACCUMULATOR = '';
|
||||
}
|
||||
|
||||
if ( $dosum and @list > 1 ) {
|
||||
print " =====\n";
|
||||
formline( $subform, '', $sum );
|
||||
print $ACCUMULATOR, "\n";
|
||||
$ACCUMULATOR = '';
|
||||
}
|
||||
}
|
||||
|
||||
} # End showtab
|
||||
|
||||
# Tell Emacs that this is really a perl script
|
||||
# Local Variables:
|
||||
# mode:perl
|
||||
# End:
|
|
@ -0,0 +1,239 @@
|
|||
# Before `make install' is performed this script should be runnable with
|
||||
# `make test'. After `make install' it should work as `perl test.pl'
|
||||
|
||||
######################### We start with some black magic to print on failure.
|
||||
|
||||
# Change 1..1 below to 1..last_test_to_print .
|
||||
# (It may become useful if the test is moved to ./t subdirectory.)
|
||||
|
||||
BEGIN { print "1..27\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
use FileHandle; # Make 5.10.0 happy.
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
######################### End of black magic.
|
||||
|
||||
# Insert your test code below (better if it prints "ok 13"
|
||||
# (correspondingly "not ok 13") depending on the success of chunk 13
|
||||
# of the test code):
|
||||
|
||||
# Test 2
|
||||
|
||||
my $parser = new XML::Parser( ProtocolEncoding => 'ISO-8859-1' );
|
||||
if ($parser) {
|
||||
print "ok 2\n";
|
||||
}
|
||||
else {
|
||||
print "not ok 2\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
my @ndxstack;
|
||||
my $indexok = 1;
|
||||
|
||||
# Need this external entity
|
||||
|
||||
open( ZOE, '>zoe.ent' );
|
||||
print ZOE "'cute'";
|
||||
close(ZOE);
|
||||
|
||||
# XML string for tests
|
||||
|
||||
my $xmlstring = <<"End_of_XML;";
|
||||
<!DOCTYPE foo
|
||||
[
|
||||
<!NOTATION bar PUBLIC "qrs">
|
||||
<!ENTITY zinger PUBLIC "xyz" "abc" NDATA bar>
|
||||
<!ENTITY fran SYSTEM "fran-def">
|
||||
<!ENTITY zoe SYSTEM "zoe.ent">
|
||||
]>
|
||||
<foo>
|
||||
First line in foo
|
||||
<boom>Fran is &fran; and Zoe is &zoe;</boom>
|
||||
<bar id="jack" stomp="jill">
|
||||
<?line-noise *&*&^&<< ?>
|
||||
1st line in bar
|
||||
<blah> 2nd line in bar </blah>
|
||||
3rd line in bar <!-- Isn't this a doozy -->
|
||||
</bar>
|
||||
<zap ref="zing" />
|
||||
This, '\240', would be a bad character in UTF-8.
|
||||
</foo>
|
||||
End_of_XML;
|
||||
|
||||
# Handlers
|
||||
my @tests;
|
||||
my $pos = '';
|
||||
|
||||
sub ch {
|
||||
my ( $p, $str ) = @_;
|
||||
$tests[4]++;
|
||||
$tests[5]++ if ( $str =~ /2nd line/ and $p->in_element('blah') );
|
||||
if ( $p->in_element('boom') ) {
|
||||
$tests[17]++ if $str =~ /pretty/;
|
||||
$tests[18]++ if $str =~ /cute/;
|
||||
}
|
||||
}
|
||||
|
||||
sub st {
|
||||
my ( $p, $el, %atts ) = @_;
|
||||
|
||||
$ndxstack[ $p->depth ] = $p->element_index;
|
||||
$tests[6]++ if ( $el eq 'bar' and $atts{stomp} eq 'jill' );
|
||||
if ( $el eq 'zap' and $atts{'ref'} eq 'zing' ) {
|
||||
$tests[7]++;
|
||||
$p->default_current;
|
||||
}
|
||||
elsif ( $el eq 'bar' ) {
|
||||
$tests[22]++ if $p->recognized_string eq '<bar id="jack" stomp="jill">';
|
||||
}
|
||||
}
|
||||
|
||||
sub eh {
|
||||
my ( $p, $el ) = @_;
|
||||
$indexok = 0 unless $p->element_index == $ndxstack[ $p->depth ];
|
||||
if ( $el eq 'zap' ) {
|
||||
$tests[8]++;
|
||||
my @old = $p->setHandlers( 'Char', \&newch );
|
||||
$tests[19]++ if $p->current_line == 17;
|
||||
$tests[20]++ if $p->current_column == 20;
|
||||
$tests[23]++ if ( $old[0] eq 'Char' and $old[1] == \&ch );
|
||||
}
|
||||
if ( $el eq 'boom' ) {
|
||||
$p->setHandlers( 'Default', \&dh );
|
||||
}
|
||||
}
|
||||
|
||||
sub dh {
|
||||
my ( $p, $str ) = @_;
|
||||
if ( $str =~ /doozy/ ) {
|
||||
$tests[9]++;
|
||||
$pos = $p->position_in_context(1);
|
||||
}
|
||||
$tests[10]++ if $str =~ /^<zap/;
|
||||
}
|
||||
|
||||
sub pi {
|
||||
my ( $p, $tar, $data ) = @_;
|
||||
|
||||
$tests[11]++ if ( $tar eq 'line-noise' and $data =~ /&\^&<</ );
|
||||
}
|
||||
|
||||
sub note {
|
||||
my ( $p, $name, $base, $sysid, $pubid ) = @_;
|
||||
|
||||
$tests[12]++ if ( $name eq 'bar' and $pubid eq 'qrs' );
|
||||
}
|
||||
|
||||
sub unp {
|
||||
my ( $p, $name, $base, $sysid, $pubid, $notation ) = @_;
|
||||
|
||||
$tests[13]++ if ( $name eq 'zinger'
|
||||
and $pubid eq 'xyz'
|
||||
and $sysid eq 'abc'
|
||||
and $notation eq 'bar' );
|
||||
}
|
||||
|
||||
sub newch {
|
||||
my ( $p, $str ) = @_;
|
||||
|
||||
if ( $] < 5.007001 ) {
|
||||
$tests[14]++ if $str =~ /'\302\240'/;
|
||||
}
|
||||
else {
|
||||
$tests[14]++ if $str =~ /'\xa0'/;
|
||||
}
|
||||
}
|
||||
|
||||
sub extent {
|
||||
my ( $p, $base, $sys, $pub ) = @_;
|
||||
|
||||
if ( $sys eq 'fran-def' ) {
|
||||
$tests[15]++;
|
||||
return 'pretty';
|
||||
}
|
||||
elsif ( $sys eq 'zoe.ent' ) {
|
||||
$tests[16]++;
|
||||
|
||||
open( FOO, $sys ) or die "Couldn't open $sys";
|
||||
return *FOO;
|
||||
}
|
||||
}
|
||||
|
||||
eval {
|
||||
$parser->setHandlers(
|
||||
'Char' => \&ch,
|
||||
'Start' => \&st,
|
||||
'End' => \&eh,
|
||||
'Proc' => \&pi,
|
||||
'Notation' => \¬e,
|
||||
'Unparsed' => \&unp,
|
||||
'ExternEnt' => \&extent,
|
||||
'ExternEntFin' => sub { close(FOO); }
|
||||
);
|
||||
};
|
||||
|
||||
if ($@) {
|
||||
print "not ok 3\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
print "ok 3\n";
|
||||
|
||||
# Test 4..20
|
||||
eval { $parser->parsestring($xmlstring); };
|
||||
|
||||
if ($@) {
|
||||
print "Parse error:\n$@";
|
||||
}
|
||||
else {
|
||||
$tests[21]++;
|
||||
}
|
||||
|
||||
unlink('zoe.ent') if ( -f 'zoe.ent' );
|
||||
|
||||
for ( 4 .. 23 ) {
|
||||
print "not " unless $tests[$_];
|
||||
print "ok $_\n";
|
||||
}
|
||||
|
||||
$cmpstr = << 'End_of_Cmp;';
|
||||
<blah> 2nd line in bar </blah>
|
||||
3rd line in bar <!-- Isn't this a doozy -->
|
||||
===================^
|
||||
</bar>
|
||||
End_of_Cmp;
|
||||
|
||||
if ( $cmpstr ne $pos ) {
|
||||
print "not ";
|
||||
}
|
||||
print "ok 24\n";
|
||||
|
||||
print "not " unless $indexok;
|
||||
print "ok 25\n";
|
||||
|
||||
# Test that memory leak through autovivifying symbol table entries is fixed.
|
||||
|
||||
my $count = 0;
|
||||
$parser = new XML::Parser(
|
||||
Handlers => {
|
||||
Start => sub { $count++ }
|
||||
}
|
||||
);
|
||||
|
||||
$xmlstring = '<a><b>Sea</b></a>';
|
||||
|
||||
eval { $parser->parsestring($xmlstring); };
|
||||
|
||||
if ( $count != 2 ) {
|
||||
print "not ";
|
||||
}
|
||||
print "ok 26\n";
|
||||
|
||||
if ( defined( *{$xmlstring} ) ) {
|
||||
print "not ";
|
||||
}
|
||||
print "ok 27\n";
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
BEGIN { print "1..2\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
my $count = 0;
|
||||
|
||||
my $cdata_part = "<<< & > '' << &&&>&&&&;<";
|
||||
|
||||
my $doc = "<foo> hello <![CDATA[$cdata_part]]> there</foo>";
|
||||
|
||||
my $acc = '';
|
||||
|
||||
sub ch {
|
||||
my ( $xp, $data ) = @_;
|
||||
|
||||
$acc .= $data;
|
||||
}
|
||||
|
||||
sub stcd {
|
||||
my $xp = shift;
|
||||
$xp->setHandlers( Char => \&ch );
|
||||
}
|
||||
|
||||
sub ecd {
|
||||
my $xp = shift;
|
||||
$xp->setHandlers( Char => 0 );
|
||||
}
|
||||
|
||||
$parser = new XML::Parser(
|
||||
ErrorContext => 2,
|
||||
Handlers => {
|
||||
CdataStart => \&stcd,
|
||||
CdataEnd => \&ecd
|
||||
}
|
||||
);
|
||||
|
||||
$parser->parse($doc);
|
||||
|
||||
print "not "
|
||||
unless ( $acc eq $cdata_part );
|
||||
print "ok 2\n";
|
||||
|
|
@ -0,0 +1,180 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Test::More tests => 40;
|
||||
use XML::Parser;
|
||||
|
||||
ok("loaded");
|
||||
|
||||
my $bigval = <<'End_of_bigval;';
|
||||
This is a large string value to test whether the declaration parser still
|
||||
works when the entity or attribute default value may be broken into multiple
|
||||
calls to the default handler.
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
01234567890123456789012345678901234567890123456789012345678901234567890123456789
|
||||
End_of_bigval;
|
||||
|
||||
$bigval =~ s/\n/ /g;
|
||||
|
||||
my $docstr = <<"End_of_Doc;";
|
||||
<?xml version="1.0" encoding="ISO-8859-1" ?>
|
||||
<!DOCTYPE foo SYSTEM 't/foo.dtd'
|
||||
[
|
||||
<!ENTITY alpha 'a'>
|
||||
<!ELEMENT junk ((bar|foo|xyz+), zebra*)>
|
||||
<!ELEMENT xyz (#PCDATA)>
|
||||
<!ELEMENT zebra (#PCDATA|em|strong)*>
|
||||
<!ATTLIST junk
|
||||
id ID #REQUIRED
|
||||
version CDATA #FIXED '1.0'
|
||||
color (red|green|blue) 'green'
|
||||
foo NOTATION (x|y|z) #IMPLIED>
|
||||
<!ENTITY skunk "stinky animal">
|
||||
<!ENTITY big "$bigval">
|
||||
<!-- a comment -->
|
||||
<!NOTATION gif SYSTEM 'http://www.somebody.com/specs/GIF31.TXT'>
|
||||
<!ENTITY logo PUBLIC '//Widgets Corp/Logo' 'logo.gif' NDATA gif>
|
||||
<?DWIM a useless processing instruction ?>
|
||||
<!ELEMENT bar ANY>
|
||||
<!ATTLIST bar big CDATA '$bigval'>
|
||||
]>
|
||||
<foo/>
|
||||
End_of_Doc;
|
||||
|
||||
my $entcnt = 0;
|
||||
my %ents;
|
||||
|
||||
sub enth1 {
|
||||
my ( $p, $name, $val, $sys, $pub, $notation ) = @_;
|
||||
|
||||
is( $val, 'a' ) if ( $name eq 'alpha' );
|
||||
is( $val, 'stinky animal' ) if ( $name eq 'skunk' );
|
||||
if ( $name eq 'logo' ) {
|
||||
ok( !defined($val) );
|
||||
is( $sys, 'logo.gif' );
|
||||
is( $pub, '//Widgets Corp/Logo' );
|
||||
is( $notation, 'gif' );
|
||||
}
|
||||
}
|
||||
|
||||
my $parser = new XML::Parser(
|
||||
ErrorContext => 2,
|
||||
NoLWP => 1,
|
||||
ParseParamEnt => 1,
|
||||
Handlers => { Entity => \&enth1 }
|
||||
);
|
||||
|
||||
eval { $parser->parse($docstr) };
|
||||
|
||||
sub eleh {
|
||||
my ( $p, $name, $model ) = @_;
|
||||
|
||||
if ( $name eq 'junk' ) {
|
||||
is( $model, '((bar|foo|xyz+),zebra*)' );
|
||||
ok $model->isseq;
|
||||
my @parts = $model->children;
|
||||
ok( $parts[0]->ischoice );
|
||||
my @cparts = $parts[0]->children;
|
||||
is( $cparts[0], 'bar' );
|
||||
is( $cparts[1], 'foo' );
|
||||
is( $cparts[2], 'xyz+' );
|
||||
is( $cparts[2]->name, 'xyz' );
|
||||
is( $parts[1]->name, 'zebra' );
|
||||
is( $parts[1]->quant, '*' );
|
||||
}
|
||||
|
||||
if ( $name eq 'xyz' ) {
|
||||
ok( $model->ismixed );
|
||||
ok( !defined( $model->children ) );
|
||||
}
|
||||
|
||||
if ( $name eq 'zebra' ) {
|
||||
ok( $model->ismixed );
|
||||
is( ( $model->children )[1], 'strong' );
|
||||
}
|
||||
|
||||
if ( $name eq 'bar' ) {
|
||||
ok( $model->isany );
|
||||
}
|
||||
}
|
||||
|
||||
sub enth2 {
|
||||
my ( $p, $name, $val, $sys, $pub, $notation ) = @_;
|
||||
|
||||
is( $val, 'a' ) if ( $name eq 'alpha' );
|
||||
is( $val, 'stinky animal' ) if ( $name eq 'skunk' );
|
||||
is( $val, $bigval ) if ( $name eq 'big' );
|
||||
ok( !defined($val) and $sys eq 'logo.gif' and $pub eq '//Widgets Corp/Logo' and $notation eq 'gif' )
|
||||
if ( $name eq 'logo' );
|
||||
}
|
||||
|
||||
sub doc {
|
||||
my ( $p, $name, $sys, $pub, $intdecl ) = @_;
|
||||
|
||||
is( $name, 'foo' );
|
||||
is( $sys, 't/foo.dtd' );
|
||||
ok($intdecl);
|
||||
}
|
||||
|
||||
sub att {
|
||||
my ( $p, $elname, $attname, $type, $default, $fixed ) = @_;
|
||||
|
||||
if ( $elname eq 'junk' ) {
|
||||
if ( $attname eq 'id' and $type eq 'ID' ) {
|
||||
is( $default, '#REQUIRED' );
|
||||
ok( !$fixed );
|
||||
}
|
||||
elsif ( $attname eq 'version' and $type eq 'CDATA' ) {
|
||||
is( $default, "'1.0'" );
|
||||
ok($fixed);
|
||||
}
|
||||
elsif ( $attname eq 'color' and $type eq '(red|green|blue)' ) {
|
||||
is( $default, "'green'" );
|
||||
}
|
||||
elsif ( $attname eq 'foo' and $type eq 'NOTATION(x|y|z)' ) {
|
||||
is( $default, '#IMPLIED' );
|
||||
}
|
||||
}
|
||||
elsif ( $elname eq 'bar' ) {
|
||||
is( $attname, 'big' );
|
||||
is( $default, "'$bigval'" );
|
||||
}
|
||||
}
|
||||
|
||||
sub xd {
|
||||
my ( $p, $version, $enc, $stand ) = @_;
|
||||
|
||||
if ( defined($version) ) {
|
||||
is( $version, '1.0' );
|
||||
is( $enc, 'ISO-8859-1' );
|
||||
ok( !defined($stand) );
|
||||
}
|
||||
else {
|
||||
is( $enc, 'x-sjis-unicode' );
|
||||
}
|
||||
}
|
||||
|
||||
$parser->setHandlers(
|
||||
Entity => \&enth2,
|
||||
Element => \&eleh,
|
||||
Attlist => \&att,
|
||||
Doctype => \&doc,
|
||||
XMLDecl => \&xd
|
||||
);
|
||||
|
||||
$| = 1;
|
||||
$parser->parse($docstr);
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
BEGIN { print "1..4\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
$doc = <<'End_of_Doc;';
|
||||
<!DOCTYPE foo [
|
||||
<!ATTLIST bar zz CDATA 'there'>
|
||||
]>
|
||||
<foo>
|
||||
<bar xx="hello"/>
|
||||
<bar zz="other"/>
|
||||
</foo>
|
||||
End_of_Doc;
|
||||
|
||||
sub st {
|
||||
my $xp = shift;
|
||||
my $el = shift;
|
||||
|
||||
if ( $el eq 'bar' ) {
|
||||
my %atts = @_;
|
||||
my %isdflt;
|
||||
my $specified = $xp->specified_attr;
|
||||
|
||||
for ( my $i = $specified; $i < @_; $i += 2 ) {
|
||||
$isdflt{ $_[$i] } = 1;
|
||||
}
|
||||
|
||||
if ( defined $atts{xx} ) {
|
||||
print 'not '
|
||||
if $isdflt{'xx'};
|
||||
print "ok 2\n";
|
||||
|
||||
print 'not '
|
||||
unless $isdflt{'zz'};
|
||||
print "ok 3\n";
|
||||
}
|
||||
else {
|
||||
print 'not '
|
||||
if $isdflt{'zz'};
|
||||
print "ok 4\n";
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
$p = new XML::Parser( Handlers => { Start => \&st } );
|
||||
|
||||
$p->parse($doc);
|
|
@ -0,0 +1,111 @@
|
|||
BEGIN { print "1..6\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
################################################################
|
||||
# Check encoding
|
||||
|
||||
my $xmldec = "<?xml version='1.0' encoding='x-sjis-unicode' ?>\n";
|
||||
|
||||
my $docstring = <<"End_of_doc;";
|
||||
<\x8e\x83>\x90\x46\x81\x41\x98\x61\x81\x41\x99\x44
|
||||
</\x8e\x83>
|
||||
End_of_doc;
|
||||
|
||||
my $doc = $xmldec . $docstring;
|
||||
|
||||
my @bytes;
|
||||
my $lastel;
|
||||
|
||||
sub text {
|
||||
my ( $xp, $data ) = @_;
|
||||
|
||||
push( @bytes, unpack( 'U0C*', $data ) ); # was fixed 5.10
|
||||
}
|
||||
|
||||
sub start {
|
||||
my ( $xp, $el ) = @_;
|
||||
|
||||
$lastel = $el;
|
||||
}
|
||||
|
||||
my $p = XML::Parser->new( Handlers => { Start => \&start, Char => \&text } );
|
||||
|
||||
$p->parse($doc);
|
||||
|
||||
my $exptag = ( $] < 5.006 )
|
||||
? "\xe7\xa5\x89" # U+7949 blessings 0x8e83
|
||||
: chr(0x7949);
|
||||
|
||||
my @expected = (
|
||||
0xe8, 0x89, 0xb2, # U+8272 beauty 0x9046
|
||||
0xe3, 0x80, 0x81, # U+3001 comma 0x8141
|
||||
0xe5, 0x92, 0x8c, # U+548C peace 0x9861
|
||||
0xe3, 0x80, 0x81, # U+3001 comma 0x8141
|
||||
0xe5, 0x83, 0x96, # U+50D6 joy 0x9944
|
||||
0x0a
|
||||
);
|
||||
|
||||
if ( $lastel eq $exptag ) {
|
||||
print "ok 2\n";
|
||||
}
|
||||
else {
|
||||
print "not ok 2\n";
|
||||
}
|
||||
|
||||
if ( @bytes != @expected ) {
|
||||
print "not ok 3\n";
|
||||
}
|
||||
else {
|
||||
my $i;
|
||||
for ( $i = 0; $i < @expected; $i++ ) {
|
||||
if ( $bytes[$i] != $expected[$i] ) {
|
||||
print "not ok 3\n";
|
||||
exit;
|
||||
}
|
||||
}
|
||||
print "ok 3\n";
|
||||
}
|
||||
|
||||
$lastel = '';
|
||||
|
||||
$p->parse( $docstring, ProtocolEncoding => 'X-SJIS-UNICODE' );
|
||||
|
||||
if ( $lastel eq $exptag ) {
|
||||
print "ok 4\n";
|
||||
}
|
||||
else {
|
||||
print "not ok 4\n";
|
||||
}
|
||||
|
||||
# Test the CP-1252 Win-Latin-1 mapping
|
||||
|
||||
$docstring = qq(<?xml version='1.0' encoding='WINDOWS-1252' ?>
|
||||
<doc euro="\x80" lsq="\x91" rdq="\x94" />
|
||||
);
|
||||
|
||||
my %attr;
|
||||
|
||||
sub get_attr {
|
||||
my ( $xp, $el, @list ) = @_;
|
||||
%attr = @list;
|
||||
}
|
||||
|
||||
$p = XML::Parser->new( Handlers => { Start => \&get_attr } );
|
||||
|
||||
eval { $p->parse($docstring) };
|
||||
|
||||
if ($@) {
|
||||
print "not "; # couldn't load the map
|
||||
}
|
||||
print "ok 5\n";
|
||||
|
||||
if ( $attr{euro} ne ( $] < 5.006 ? "\xE2\x82\xAC" : chr(0x20AC) )
|
||||
or $attr{lsq} ne ( $] < 5.006 ? "\xE2\x80\x98" : chr(0x2018) )
|
||||
or $attr{rdq} ne ( $] < 5.006 ? "\xE2\x80\x9D" : chr(0x201D) ) ) {
|
||||
print "not ";
|
||||
}
|
||||
print "ok 6\n";
|
||||
|
|
@ -0,0 +1 @@
|
|||
<more/>
|
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Test::More tests => 4;
|
||||
use XML::Parser;
|
||||
|
||||
################################################################
|
||||
# Check default external entity handler
|
||||
|
||||
my $txt = '';
|
||||
|
||||
sub txt {
|
||||
my ( $xp, $data ) = @_;
|
||||
|
||||
$txt .= $data;
|
||||
}
|
||||
|
||||
my $docstring = <<'End_of_XML;';
|
||||
<!DOCTYPE foo [
|
||||
<!ENTITY a SYSTEM "a.ent">
|
||||
<!ENTITY b SYSTEM "b.ent">
|
||||
<!ENTITY c SYSTEM "c.ent">
|
||||
]>
|
||||
<foo>
|
||||
a = "&a;"
|
||||
b = "&b;"
|
||||
|
||||
|
||||
And here they are again in reverse order:
|
||||
b = "&b;"
|
||||
a = "&a;"
|
||||
|
||||
</foo>
|
||||
End_of_XML;
|
||||
|
||||
my $ent_fh;
|
||||
open( $ent_fh, '>', 'a.ent' ) or die "Couldn't open a.ent for writing";
|
||||
print $ent_fh "This ('&c;') is a quote of c";
|
||||
close($ent_fh);
|
||||
|
||||
open( $ent_fh, '>', 'b.ent' ) or die "Couldn't open b.ent for writing";
|
||||
print $ent_fh "Hello, I'm B";
|
||||
close($ent_fh);
|
||||
|
||||
open( $ent_fh, '>', 'c.ent' ) or die "Couldn't open c.ent for writing";
|
||||
print $ent_fh "Hurrah for C";
|
||||
close($ent_fh);
|
||||
|
||||
my $p = new XML::Parser( Handlers => { Char => \&txt } );
|
||||
|
||||
$p->parse($docstring);
|
||||
|
||||
my %check = (
|
||||
a => "This ('Hurrah for C') is a quote of c",
|
||||
b => "Hello, I'm B"
|
||||
);
|
||||
|
||||
while ( $txt =~ /([ab]) = "(.*)"/g ) {
|
||||
my ( $k, $v ) = ( $1, $2 );
|
||||
|
||||
is($check{$k}, $v);
|
||||
}
|
||||
|
||||
unlink('a.ent');
|
||||
unlink('b.ent');
|
||||
unlink('c.ent');
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
use Test::More tests => 1;
|
||||
use XML::Parser;
|
||||
|
||||
my $count = 0;
|
||||
|
||||
$parser = XML::Parser->new( ErrorContext => 2 );
|
||||
$parser->setHandlers( Comment => sub { $count++; } );
|
||||
|
||||
$parser->parsefile('samples/REC-xml-19980210.xml');
|
||||
|
||||
is( $count, 37 );
|
|
@ -0,0 +1,28 @@
|
|||
|
||||
use if $] < 5.006, Test::More => skip_all => 'syntax requires perl 5.6';
|
||||
|
||||
#tests behaviour on perls 5.10? .. 5.10.1
|
||||
package Some::Fake::Packege;
|
||||
|
||||
sub fake_sub {
|
||||
require FileHandle;
|
||||
}
|
||||
|
||||
package main;
|
||||
|
||||
use Test::More tests => 1;
|
||||
use XML::Parser;
|
||||
use strict;
|
||||
|
||||
my $count = 0;
|
||||
|
||||
my $parser = XML::Parser->new( ErrorContext => 2 );
|
||||
$parser->setHandlers( Comment => sub { $count++; } );
|
||||
|
||||
open my $fh, '<', 'samples/REC-xml-19980210.xml' or die;
|
||||
|
||||
#on 5.10 $fh would be a FileHandle object without a real FileHandle class
|
||||
|
||||
$parser->parse($fh);
|
||||
|
||||
is( $count, 37 );
|
|
@ -0,0 +1,34 @@
|
|||
BEGIN { print "1..3\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
my $stcount = 0;
|
||||
my $encount = 0;
|
||||
|
||||
sub st {
|
||||
my ( $exp, $el ) = @_;
|
||||
$stcount++;
|
||||
$exp->finish if $el eq 'loc';
|
||||
}
|
||||
|
||||
sub end {
|
||||
$encount++;
|
||||
}
|
||||
|
||||
$parser = new XML::Parser(
|
||||
Handlers => {
|
||||
Start => \&st,
|
||||
End => \&end
|
||||
},
|
||||
ErrorContext => 2
|
||||
);
|
||||
|
||||
$parser->parsefile('samples/REC-xml-19980210.xml');
|
||||
|
||||
print "not " unless $stcount == 12;
|
||||
print "ok 2\n";
|
||||
|
||||
print "not " unless $encount == 8;
|
||||
print "ok 3\n";
|
|
@ -0,0 +1,20 @@
|
|||
<?xml encoding="x-sjis-unicode"?>
|
||||
<!ENTITY joy "™D">
|
||||
|
||||
<!ATTLIST foo zz CDATA 'here'>
|
||||
|
||||
<!ENTITY % bar 'IGNORE'>
|
||||
<!ENTITY % foo 'IGNORE'>
|
||||
|
||||
<!ENTITY more SYSTEM 'ext2.ent'>
|
||||
|
||||
<!ENTITY % ext SYSTEM 'ext.ent'>
|
||||
%ext;
|
||||
|
||||
<![%bar;[
|
||||
<!ATTLIST bar xyz (a|b|c) 'b'>
|
||||
]]>
|
||||
|
||||
<![%foo;[
|
||||
<!ATTLIST foo top CDATA "hello">
|
||||
]]>
|
|
@ -0,0 +1,137 @@
|
|||
BEGIN { print "1..16\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
################################################################
|
||||
# Check namespaces
|
||||
|
||||
$docstring = <<'End_of_doc;';
|
||||
<foo xmlns="urn:blazing-saddles"
|
||||
xmlns:bar="urn:young-frankenstein"
|
||||
bar:alpha="17">
|
||||
<zebra xyz="nothing"/>
|
||||
<tango xmlns=""
|
||||
xmlns:zoo="urn:high-anxiety"
|
||||
beta="blue"
|
||||
zoo:beta="green"
|
||||
bar:beta="red">
|
||||
<?nscheck?>
|
||||
<zoo:here/>
|
||||
<there/>
|
||||
</tango>
|
||||
<everywhere/>
|
||||
</foo>
|
||||
End_of_doc;
|
||||
|
||||
my $gname;
|
||||
|
||||
sub init {
|
||||
my $xp = shift;
|
||||
$gname = $xp->generate_ns_name( 'alpha', 'urn:young-frankenstein' );
|
||||
}
|
||||
|
||||
sub start {
|
||||
my $xp = shift;
|
||||
my $el = shift;
|
||||
|
||||
if ( $el eq 'foo' ) {
|
||||
print "not " unless $xp->namespace($el) eq 'urn:blazing-saddles';
|
||||
print "ok 2\n";
|
||||
|
||||
print "not " unless $xp->new_ns_prefixes == 2;
|
||||
print "ok 3\n";
|
||||
|
||||
while (@_) {
|
||||
my $att = shift;
|
||||
my $val = shift;
|
||||
if ( $att eq 'alpha' ) {
|
||||
print "not " unless $xp->eq_name( $gname, $att );
|
||||
print "ok 4\n";
|
||||
last;
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif ( $el eq 'zebra' ) {
|
||||
print "not " unless $xp->new_ns_prefixes == 0;
|
||||
print "ok 5\n";
|
||||
|
||||
print "not " unless $xp->namespace($el) eq 'urn:blazing-saddles';
|
||||
print "ok 6\n";
|
||||
}
|
||||
elsif ( $el eq 'tango' ) {
|
||||
print "not " if $xp->namespace( $_[0] );
|
||||
print "ok 8\n";
|
||||
|
||||
print "not " unless $_[0] eq $_[2];
|
||||
print "ok 9\n";
|
||||
|
||||
print "not " if $xp->eq_name( $_[0], $_[2] );
|
||||
print "ok 10\n";
|
||||
|
||||
my $cnt = 0;
|
||||
foreach ( $xp->new_ns_prefixes ) {
|
||||
$cnt++ if $_ eq '#default';
|
||||
$cnt++ if $_ eq 'zoo';
|
||||
}
|
||||
|
||||
print "not " unless $cnt == 2;
|
||||
print "ok 11\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub end {
|
||||
my $xp = shift;
|
||||
my $el = shift;
|
||||
|
||||
if ( $el eq 'zebra' ) {
|
||||
print "not "
|
||||
unless $xp->expand_ns_prefix('#default') eq 'urn:blazing-saddles';
|
||||
print "ok 7\n";
|
||||
}
|
||||
elsif ( $el eq 'everywhere' ) {
|
||||
print "not " unless $xp->namespace($el) eq 'urn:blazing-saddles';
|
||||
print "ok 16\n";
|
||||
}
|
||||
}
|
||||
|
||||
sub proc {
|
||||
my $xp = shift;
|
||||
my $target = shift;
|
||||
|
||||
if ( $target eq 'nscheck' ) {
|
||||
print "not " if $xp->new_ns_prefixes > 0;
|
||||
print "ok 12\n";
|
||||
|
||||
my $cnt = 0;
|
||||
foreach ( $xp->current_ns_prefixes ) {
|
||||
$cnt++ if $_ eq 'zoo';
|
||||
$cnt++ if $_ eq 'bar';
|
||||
}
|
||||
|
||||
print "not " unless $cnt == 2;
|
||||
print "ok 13\n";
|
||||
|
||||
print "not "
|
||||
unless $xp->expand_ns_prefix('bar') eq 'urn:young-frankenstein';
|
||||
print "ok 14\n";
|
||||
|
||||
print "not "
|
||||
unless $xp->expand_ns_prefix('zoo') eq 'urn:high-anxiety';
|
||||
print "ok 15\n";
|
||||
}
|
||||
}
|
||||
|
||||
my $parser = new XML::Parser(
|
||||
ErrorContext => 2,
|
||||
Namespaces => 1,
|
||||
Handlers => {
|
||||
Start => \&start,
|
||||
End => \&end,
|
||||
Proc => \&proc,
|
||||
Init => \&init
|
||||
}
|
||||
);
|
||||
|
||||
$parser->parse($docstring);
|
|
@ -0,0 +1,102 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Test::More tests => 13;
|
||||
use XML::Parser;
|
||||
|
||||
my $internal_subset = <<'End_of_internal;';
|
||||
[
|
||||
<!ENTITY % foo "IGNORE">
|
||||
<!ENTITY % bar "INCLUDE">
|
||||
<!ENTITY more SYSTEM "t/ext2.ent">
|
||||
]
|
||||
End_of_internal;
|
||||
|
||||
my $doc = <<"End_of_doc;";
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<!DOCTYPE foo SYSTEM "t/foo.dtd"
|
||||
$internal_subset>
|
||||
<foo>Happy, happy
|
||||
<bar>&joy;, &joy;</bar>
|
||||
<ext/>
|
||||
&more;
|
||||
</foo>
|
||||
End_of_doc;
|
||||
|
||||
my $bartxt = '';
|
||||
my $internal_exists = 0;
|
||||
|
||||
sub start {
|
||||
my ( $xp, $el, %atts ) = @_;
|
||||
|
||||
if ( $el eq 'foo' ) {
|
||||
ok( !defined $atts{top} );
|
||||
ok( defined $atts{zz} );
|
||||
}
|
||||
elsif ( $el eq 'bar' ) {
|
||||
is( $atts{xyz}, 'b' );
|
||||
}
|
||||
elsif ( $el eq 'ext' ) {
|
||||
is( $atts{type}, 'flag' );
|
||||
}
|
||||
elsif ( $el eq 'more' ) {
|
||||
pass("got 'more'");
|
||||
}
|
||||
}
|
||||
|
||||
sub char {
|
||||
my ( $xp, $text ) = @_;
|
||||
|
||||
$bartxt .= $text if $xp->current_element eq 'bar';
|
||||
}
|
||||
|
||||
sub attl {
|
||||
my ( $xp, $el, $att, $type, $dflt, $fixed ) = @_;
|
||||
|
||||
ok( ( $att eq 'xyz' and $dflt eq "'b'" ), 'when el eq bar' ) if ( $el eq 'bar' );
|
||||
ok( !( $att eq 'top' and $dflt eq '"hello"' ), 'when el eq foo' ) if ( $el eq 'foo' );
|
||||
}
|
||||
|
||||
sub dtd {
|
||||
my ( $xp, $name, $sysid, $pubid, $internal ) = @_;
|
||||
|
||||
pass("doctype called");
|
||||
$internal_exists = $internal;
|
||||
}
|
||||
|
||||
my $p = new XML::Parser(
|
||||
ParseParamEnt => 1,
|
||||
ErrorContext => 2,
|
||||
Handlers => {
|
||||
Start => \&start,
|
||||
Char => \&char,
|
||||
Attlist => \&attl,
|
||||
Doctype => \&dtd
|
||||
}
|
||||
);
|
||||
|
||||
eval { $p->parse($doc) };
|
||||
|
||||
if ( $] < 5.006 ) {
|
||||
is( $bartxt, "\xe5\x83\x96, \xe5\x83\x96" );
|
||||
}
|
||||
else {
|
||||
is( $bartxt, chr(0x50d6) . ", " . chr(0x50d6) );
|
||||
}
|
||||
|
||||
ok( $internal_exists, 'internal exists' );
|
||||
|
||||
$doc =~ s/[\s\n]+\[[^]]*\][\s\n]+//m;
|
||||
|
||||
$p->setHandlers(
|
||||
Start => sub {
|
||||
my ( $xp, $el, %atts ) = @_;
|
||||
if ( $el eq 'foo' ) {
|
||||
ok( defined( $atts{zz} ) );
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
$p->parse($doc);
|
|
@ -0,0 +1,43 @@
|
|||
BEGIN { print "1..3\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
my $cnt = 0;
|
||||
my $str;
|
||||
|
||||
sub tmpchar {
|
||||
my ( $xp, $data ) = @_;
|
||||
|
||||
if ( $xp->current_element eq 'day' ) {
|
||||
$str = $xp->original_string;
|
||||
$xp->setHandlers( Char => 0 );
|
||||
}
|
||||
}
|
||||
|
||||
my $p = new XML::Parser(
|
||||
Handlers => {
|
||||
Comment => sub { $cnt++; },
|
||||
Char => \&tmpchar
|
||||
}
|
||||
);
|
||||
|
||||
my $xpnb = $p->parse_start;
|
||||
|
||||
open( my $rec, '<', 'samples/REC-xml-19980210.xml' );
|
||||
|
||||
while (<$rec>) {
|
||||
$xpnb->parse_more($_);
|
||||
}
|
||||
|
||||
close($rec);
|
||||
|
||||
$xpnb->parse_done;
|
||||
|
||||
print "not " unless $cnt == 37;
|
||||
print "ok 2\n";
|
||||
|
||||
print "not " unless $str eq '&draft.day;';
|
||||
print "ok 3\n";
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
BEGIN { print "1..4\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
my $cmnt_count = 0;
|
||||
my $pi_count = 0;
|
||||
my $between_count = 0;
|
||||
my $authseen = 0;
|
||||
|
||||
sub init {
|
||||
my $xp = shift;
|
||||
$xp->skip_until(1); # Skip through prolog
|
||||
}
|
||||
|
||||
sub proc {
|
||||
$pi_count++;
|
||||
}
|
||||
|
||||
sub cmnt {
|
||||
$cmnt_count++;
|
||||
}
|
||||
|
||||
sub start {
|
||||
my ( $xp, $el ) = @_;
|
||||
my $ndx = $xp->element_index;
|
||||
if ( !$authseen and $el eq 'authlist' ) {
|
||||
$authseen = 1;
|
||||
$xp->skip_until(2000);
|
||||
}
|
||||
elsif ( $authseen and $ndx < 2000 ) {
|
||||
$between_count++;
|
||||
}
|
||||
}
|
||||
|
||||
my $p = new XML::Parser(
|
||||
Handlers => {
|
||||
Init => \&init,
|
||||
Start => \&start,
|
||||
Comment => \&cmnt,
|
||||
Proc => \&proc
|
||||
}
|
||||
);
|
||||
|
||||
$p->parsefile('samples/REC-xml-19980210.xml');
|
||||
|
||||
print "not " if $between_count;
|
||||
print "ok 2\n";
|
||||
|
||||
print "not " if $pi_count;
|
||||
print "ok 3\n";
|
||||
|
||||
print "not " unless $cmnt_count == 5;
|
||||
print "ok 4\n";
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
BEGIN { print "1..3\n"; }
|
||||
END { print "not ok 1\n" unless $loaded; }
|
||||
use XML::Parser;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
my $delim = '------------123453As23lkjlklz877';
|
||||
my $file = 'samples/REC-xml-19980210.xml';
|
||||
my $tmpfile = 'stream.tmp';
|
||||
|
||||
my $cnt = 0;
|
||||
|
||||
open( my $out_fh, '>', $tmpfile ) or die "Couldn't open $tmpfile for output";
|
||||
open( my $in_fh, '<', $file ) or die "Couldn't open $file for input";
|
||||
|
||||
while (<$in_fh>) {
|
||||
print $out_fh $_;
|
||||
}
|
||||
|
||||
close($in_fh);
|
||||
print $out_fh "$delim\n";
|
||||
|
||||
open( $in_fh, $file );
|
||||
while (<$in_fh>) {
|
||||
print $out_fh $_;
|
||||
}
|
||||
|
||||
close($in_fh);
|
||||
close($out_fh);
|
||||
|
||||
my $parser = new XML::Parser(
|
||||
Stream_Delimiter => $delim,
|
||||
Handlers => {
|
||||
Comment => sub { $cnt++; }
|
||||
}
|
||||
);
|
||||
|
||||
open( my $fh, $tmpfile );
|
||||
|
||||
$parser->parse($fh);
|
||||
|
||||
print "not " if ( $cnt != 37 );
|
||||
print "ok 2\n";
|
||||
|
||||
$cnt = 0;
|
||||
|
||||
$parser->parse($fh);
|
||||
|
||||
print "not " if ( $cnt != 37 );
|
||||
print "ok 3\n";
|
||||
|
||||
close($fh);
|
||||
unlink($tmpfile);
|
|
@ -0,0 +1,62 @@
|
|||
use Test;
|
||||
BEGIN { plan tests => 13 }
|
||||
use XML::Parser;
|
||||
use IO::File;
|
||||
|
||||
my $xmlstr = '<foo>bar</foo>';
|
||||
|
||||
{
|
||||
# Debug style
|
||||
my $parser = XML::Parser->new( Style => 'Debug' );
|
||||
ok($parser);
|
||||
|
||||
my $tmpfile = IO::File->new_tmpfile();
|
||||
open( OLDERR, ">&STDERR" );
|
||||
open( STDERR, ">&" . $tmpfile->fileno ) || die "Cannot re-open STDERR : $!";
|
||||
|
||||
$parser->parse($xmlstr);
|
||||
|
||||
close(STDERR);
|
||||
open( STDERR, ">&OLDERR" );
|
||||
close(OLDERR);
|
||||
|
||||
seek( $tmpfile, 0, 0 );
|
||||
my $warn = 0;
|
||||
$warn++ while (<$tmpfile>);
|
||||
ok( $warn, 3, "Check we got three warnings out" );
|
||||
}
|
||||
|
||||
{
|
||||
# Object style
|
||||
my $parser = XML::Parser->new( Style => 'Objects' );
|
||||
ok($parser);
|
||||
|
||||
my $tree = $parser->parse($xmlstr);
|
||||
ok($tree);
|
||||
}
|
||||
|
||||
{
|
||||
# Stream style
|
||||
my $parser = XML::Parser->new( Style => 'Stream' );
|
||||
ok($parser);
|
||||
}
|
||||
|
||||
{
|
||||
# Subs style
|
||||
my $parser = XML::Parser->new( Style => 'Subs' );
|
||||
ok($parser);
|
||||
}
|
||||
|
||||
{
|
||||
# Tree style
|
||||
my $parser = XML::Parser->new( Style => 'Tree' );
|
||||
ok($parser);
|
||||
|
||||
my $tree = $parser->parse($xmlstr);
|
||||
ok( ref($tree), 'ARRAY' );
|
||||
ok( $tree->[0], 'foo' );
|
||||
ok( ref( $tree->[1] ), 'ARRAY' );
|
||||
ok( ref( $tree->[1]->[0] ), 'HASH' );
|
||||
ok( $tree->[1][1], '0' );
|
||||
ok( $tree->[1][2], 'bar' );
|
||||
}
|
Loading…
Reference in New Issue