commit 1d68b2273bb7628ab0ae8d9926d316cda0a5f5fc Author: su-fang Date: Tue Sep 27 15:00:17 2022 +0800 Import Upstream version 2.0207 diff --git a/Av_CharPtrPtr.c b/Av_CharPtrPtr.c new file mode 100644 index 0000000..bceaa99 --- /dev/null +++ b/Av_CharPtrPtr.c @@ -0,0 +1,97 @@ +/* Modified from API Cookbook A Example 8 */ + +#ifdef __cplusplus +extern "C" { +#endif +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "Av_CharPtrPtr.h" /* XS_*_charPtrPtr() */ +#ifdef __cplusplus +} +#endif + +#if defined(_MSC_VER) +#define _CRT_SECURE_NO_DEPRECATE 1 +#define _CRT_NONSTDC_NO_DEPRECATE 1 +#endif + + +/* Used by the INPUT typemap for char**. + * Will convert a Perl AV* (containing strings) to a C char**. + */ +char ** XS_unpack_charPtrPtr(SV* rv ) +{ + AV *av; + SV **ssv; + char **s; + int avlen; + int x; + + if( SvROK( rv ) && (SvTYPE(SvRV(rv)) == SVt_PVAV) ) + av = (AV*)SvRV(rv); + else { + return( (char**)NULL ); + } + + /* is it empty? */ + avlen = av_len(av); + if( avlen < 0 ){ + return( (char**)NULL ); + } + + /* av_len+2 == number of strings, plus 1 for an end-of-array sentinel. + */ + s = (char **)safemalloc( sizeof(char*) * (avlen + 2) ); + if( s == NULL ){ + warn("XS_unpack_charPtrPtr: unable to malloc char**"); + return( (char**)NULL ); + } + for( x = 0; x <= avlen; ++x ){ + ssv = av_fetch( av, x, 0 ); + if( ssv != NULL ){ + if( SvPOK( *ssv ) ){ + s[x] = (char *)safemalloc( SvCUR(*ssv) + 1 ); + if( s[x] == NULL ) + warn("XS_unpack_charPtrPtr: unable to malloc char*"); + else + strcpy( s[x], SvPV( *ssv, PL_na ) ); + } + else + warn("XS_unpack_charPtrPtr: array elem %d was not a string.", x ); + } + else + s[x] = (char*)NULL; + } + s[x] = (char*)NULL; /* sentinel */ + return( s ); +} + +/* Used by the OUTPUT typemap for char**. + * Will convert a C char** to a Perl AV*. + */ +void XS_pack_charPtrPtr(SV* st, char **s) +{ + AV *av = newAV(); + SV *sv; + char **c; + + for( c = s; *c != NULL; ++c ){ + sv = newSVpv( *c, 0 ); + av_push( av, sv ); + } + sv = newSVrv( st, NULL ); /* upgrade stack SV to an RV */ + SvREFCNT_dec( sv ); /* discard */ + SvRV( st ) = (SV*)av; /* make stack RV point at our AV */ +} + + +/* cleanup the temporary char** from XS_unpack_charPtrPtr */ +void XS_release_charPtrPtr(char **s) +{ + char **c; + for( c = s; *c != NULL; ++c ) + safefree( *c ); + safefree( s ); +} + diff --git a/Av_CharPtrPtr.h b/Av_CharPtrPtr.h new file mode 100644 index 0000000..765f1a7 --- /dev/null +++ b/Av_CharPtrPtr.h @@ -0,0 +1,4 @@ +char ** XS_unpack_charPtrPtr _(( SV *rv )); +void XS_pack_charPtrPtr _(( SV *st, char **s )); +void XS_release_charPtrPtr _(( char **s )); + diff --git a/Changes b/Changes new file mode 100644 index 0000000..e00313e --- /dev/null +++ b/Changes @@ -0,0 +1,1299 @@ +Revision history for Perl extension XML::LibXML + +2.0207 2021-04-17 + - Small cleanups: + - https://github.com/shlomif/perl-XML-LibXML/pull/63 + - Thanks to @Grinnz , @Kritzefitz and @atoomic . + +2.0206 2020-09-15 + - Add expand_entities => 1 to the instantiation at lib/XML/LibXML/SAX.pm + - in order to fix https://rt.cpan.org/Public/Bug/Display.html?id=132759 + - failing XML-Simple tests + - Thanks to SREZIC , and GRANTM . + - Update HACKING.txt . + +2.0205 2020-05-08 + - Add XML::LibXML to the XML/SAX/ParserDetails.ini configuration file + upon installation. + - https://rt.cpan.org/Public/Bug/Display.html?id=132523 + - https://github.com/shlomif/perl-XML-LibXML/pull/49 + - Thanks to SREZIC , @genio , and @plicease . + +2.0204 2020-03-17 + - Require a recent Alien::Libxml2. + - https://rt.cpan.org/Public/Bug/Display.html?id=132129 + - Thanks to SREZIC + +2.0203 2020-03-11 + - Use Alien::Base::Wrapper for better portability. + - https://github.com/shlomif/perl-XML-LibXML/pull/45 + - Thanks to @plicease + +2.0202 2020-01-13 + - Disable loading external DTDs or external entities by default + - Thanks to Tim Retout + - Docs: Noting that HTTPS doesn't work for schema-loading either. + - Thanks to Jason McIntosh + - Allow to parse RelaxNG without accessing network + - Thanks to PALI + - Allow to parse XML Schema without accessing network + - Thanks to PALI + - Add Test-Count assertion count checking using + https://metacpan.org/pod/Code::TidyAll::Plugin::TestCount + +2.0201 2019-05-25 + - Set MIN_PERL_VERSION to 5.8.1. + - Alien::Libxml2 Makefile.PL cleanups. + - Update the README for grammar and info. + - Link to XML-LibXML "by Example" + - https://github.com/shlomif/perl-XML-LibXML/pull/36 + - Thanks to @Grinnz . + +2.0200 2019-03-23 + - Convert to use Alien::Libxml2 . + - https://github.com/shlomif/perl-XML-LibXML/pull/30 + - Thanks to @genio and @plicease . + +2.0134 2019-02-10 + - Fix overzealous POD escaping in the docs' synposes + - https://github.com/shlomif/perl-XML-LibXML/issues/26 + - Thanks to @davorg. + +2.0133 2019-02-02 + - Mark as working with libxml2 2.9.9 ( and below ). + - Allow LibParser to be provided for all methods + - https://github.com/shlomif/perl-XML-LibXML/pull/23 + - Thanks to @lavock . + - Portability fixes by Reini Urban and others. + - https://github.com/shlomif/perl-XML-LibXML/pull/18 . + - Thanks! + +2.0132 2017-10-28 + - Revert setNamespace() enhancements that broke some dependent tests: + - commit df9fdc6659cb2e4e9bc896e58c02dfd79b430fbb + - add t/48_rt123379_setNamespace.t . + - Thanks to Alexander Bluhm and Slaven Rezic for the reports and + the test. + +2.0131 2017-10-24 + - Re-include the missing *.pod documents. + - https://rt.cpan.org/Ticket/Display.html?id=123362 + - Thanks to Stephen for the report. + - Add t/pod-files-presence.t to test for it in the future. + - Merge https://github.com/shlomif/perl-XML-LibXML/pull/8 + - Fix bug in Node::replaceChild() + - Thanks to @Mipu94 . + +2.0130 2017-10-18 + - Fix the tests with libxml2-2.9.6 . + - https://rt.cpan.org/Public/Bug/Display.html?id=122958 + - Thanks to Daniel Macks for the report and ppisar for a patch. + - setNamespace() Enhancements. + - Thanks to E. Choroba. + +2.0129 2017-03-14 + - Add example/create-sample-html-document.pl . + - https://rt.cpan.org/Ticket/Display.html?id=117923 + - Add support for the set_document_locator() SAX method . + - Thanks to Alexander Batyrshin for the pull-request. + - Make parsing of large perl strings much faster. + - https://github.com/shlomif/perl-XML-LibXML/pull/5 + - Thanks to Cedric Cellier for the pull-request. + +2.0128 2016-07-24 + - Hopefully add the .pod files again as they were missing from 2.0127. + - https://github.com/shlomif/perl-XML-LibXML/issues/3 + - Thanks to Paul Howarth for the report. + - This was caused by ExtUtils::Manifest just warning that the files + referenced in the "MANIFEST" file were not present and still + continuing to prepare the archive as usual. A "do-what-I-don't-want-to" + thing. + +2.0127 2016-07-22 + - Make sure t/release-kwalitee.t and other tests do not run by default. + - Only with AUTHOR_TESTING or RELEASE_TESTING specified. + - Thanks to Lance Wicks for the pull request. + - https://rt.cpan.org/Ticket/Display.html?id=115586 + - https://rt.cpan.org/Ticket/Display.html?id=115859 + +2.0126 2016-06-24 + - Workaround RT#114638: + - 2.9.4 broke XSD Schema support. + - https://rt.cpan.org/Public/Bug/Display.html?id=114638 + - https://github.com/shlomif/libxml2-2.9.4-reader-schema-regression + - https://bugzilla.gnome.org/show_bug.cgi?id=766834 + - https://github.com/shlomif/perl-XML-LibXML/pull/1 + - Thanks to Paul for the report and to RURBAN for a pull-req. + - Add t/release-kwalitee.t for testing CPANTS Kwalitee. + +2.0125 2016-05-30 + - Moved the repository from Mercurial and BitBucket to Git and GitHub: + - https://github.com/shlomif/perl-XML-LibXML + - This was done to better encourage contributions to XML::LibXML and + to be able to use the better Continuous Integration options that + are available for GitHub projects. + +2.0124 2016-02-27 + - Fix XML::LibXML::Text->attributes() to return an empty list in list + context. + - https://rt.cpan.org/Ticket/Display.html?id=112470 + - Thanks to Rob Dixon for the report. + +2.0123 2015-12-06 + - Get rid of an undef-warning in XML::LibXML::Reader . + - https://rt.cpan.org/Ticket/Display.html?id=106830 + - Thanks to Rich for the report and testcase. + - Apply patch from Debian for rewording the documentation. + - https://rt.cpan.org/Ticket/Display.html?id=110116 + - Some extra rewording has been done by SHLOMIF. + - Thanks to Gregor Herrman and the Debian Team + +2.0122 2015-09-01 + - Enable the memory test on cygwin as well as Linux. + - https://rt.cpan.org/Ticket/Display.html?id=104666 + - Thanks to https://me.yahoo.com/howdidwegetherereally#f714d for + the report. + - Fix a typo in createElementNS + - https://rt.cpan.org/Public/Bug/Display.html?id=106807 + - Thanks to Rich for the report. + +2.0121 2015-05-03 + - Mention CVE-2015-3451 and related links in the Changes (= this file) + entry for 2.0119. + - Thanks to Tilmann Haak for pointing it out. + +2.0120 2015-05-01 + - Replace the test for the previous change with a more meaningful one. + - Change was to preserve unset options after a _clone() call. + - https://access.redhat.com/security/cve/CVE-2015-3451 + - Thanks to Salvatore Bonaccorso from Debian for the report and + for a proposed fix (which was further refined by Shlomi Fish). + +2.0119 2015-04-23 + - SECURITY: Preserve unset options after a _clone() call (e.g: in + load_xml()). + - This caused expand_entities(0) to not be preserved/etc. + - This is a security problem which was assigned the CVE number of + CVE-2015-3451 . + - https://access.redhat.com/security/cve/CVE-2015-3451 + - http://seclists.org/oss-sec/2015/q2/313 + - Thanks to Tilmann Haak from xing.com for the report. + +2.0118 2015-02-05 + - Add $Config{incpath} to the include paths on Win32. + - Fixes https://rt.cpan.org/Ticket/Display.html?id=101944 + - Thanks to Marek for the report and propsed fix. + +2.0117 2014-10-26 + - Support libxml2 builds with disabled xmlReader + - Makefile.PL : don't require a recentish ExtUtils::MakeMaker. + - https://rt.cpan.org/Ticket/Display.html?id=83322 + - Thanks to Slaven Rezic for the report. + - Fix broken t/02parse.t with non-English locale with recent perls. + - https://rt.cpan.org/Public/Bug/Display.html?id=97805 + - Thanks to Slaven Rezic for the report. + +2.0116 2014-04-12 + - t/cpan-changes.t : minimum version of Test::CPAN::Changes. + - This is to avoid test failures such as: + - http://www.cpantesters.org/cpan/report/69ee1a2a-6c09-1014-be8f-3786912f2992 + +2.0115 2014-04-03 + - Fix double free when calling $node->addSibling with text nodes. + - https://rt.cpan.org/Ticket/Display.html?id=94149 + - Thanks to Jeff Trout for the report. + +2.0114 2014-04-03 + - Fix memory leaks and segfaults related to removal and insertion of + DTD nodes. + - https://rt.cpan.org/Ticket/Display.html?id=80521 + - Fix memory leak in $node->removeChildNodes + +2.0113 2014-03-14 + - Fix test failures with older libxml2 versions. + - https://rt.cpan.org/Ticket/Display.html?id=93852 + - Thanks to Nick Wellnhofer for the patch. + - Thanks to the CPAN Testers for reporting this issue. + +2.0112 2014-03-13 + - Fix segfaults when accessing attributes of DTD nodes + - https://rt.cpan.org/Ticket/Display.html?id=71076 + - Thanks to Ralph Merridew for the report. + - Make $schema->validate work with elements. This uses + xmlSchemaValidateOneElement under the hood. + - https://rt.cpan.org/Ticket/Display.html?id=93496 + - Thanks to Jeremy Marshall for the report. + - Fix https://rt.cpan.org/Ticket/Display.html?id=93429 . + - Thanks to Nick Wellnhofer for the report and test. + - Apply patch to build with MSVC on Windows. + - https://rt.cpan.org/Ticket/Display.html?id=90064 + - Thanks to Nick Wellnhofer for the investigation and the patch. + +2.0111 2014-03-05 + - Skip t/40reader_mem_error.t with libxml2 < 2.7.4 + The failure is probably due to a known double-free bug. + - https://rt.cpan.org/Ticket/Display.html?id=84564 + - https://bugzilla.gnome.org/show_bug.cgi?id=447899 + - Thanks to Nick Wellnhofer for the pull request. + - Die if a file handle with an encoding layer returns more bytes + than requested in parse_fh. + - https://rt.cpan.org/Ticket/Display.html?id=78448 + - Make insertData, deleteData, replaceData work correctly with UTF-8 + strings. + - Fix substringData + - https://rt.cpan.org/Ticket/Display.html?id=88730 + - Fix "Threads still failing?" Bug report. + - https://rt.cpan.org/Ticket/Display.html?id=91800 + - Thanks to Daniel for the bug report and a test case, and to + YOREEK for the patch. + +2.0110 2014-02-01 + - Add "use strict;" and "use warnings;" to all modules (CPANTS). + - MIN_PERL_VERSION (CPANTS). + - Add a LICENSE section to the POD (CPANTS). + +2.0109 2014-01-31 + - Fix for requiring XML::LibXML inside two loops in perl-5.19.6 and up. + - https://rt.cpan.org/Ticket/Display.html?id=92606 + - Thanks to Father Chrysostomos for the investigation, the test + case, and the fix. + - There are other ways to reproduce the bug, but the tests tests + for a require inside two loops. + +2.0108 2013-12-17 + - Replace local $^W with << no warnings 'portable'; >> in t/15nodelist.t + - Should fix https://rt.cpan.org/Public/Bug/Display.html?id=88017 + - Thanks to "pagenyon" for the report. + - Fix hash key typo in SAX/Builder.pm - "LocalName" was mis-capitalised. + - https://rt.cpan.org/Public/Bug/Display.html?id=91433 + - Thanks to Thomas Berger for the report and for a reproducing + testcase. + - Convert from "use base" to the more modern "use parent". + +2.0107 2013-10-31 + - Add a unique_key method for namespace objects. + - https://bitbucket.org/shlomif/perl-xml-libxml/pull-request/24/unique_key-method-for-namespace-objects/diff + - Thanks to garfieldnate for the pull request. + - Grammar fixes in the documentation. + - https://rt.cpan.org/Ticket/Display.html?id=89718 + - Thanks to Gregor Herrman and the Debian Team + +2.0106 2013-09-17 + - Import croak from "use Carp;" to fix a missing croak definition. + - https://rt.cpan.org/Ticket/Display.html?id=88624 + - Update Devel::CheckLib under "./inc" to 1.01 : + - Should fix https://rt.cpan.org/Public/Bug/Display.html?id=81297 + +2.0105 2013-09-07 + - Pull some commits from Jason Mash (JRMASH) to add convenience methods + to the XML::LibXML::NodeList module. + - New method 'to_literal_delimited($separator)' + - New method 'to_literal_list()' + - Fix t/35huge_mode.t on libxml2 versions less than 2.7.0. + - Fixes https://rt.cpan.org/Ticket/Display.html?id=88375 + - Thanks to Yuriy / YOREEK for the patch. + - Add toStringC14N_v1_1() to XML::LibXML::Node. + - Fixes https://rt.cpan.org/Public/Bug/Display.html?id=88254 + - Thanks to Ulrich for the report and for a patch of sorts. + +2.0104 2013-08-30 + - Fix https://rt.cpan.org/Ticket/Display.html?id=88060 + - Use quoted version number in the SYNOPSIS. + - Thanks to Philipp Gortan for the report. + - Apply a patch from Yuriy / YOREEK for test failures with a + directory component that contains whitespace. + - https://rt.cpan.org/Ticket/Display.html?id=86665 + +2.0103 2013-08-22 + - Apply patch from Yuriy / YOREEK for test failures in t/40reader.t: + - https://rt.cpan.org/Public/Bug/Display.html?id=83779 + - Changed the variable name to start with an underscore for internal + use. + +2.0102 2013-08-19 + - Fixed https://rt.cpan.org/Ticket/Display.html?id=83744 + - XPathContext memory leak on registerFunction. + - Thanks to DGINEV for the report and Yuriy for the patch. + - Apply proposed fix for https://rt.cpan.org/Ticket/Display.html?id=80521 + - "replaceNode() segfaults when copying DTD nodes with ATTLISTs" + - Thanks to GUIDO@cpan.org for the report and to YOREEK for + the patch. + - Apply fix for https://rt.cpan.org/Ticket/Display.html?id=83779 + - "building on RHEL-5-64 fails" + - Thanks to mathias@koerber.org for the report, SREZIC@cpan.org + and d.thomas@its.uq.edu.au for taking part and Yuriy for the patch. + +2.0101 2013-08-15 + - Fixed https://rt.cpan.org/Ticket/Display.html?id=87089 . + - "HTML doctype differs for string/scalar input" + - Thanks to NGLENN for the report and to Yuriy for the tests and + fix. + +2.0100 2013-08-14 + - Added the unique_key() method to XML::LibXML::Node. + - t/40reader.t: assigning from $@ to a lexical so it won't be + over-ridden. + - https://rt.cpan.org/Ticket/Display.html?id=87830 + - Thanks to Douglas Christopher Wilson for the report. + +2.0019 2013-07-01 + - Correct typos reported in RT #86599. + - https://rt.cpan.org/Ticket/Display.html?id=86599 + - Thanks to dsteinbrunner. + +2.0018 2013-05-13 + - Revert previous change of minimal version of libxml2. + - This change proved to be unpopular and didn't prevent + the CPAN test failures. + - By SHLOMIF + +2.0017 2013-05-09 + - Made the minimal version of libxml2 2.9.0 as previous versions were + too buggy due to spuriourous CPAN test failures. + - Please upgrade. + - By SHLOMIF + +2.0016 2013-04-13 + - Don't enable XML_PARSE_HUGE by default. + - Fix the previous version due to a mercurial SNAFU. + +2.0015 2013-04-13 + - Don't enable XML_PARSE_HUGE by default. + - https://bitbucket.org/shlomif/perl-xml-libxml/pull-request/19 + - Thanks to Grant McLean ( https://metacpan.org/author/GRANTM ) for + the bug report and patch. + +2.0014 2012-12-05 + - Got 40reader_mem_error.t to not fetch the external DTDs. + - https://rt.cpan.org/Public/Bug/Display.html?id=81703 + - Thanks to Alexandr Ciornii (CHORNY) for the report and Slaven + Rezic (SREZIC) for the analysis and a proposed fix. + +2.0013 2012-12-04 + - Fix a memory error (double-free) in XML::LibXML::Reader if we reached + EOF and + then called destroy. + - discovered by Shlomi Fish. + - Fixed by Shlomi Fish. + - see t/40reader_mem_error.t + +2.0012 2012-11-09 + - Fix support for references to scalars with overloaded stringification + magic. + - https://rt.cpan.org/Public/Bug/Display.html?id=77864 + - Thanks to Christian Hansen (CHANSEN) for a report, a testcase, and + a patch. + +2.0011 2012-11-08 + - Fix crash in removeChild() when not expanding entities + - https://rt.cpan.org/Ticket/Display.html?id=80395 + - "removeChild() segfaults when not expanding entities" + - Thanks to GUIDO@cpan.org for the report, for a test case (that + was adapted into t/48_removeChild_crashes_rt_80395.t ) and for + a patch to fix it. + +2.0010 2012-11-01 + - Passing debug (an undocumented option) to check_lib in Makefile.PL. + - This way we get more meaningful traces on perl Makefile.PL DEBUG=1. + - Thanks to MSTROUT for the report and a proposed fix. + +2.0009 2012-11-01 + - Fix libxml2 detection in Strawberry Perl. + - Another Devel::CheckOS fallout. + - Thanks to KMX for the report and for a proposed fix. The actual fix + was made to be more generic considering the use-cases. + - https://rt.cpan.org/Ticket/Display.html?id=80540 + +2.0008 2012-10-22 + - Fix build error when using non-standard libxml2 installation + - https://rt.cpan.org/Ticket/Display.html?id=80332 + - Thanks to L RW for the report. + +2.0007 2012-10-17 + - Fix for build failures on Windows with Microsoft Visual C++. + - https://rt.cpan.org/Ticket/Display.html?id=80229 + - Thanks to Desmond Daignault for the report and an initial patch. + - Patch modified by Shlomi Fish + +2.0006 2012-10-13 + - When xml2-config returns several paths, the configuration failed. + Fixed that. + - https://rt.cpan.org/Public/Bug/Display.html?id=80167 + - Thanks to VOVKASM for the report and fix. + +2.0005 2012-10-13 + - Added t/style-trailing-space.t and removed trailing space. + - Add a check for the existence of included C headers (*.h) files + in Makefile.PL to avoid failed compilations. + - Using Devel::CheckLib. + - Thanks to its maintainers! + +2.0004 2012-08-07 + - Add a way to specify a different compiler to be used in the + "Makefile" by calling Makefile.PL with the CC environment variable + set to the path to the alternate compiler. + - This way we can use «CC=/usr/bin/clang perl Makefile.PL» + in order to compile faster. + - LibXML.pm (_clone): Fix typo in line_numbers handling. + - Thanks to Bernhard Reutner-Fischer for the report and fix. + +2.0003 2012-07-27 + - Patch to a potential NULL dereference in xpath.c. + - Thanks to Ville Skyttä and cppcheck. + - Fix NodeList::item() calling a 1-indxed array reference. + - See: + - https://bitbucket.org/shlomif/perl-xml-libxml/pull-request/18 + - Thanks to Tim Brody + - Add the scripts/tag-release.pl script to tag a release using + Mercurial. + +2.0002 2012-07-08 + - Applied spelling fixes correction patch by + Ville Skyttä . + - Thanks, Ville! + +2.0001 2012-06-20 + - Remove the leftover perl-libxml-libxml.h from the distribution. + - https://rt.cpan.org/Ticket/Display.html?id=77924 + - Thanks to Martin Mann for the report. + +2.0000 2012-06-19 + - Fix warnings that appear when compiling using the clang C compiler by + default. + - https://rt.cpan.org/Ticket/Display.html?id=77802 + - Thanks to duvny for the report, and to seldon, doy and Zefram + for their assistance in fixing the warnings. + - Fix tests and run-time errors when Hash::FieldHash is installed + by no longer using Hash::FieldHash. + - https://rt.cpan.org/Ticket/Display.html?id=77576 + - Thanks to hsk@fli-leibniz.de for reporting it, and to + Father Chrysostomos ( http://search.cpan.org/~sprout/ ) and + Mons Anderson for some diagnosis. + +1.99 2012-05-31 + - Apply a patch from Mons Anderson ( mons@cpan.org ) for fixing the + overloading. + - t/62overload.t + - Thanks to Mons. + - Fix test failures (and general functionality) on 64-bit big endian + platforms + - https://rt.cpan.org/Ticket/Display.html?id=77340 + - Thanks to Gregor Herrmann and Niko Tyni from the + Debian Perl group. + +1.98 2012-05-13 + - Make sure parse_string() and load_xml() also accept references to + strings (to avoid unnecessary copying). + - See: https://rt.cpan.org/Ticket/Display.html?id=64051 + +1.97 2012-04-30 + - Apply a test and a fix to correct keep_blanks having no effect on + parse_balanced_chunk. + - fixes https://rt.cpan.org/Ticket/Display.html?id=76696 + - Add t/30keep_blanks.t . + - Thanks to SREZIC for the report, the test and the fix. + +1.96 2012-03-16 + - Apply a patch to add leading minus signs to the commands of + install_sax_driver. + - This makes the make process succeed even if they fail. + - Fixes https://rt.cpan.org/Public/Bug/Display.html?id=75007 + - Thanks to POPEL for the report, and to Petr Pajas for the patch. + - Apply a patch from Tim Brody to skip_all on + t/49callbacks_returning_undef.t when URI.pm's version is below 1.35. + - Thanks to Tim Brody for the patch. + - Fixes the problem reported in http://www.city-fan.org/tips/PaulHowarth/Blog/2011-09-06. + +1.95 2012-03-06 + - Got rid of a broken test (at least with recent libxml2s) in + t/03doc.t : + - https://rt.cpan.org/Ticket/Display.html?id=75403 + - The problem was that the test tested for an undefined XML + namespace, a behaviour which was changed in a recent libxml2 + release. + - Thanks to vcizek for the report. + +1.94 2012-03-03 + - Fix XML::LibXML::Element tests for ineqaulity with == and eq. + - Fixes https://rt.cpan.org/Ticket/Display.html?id=75505 . + - Thanks to Mark Overmeer for the report and for a preliminary patch + to t/71overload.t . + +1.93 2012-02-27 + - Fix XML::LibXML::Element comparison with == and eq. + - Fixes https://rt.cpan.org/Ticket/Display.html?id=75257 , + https://rt.cpan.org/Ticket/Display.html?id=75293 , + https://rt.cpan.org/Ticket/Display.html?id=75259 . + - Thanks to Toby Inkster for a preliminary patch (that was modified by + me) and to the various people who reported the problem. + +1.92 2012-02-21 + - Fix for test failure on perls < 5.10. + - Fixes https://rt.cpan.org/Public/Bug/Display.html?id=75195 + - Thanks to Paul for the report, and for a patch that was not + accepted. + +1.91 2012-02-21 + - Overload hash dereferencing on XML::LibXML::Elements, to provide + access to the element's attributes. + - See XML::LibXML::AttributeHash for details. + - Thanks to Toby Inkster. + - Pull some commits from Toby Inkster to add more convenient methods + to XML::LibXML::NodeList such as sort, map, grep, etc. + - https://bitbucket.org/shlomif/perl-xml-libxml/pull-request/11/xml-libxml-nodelist-improvements + - Thanks, Toby! + - Printed some warnings regardless if DEBUG is on. + - Thanks to http://search.cpan.org/~mstrout/ for the suggestion. + +1.90 2012-01-08 + - Pull a commit from Aaron Crange to fix compilation bugs in Devel.xs: + - local variable declarations must be in the PREINIT section, + not `CODE`, at least for some compiler/OS combinations. + - Thanks, Aaron! + +1.89 2011-12-24 + - Apply a patch with spelling fixes by Kevin Lyda : + - https://rt.cpan.org/Public/Bug/Display.html?id=71403 + - Thanks to Kevin. + - Apply a pull request by ElDiablo with the implementation of + lib/XML/LibXML/Devel.pm . + - Adjust the Win32 Build Instructions in the README file. + - Thanks to Christopher J. Madsen. + +1.88 2011-09-21 + - Add libxml2 2.7.8 as tested and working fine for the Makefile.PL. + (Thanks to H. Merijn Brand.). + - Apply a patch to perl-libxml-sax.c to use xmlChar * instead of char *. + (Thanks to H. Merijn Brand.). + - Correct the README so it won't read XML-LibXML-Common. + - see http://code.activestate.com/lists/perl-xml/8907/ + - Add a patch to implement the no_defdtd option in recent libxml2's: + - https://rt.cpan.org/Ticket/Display.html?id=70878 + - Thanks to zzgrim@gmail.com . + - Add scripts/bump-version-number.pl to modify the version number globally. + - Up to then, the version numbers of the modules under lib/ had + been 1.73. + +1.87 2011-08-27 + - Fix t/49callbacks_returning_undef.t to not read /etc/passed which may + not be valid XML. Instead, we're reading a local file while using + URI::file (assuming it exists - else - we skip_all.) + +1.86 2011-08-25 + - Changed SvPVx_nolen() to SvPV_nolen() in LibXML.xs for better compatibility. + - SvPVx_nolen() appears to be undocumented API. + - Resolves https://rt.cpan.org/Public/Bug/Display.html?id=70476 + - Thanks to Paul for the report. + +1.85 2011-08-24 + - Gracefully handle returned undef()s in the read callback under -w ($^W): + - t/49callbacks_returning_undef.t + - https://rt.cpan.org/Ticket/Display.html?id=70321 + - Add a patch from Mithaldu to get XML::LibXML to compile on Win32: + - https://rt.cpan.org/Ticket/Display.html?id=70141 + - I'm applying it by faith, so if it breaks, blame him. (;-). + - the patch adds -lllibgettextlib.dll to the Makefile.PL. + +1.84 2011-07-23 + - Fix for perl 5.8.x before 5.8.8: + - "You can now use the x operator to repeat a qw// list. This used to raise a syntax error." + - http://search.cpan.org/perldoc?perl588delta + - fixes https://rt.cpan.org/Ticket/Display.html?id=69722 . + - thanks to paul@city-fan.org for the report. + +1.83 2011-07-23 + - Fixed missing declarations after statements: + - resolves https://rt.cpan.org/Ticket/Display.html?id=69622 again. + - thanks to Vadim / VKON. + - Fix docbook source validity + - resolves https://rt.cpan.org/Ticket/Display.html?id=69702 + - thanks to Ville Skytta / SCOP for the patch. + - Applied patch from https://rt.cpan.org/Ticket/Display.html?id=69703 + - [PATCH] Documentation spelling fixes + - thanks to Ville Skytta / SCOP for the patch. + - minor correction by the current maintainer (SHLOMIF). + - Convert t/14sax.t to Counter and Stacker so the tests will be more + reliable. + - SHLOMIF + +1.82 2011-07-20 + - Moved some if blocks after the dSP; (which contains declarations) to be + compliant with C89/C90, which don't allow declarations in the middle of + a C function. + - resolves https://rt.cpan.org/Ticket/Display.html?id=69622 + - thanks to Vadim / VKON. + - Fix https://rt.cpan.org/Ticket/Display.html?id=69553 : + - "install_sax_driver doesn't like custom INSTALLARCHLIB" + - thanks to Milki from U.Cal Berkeley. + +1.81 2011-07-16 + - Add scripts/fast-eumm.pl to remove the explicit objects dependency on + the "Makefile" file so after running scripts/fast-eumm.pl one won't have to + rebuild the C-files. + - Add no warnings 'recursion' to lib/XML/LibXML/Error.pm to get rid of + a "Deep recursion" warning. + - Fix "IDs of elements is lost when importing nodes" + - https://rt.cpan.org/Public/Bug/Display.html?id=69520 + - With t/48importing_nodes_IDs_rt_69520.t . + - Thanks to Yuriy Ustushenko. + - Convert all remaining Test.pm-based test scripts except t/14sax.t to + Test::More . + +1.80 2011-07-12 + - Fix https://rt.cpan.org/Public/Bug/Display.html?id=69082 : + - Compilation on strawberry perl. + - The problem was that stderr required a dTHX; call previously. + - DOM Normalisation patches and a fix for #69096 + - Thanks to Daniel Frett. + - https://rt.cpan.org/Ticket/Display.html?id=69096 + - "findvalue from XML::LibXML 1.74 is very slow (regression)" + - https://bitbucket.org/shlomif/perl-xml-libxml/pull-request/5/normalize-bug-fixes + - Fix https://rt.cpan.org/Ticket/Display.html?id=69433 : + - "t/19die_on_invalid_utf8_rt_58848.t assumes errors will be objects:" + - Thanks to TODDR. + - Failed on older libxml2's. + - Add a skip for t/60error_prev_chain.t in case $@ is true but not a ref. + - https://rt.cpan.org/Ticket/Display.html?id=69435 + - Thanks to TODDR. + - http://www.cpantesters.org/cpan/report/4ac00aae-a73f-11e0-84bd-8881cd42d09c + +1.79 2011-07-08 + - t/46err_column.t : add a skip for a test for CentOS/RHEL 4: + - https://rt.cpan.org/Ticket/Display.html?id=69070 + - old version of libxml2 . + - t/49global_extent.t : fix the double plan (present on libxml2 + below 2.6.27): + - https://rt.cpan.org/Ticket/Display.html?id=69330 + - Thanks to Chris for reporting it. + - double plan in t/61error.t . + - in accordance to the previous change. + +1.78 2011-07-06 + - Change t/02parse.t to test for the localized error message: + - https://rt.cpan.org/Public/Bug/Display.html?id=69248 + - Fix the skip() and 'plan skip_all' syntax in t/06elements.t and + t/49global_extent.t for old versions of XML::LibXML: + - http://www.cpantesters.org/cpan/report/b648ae66-a569-11e0-a41d-a7c8b84ee953 + - It did not match the one specified in Test::More. + - Convert more test scripts from Test.pm to Test::More. + +1.77 2011-07-01 + - Change the signature of XML::LibXML::Reader::byteConsumed to be + "long" instead of "int", so it can return values above 2**31 in + 64-bit platforms. + - should fix https://rt.cpan.org/Ticket/Display.html?id=57085 + - Change "a XML::LibXML::*" to "an XML::LibXML::*" in the documentation. + - Document XML::LibXML::NamedNodeMap : + - https://rt.cpan.org/Ticket/Display.html?id=57652 . + - Add an external entity resolver (for XSLT/etc.): + - Fixing https://rt.cpan.org/Ticket/Display.html?id=69166 . + - Thanks to SAMSK for the patch. + - Add the missing string comparison overload in + lib/XML/LibXML/NodeList.pm : + - https://rt.cpan.org/Ticket/Display.html?id=57737 + - Thanks to MSCHWERN . + - Fix https://rt.cpan.org/Ticket/Display.html?id=58024 : + - <<< In XML::LibXML, warnings are not suppressed when specifying the + recover() or recover_silently() flags as per the following excerpt from + the manpage: >>> + - Now XML-LibXML requires perl-5.8.x (to print to a buffer trick.). + - Thanks to Michael Ludwig for the report. + - Fix https://rt.cpan.org/Ticket/Display.html?id=56671 : + - limit the length of the chain of the previous errors. + - New files: + - t/60error_prev_chain.t + - example/JBR-ALLENtrees.htm + - Thanks to SCOP. + - Fix https://rt.cpan.org/Ticket/Display.html?id=58848 : + - "Malformed UTF-8 character (fatal) at" exception thrown on invalid + UTF-8. + - Thanks to David E. Wheeler (DWHEELER) for the report. + +1.76 2011-06-30 + - Cleaned up t/28new_callbacks_multiple.t - convert to a Counter + and Stacker class. + - After that, the regression test for was added: + - https://rt.cpan.org/Ticket/Display.html?id=51086 + - Already fixed in the trunk. + - Add the file HACKING.txt with style guidelines. + - Fix https://rt.cpan.org/Ticket/Display.html?id=53270 (with a test + in t/49_load_html.t ) - uncovered some more bugs in the process + documented in TODO. + - << suppress_errors option not honored by load_html() method if set in + parser object >> + - Created t/lib/TestHelpers.pm with slurp(), utf8_slurp() and, in the + future, some other routines. + - skipping for LIBXML_RUNTIME_VERSION() *less than* 2.7 instead of + *more than* in t/09xpath.t : + - https://rt.cpan.org/Ticket/Display.html?id=69205 + - Thanks to DOUGW . + +1.75 2011-06-24 + - Correct some typos reported in + - https://rt.cpan.org/Ticket/Display.html?id=54390 + - Fix the handling of XML::LibXML::InputCallbacks at load_xml(). + - https://rt.cpan.org/Ticket/Display.html?id=58190 + - The problem was that the input callbacks were not cloned in + _clone(). + - Apply the patches from https://rt.cpan.org/Ticket/Display.html?id=56334 + - Convert t/02parse.t to Test::More . + - Thanks to TODDR . + - Removed the diag() messages which were annoying. + - Add 'make runtest' and 'make distruntest' targets to run the tests using + Test::Run ( http://beta.metacpan.org/module/Test::Run ). + - Adds colours and stuff like that. + - Add << LICENSE => 'perl' >> to the Makefile.PL for a license + meta-data in the META.YML. + - Feature implementation: joining congruent character data together in + SAX driver . + - Apply a somewhat modified patch from: + - https://rt.cpan.org/Ticket/Display.html?id=52368 + - Add t/pod.t . + - Fix https://rt.cpan.org/Ticket/Display.html?id=55000 : + - Apply modified patch in the bug report. + - << If an element contains both a default namespace declaration and a + second namespace declaration, adding an attribute using the default + namespace declaration will cause that attribute to have the other + prefix. >> + +1.74 2011-06-23 + - More work on the t/*.t test scripts. + - Add scripts/Test.pm-to-Test-More.pl to semi-automatically + convert a test script from Test.pm to Test::More. + - Change NodeSet to NodeList in the documentation of + lib/XML/LibXML/NodeList.pm . + - Resolved https://rt.cpan.org/Ticket/Display.html?id=60998 + - Makefile.PL: now saying we are trying to link against -lm, -lz + and -lxml2 . Not only -lxml2: + - https://rt.cpan.org/Ticket/Display.html?id=51439 + - https://rt.cpan.org/Ticket/Display.html?id=61756 + - << $node = XML::LibXML::Comment( $content ); >> is wrong. + - Documentation: moved away from Indirect-object-notation and added + some missing "my"s: + - http://www.modernperlbooks.com/mt/2009/08/the-problems-with-indirect-object-notation.html + - Fix failing t/01basic.t when compiling against libxml2 that comes from + git. + - https://rt.cpan.org/Public/Bug/Display.html?id=54951 + - Thanks to Evan Carroll ( http://www.evancarroll.com/ ) for the + report. + +1.73 2011-06-18 + - Calculating $err->column() properly, so it won't be maxed out at + 80: + - https://rt.cpan.org/Public/Bug/Display.html?id=66642 + - the context still maxes at 80 (to avoid wasting RAM) but we + still continue past that to get the accurate verdict. + - Thanks to SCOP. + - Update the repository in the documentation to point to the + bitbucket.org one. + - Revamped Makefile.PL: + - Got rid of "\t" characters. + - Add "use strict" and "use warnings". + - Add resources and keywords to the META_MERGE. + - Other changes. + - Fix https://rt.cpan.org/Public/Bug/Display.html?id=53632 : + - << when calling normalize on a node, processing of children nodes + will stop when an empty element node is encountered. >> + - Thanks to Daniel Frett for the patch. + - Apply the patch from Daniel Frett's InputCallbackFix branch. + - a partial fix to https://rt.cpan.org/Public/Bug/Display.html?id=4263 . + - Call two $parser->parse_string() in succession. + - Apply the NestedParsing patch. + - more of https://rt.cpan.org/Public/Bug/Display.html?id=4263 + - Thanks to Daniel Frett for the patch. + + [QUOTE] + Updated how legacy parser local callbacks are utilized by + init_callbacks so that the XML::LibXML::InputCallback object doesn't + have to be temporarily modified during the parsing process. + + This change could break code for users that have subclassed + XML::LibXML::InputCallback and overridden the init_callbacks method + [/QUOTE] + - Documentation fixes patch from Daniel Frett on: + - From https://github.com/frett/perl-libxml . + + +1.72 2011-06-16 + - Removed a stray file from the MANIFEST + - http://rt.cpan.org/Ticket/Display.html?id=68865 + - Warned on "kit not complete". + - Thanks to obrien.jk + +1.71 2011-06-14 + - turn XML_LIBXML_PARSE_DEFAULTS constant to $XML::LibXML::XML_LIBXML_PARSE_DEFAULTS + - Apply 0001-XML-LibXML-Error-no-need-to-AUTOLOAD-domain.patch from + https://rt.cpan.org/Public/Bug/Display.html?id=68575 - no need to + AUTOLOAD 'domain' because a method like that exists. + -- Applied by SHLOMIF. + -- Thanks to Aaron Crane. + - Apply 0002-XML-LibXML-Error-avoid-AUTOLOAD.patch from + https://rt.cpan.org/Public/Bug/Display.html?id=68575 - get rid of + AUTOLOAD completely. + -- Applied by SHLOMIF. + -- Thanks to Aaron Crane. + - Apply 0003-XML-LibXML-Error-make-domain-work-for-unknown-domain.patch + from https://rt.cpan.org/Public/Bug/Display.html?id=68575 - handle + unknown domains. + -- Applied by SHLOMIF. + -- Thanks to Aaron Crane. + - Apply 0004-XML-LibXML-Error-add-domains-from-newer-libxml2.patch + from https://rt.cpan.org/Public/Bug/Display.html?id=68575 - add more + errors. + -- Applied by SHLOMIF. + -- Thanks to Aaron Crane. + - Apply 0005-XML-LibXML-Error-avoid-malformed-UTF-8-warnings.patch + from https://rt.cpan.org/Public/Bug/Display.html?id=68575 + -- Applied by SHLOMIF. + -- Thanks to Aaron Crane. + - In replaceDataString - use + http://perldoc.perl.org/functions/quotemeta.html instead of a long (and + incomplete) list of characters to escape. + -- With test. + -- also fix deleteDataString by making it use replaceDataString + for help. + -- Fixing https://rt.cpan.org/Ticket/Display.html?id=68564 + -- Thanks to Daniel Perrett . + +1.70 Unknown + - various fixes and improvements in the documentation + - added (convenient yet non-standard) methods nonBlankChildNodes, + firstNonBlankChild, nextNonBlankSibling, prevNonBlankSibling + that skip empty or whitespace-only Text and CDATA nodes + - exposed and documented external entity handler + - XPathContext can now be passed to toStringC14N and toStringEC14N + (e.g. to provide NS mapping for the XPath expression) + - avoid using libxml2's globals (Nick Wellnhofer) + - added interface to libxml2's regexp implementation: XML::LibXML::RegExp + - added XML::LibXML->load_xml and XML::LibXML->load_html with + uniform and cleaner API than the old parse_* family + - cleanup code dealing with parsing flags + - fix bogus validation results if revalidating a modified document + - added 'eq' and 'cmp' overloading on XML::LibXML::Error and set fallback to 1 + - lots of bugs fixed + +1.69_2 Unknown + - provide context and more accurate column number in + structured errors + - clarify license and copyright + - support for Win32+mingw+ActiveState + +1.69_1 Unknown + - merge with XML::LibXML::Common + - fix compilation on Windows with mingw or msvc + - fix a bug in structured errors preventing the previous errors from being reported + - fix compilation bugs + - fix encoding problem in reader + - added getAttributeHash to the reader interface + - fix segfaults: reconcileNs in domReplaceChild, findnodes with a doc fragment (S. Rezic) + +1.69 Unknown + - fix incorrect output of getAttributeNS and possibly other methods on UTF-8 + - added $node_or_xpc->exists($xpath) method + - remove accidental debug output from XML::LibXML::SAX::Builder + +1.68 Unknown + - compilation problem fixes + +1.67 Unknown + - many bugfixes (rt.cpan.org) + - added XML::LibXML::Pattern module and extended pattern support in Reader + - added XML::LibXML::XPathExpression module that can pre-compile an XPath + expression + - reimplementation of the thread support (mostly by Tim Brody) + - structured errors XML::LibXML::Error + - memory leak fixes + - documentation fixes + - README - notes for building on Win32 (C.J. Madsen) + +1.66 Unknown + - Perl-thread support contributed by Tim Brody [rt.cpan.org #31945] + - fix [rt.cpan.org #30610] possible segmentation fault when importing nodes from a document to an element created with XML::LibXML::Element->new + - fix [rt.cpan.org #30261] Segmentation fault when extracting elements from an XML chunk + - make Makefile.PL require Perl 5.6.1 + - minor fixes and additions to the documentation + - portability patch from [rt.cpan.org #29627] + - give registered Ns declarations precedence over document-specific ones + in XML::LibXML::XPathContext; fixes [rt.cpan.org #29650] + +1.65 Unknown + - fix bug in t/40reader.t revealed by a bugfix in Test::More 0.71 (Jonathan Rockway) + - fix possible SIGSEGV when PI's or attrs created with + createDocument can get garbage-collected after their owning + document (old-standing bug suddenly caught by XML::Compile regression tests) + - skip tests for unsupported features on unsupported versions of Perl/libxml2 + - make Reader interface require Perl 5.8 (patches to extend to 5.6 are welcome) + +1.64 Unknown + - fix reconciliation of the "xml" namespace [rt.cpan.org #26450] + - make tests pass with libxml2 2.9.29 - PI regression tests now + accept "" as data of an empty PI [rt.cpan.org #27659] + - strip-off UTF8 flag with $node->toString($format,1) for consistent + behavior independent on the actual document encoding + - fix in XML::LibXML::Reader::nextSiblingElment + - fix synopsis for XML::LibXML::Reader + - skip tests that require Encode module if not available (perl 5.6) + - finally removed the iterator() method deprecated since 1.54 + - set_document_locator support in XML::LibXML::SAX::Parser + - SYNOPSIS sections of the docs now mention which module to use + and which other manpage to look into for inherited methods + - XML::LibXML::Namespace API fixed in order to achieve + an agreement between the docs and the implementation + +1.63 Unknown + - added no_network parser flag + - added support for exclusive canonicalization (http://www.w3.org/TR/xml-exc-c14n/) + - make XInclude reflect parser flags + - documentation fixes + - better namespace reconciliation implemented by Tim Brody + - $doc->toString always returns octets + - $doc->actualEncoding returns UTF8 if no document encoding is declared + (unlike $doc->getEncoding, which returns undef) + +1.62 Unknown + - interface to libxml2's pull-parser XML::LibXML::Reader + (initiated by Heiko Klein) + - make error messages intended to the user report the line of the + application call rather than that of the internal XS call + - XML::LibXML::Attr->serializeContent added (convenience function) + - fix getAttributeNode etc. w.r.t. #FIXED attributes (as well as some + cases with old buggy libxml2 versions) + - warn if runtime libxml2 is older than the one used at the compile time + - if compiled against libxml2 >= 2.6.27, new parse_html_* implementation is used + allowing encoding and other options to be passed to the parser + - DOM-compliant nodeNames: #comment, #text, #cdata, #document, #document-fragment + - toString on empty text node returns empty string, not undef + - cloneNode copies attributes on an element as required by the DOM spec + +1.61 Unknown + - get{Elements,Children}By{TagName,TagNameNS,LocalName} now + obey wildcards '*', getChildrenByLocalName was added. + - XML::LibXML::XPathContext merged in + - many new tests added + - the module should now be fully compatibile with libxml2 >= 2.6.16 + (some older versions compile but have problems with namespaced attributes) + - threads test skipped by default + - documentation updates (namely DOM namespace conformance in XML::LibXML::DOM) + - added setNamespaceDecl{URI,Prefix} + - get/setAttribute(NS)? implementation made xmlns aware + - all sub-modules have the same version as XML::LibXML + +1.60 Unknown + - getElementsById corrected to getElementById and the old name kept + as an alias. Also re-implemented without XPath for improved + performance + - DOM Level 3 method $attr->isId() added + - make {get,set,has}Attribute(Node)? methods work with full + attribute names rather than just localnames. + (Although DOM Level 3 is not very clear about the behavior of + these methods for an attributes with namespaces, it certainly + does not imply that getAttribute('foo') should return value of a + bar:foo, which was the old behavior.) + - added publicId and systemId methods to XML::LibXML::Dtd + +1.59 Unknown + - new parser and callback code (Christian Glahn) + - new XML::LibXML::InputCallback class + - many bug fixes (including several memory leaks) + - documentation and regression fixes and enhancements + - Perl wrappers for parse_html_* + - make sure parse_* methods are not called on class (bug 11126) + - DOM Layer 3 conformance fixes: + * lookupNamespaceURI(empty_or_undef) now returns the default NS + - faster getChildrenByTagNameNS implementation + - remove the SGML parser code no longer supported by libxml (Michael Kröll) + +1.58 Unknown + - fixed a pointer initialization in parse_xml_chunk(), fixes + random several segmentation faults on document fragments. + - added NSCLEAN feature to the parser interface (bug 4560) + - minor code cleanups + - updated libxml2 blacklist. + - fixed croak while requesting nodeName() of CDATA sections (bug 1694). + - more documentation updates + +1.57 Unknown + - added cloneNode to XML::LibXML::Document + - include Schema/RelaxNG code only with libxml2 >= 2.6.0 (to support old libxml2) + - applied patch to example/cb_example.pl (bug 4262) + - fixed insertBefore/insertAfter on empty elements (bug 3691) + - more DOM conformant XML::LibXML->createDocument API (by Robin Berjon) + - fixed encoding problems with API calls in document encoding + - improved support for importing DTD subsets + - fixed DTD validation error reporting problems with libxml2-2.6.x + - fixed compilation problems with libxml2-2.6.x + - fixed XML::LibXML::Number to support negative numbers + - added XML Schema validation interface (XML::LibXML::Schema) + - added XML RelaxNG validation interface (XML::LibXML::RelaxNG) + - Michael K. Edwards' patch applied with some amendments from Petr Pajas: + * add debian build files (I added SKIP_SAX_INSTALL flag for + Makefile.PL and changed the patch so that it doesn't disable + sax parser registration completely by default, and rather made + debian build use this flag) + * general cleanup (use SV_nolen, etc.) + * SAX parsers cleanup + * general error reporting code cleanup/rewrite, try preventing + possible memory leaks + * recover(1) now triggers warnings (disable with $SIG{__WARN__}=sub {}) + (fixes bug 1968, too) + * slighlty more strict parse_string behavior (now same as when + parsing fh, etc): e.g. parse_string(""), i.e prefix without + NS declaration, raises error unless recover(1) is used + * documentation fixes/updates + * slightly updated test set to reflect the new slightly more strict + parsing. + - fixed default c14n XPath to include attributes and namespaces (Petr Pajas) + - make libxml2's xmlXPathOrderDocElems available through a new + $doc->indexElements method + - added version information of libxml2 + - Les Richardson's documentation patch applied. + +1.56 Unknown + - added line number interface (thanks to Peter Haworth) + - patch to make perl 5.8.1 and XML::LibXML work together (thanks to François Pons) + - added getElementById to XML::LibXML::Document (thanks to Robin Berjon) + - fixes symbol problem with versions of libxml2 compiled without + thread support (reported by Randal L. Schwartz) + - tiny code clean ups + - corrected tested versions after a local setup problem + +1.55 Unknown + - fixed possible problems with math.h + - added C14N interface "toStringC14N()" (thanks to Chip Turner) + - fixed default namespace bug with libxml2 2.5.8 (by Vaclav Barta) + - fixed a NOOP in the XPath code. + - fixed insertBefore() behaviour to be DOM conform + - fixed a minor problem in Makefile.PL + - improved more documentation + - converted documentation to DocBook + + +1.54 Unknown + - fixed some major bugs, works now with libxml2 2.5.x + - fixed problem with empty document fragments + - bad tag and attribute names cannot be created anymore + - Catalog interface is aware about libxml2 configuration + - XML::LibXML should work now on systems without having zlib installed + - cleaned the error handling code, which + - fixes bad reporting of the validating parser + - fixes bad reporting in xpath functions + - added getElementsBy*Name() functions for the Document Class + - fixed memory management problem introduced in 1.53 + (that fixes a lot strange things) + - interface for raw libxml2 DOM building functions + (currently just addChild() and addNewChild(), others will follow) + - fixed namespace handling if nodes are imported to a new DOM. + - fixed segmentation fault during validation under libxml2 2.4.25 + - fixed bad CDATA handing in XML::LibXML::SAX::Builder + - fixed namespace handing in XML::LibXML::SAX + - fixed attribute handing in XML::LibXML::SAX + - fixed memory leak in XML::LibXML::SAX + - fixed memory leak in XML::LibXML::Document + - fixed segfault while appending entity ref nodes to documents + - fixed some backward compatibility issues + - fixed cloning with namespaces misbehaviour + - fixed parser problems with libxml2 2.5.3+ + - moved iterator classes into a separate package + (after realizing some CPAN testers refuse to read their warnings + from Makefile.PL) + - improved parser testsuite + - improved M + - more documentation + + - *NOTE:* + - Version 1.54 fixes potentional buffer overflows were possible with + - earlier versions of the package. + +1.53 Unknown + Parser + - catalog interface + - enabled SGML parsing + - implemented libxml2 dom recovering + - parsing into GDOME nodes is now possible + - XML::LibXML::SAX is now faster + - made XML::LibXML::SAX parser running without errors in most (all?) cases + (DTD handling is still not implemented). + + DOM interface + - Node Iterator class + - NodeList Iterator class + - introduced XML::GDOME import and export. (EXPERIMENTAL) + - more security checks + + general blur + - removed code shared with XML::GDOME to a separate XML::LibXML::Common + module (check CPAN) + - removed some redundand code + - more documentation (and docu fixes) (thanks to Petr Pajas) + + major fixes: + - possible buffer overflow with broken XML: + This may effect all older versions of XML::LibXML, please upgrade! + + - a bug while replacing the document element. + - very stupid encoding bug. all UTF8 strings will now be marked as + UTF8 correctly + - namespace functions to work with empty namespaces + - toFH() + - namespace setting in XPath functions: + the namespaces of the document element will always be added now + - threaded perl 5.8.0 issues + - calling external entity handlers work again + - XML::LibXML::SAX::Parser will not throw warnings on DTD nodes + +1.52 Unknown + - fixed some typos (thanks to Randy Kobes and Hildo Biersma) + - fixed namespace node handling + - fixed empty Text Node bug + - corrected the parser default values. + - added some documentation + +1.51 Unknown + - fixed parser bug with broken XML declarations + - fixed memory management within documents that have subsets + - fixed some threaded perl issues + (special thanks to Andreas Koenig for the patch) + - applied Win32 tests + (special thanks to Randy Kobes for the patch) + - fixed findnodes() and find() to return empty arrays in array context + if the statement was legal but produced no result. + - fixed namespace handling in xpath functions + - fixed local namespace handling in DOM functions + - pretty formating to all serializing functions + *NOTE* the XML::LibXML::Node::toString interface changed + check the XML::LibXML::Node man page + - made xpath functions verbose to perl (one can wrap evals now) + - improved native SAX interface + - improved XML::LibXML::SAX::Builder + - added getNamespaces to the node interface + - better libxml2 version testing + - more documentation + +1.50 Unknown + - fixed major problems with the validating parser + - fixed default behaviour of the generic parser + - fixed attribute setting of the string parser + - fixed external entity loading for entity expansion + - fixed nodeValue() to handle entities and entity refs correctly + - SAX::Parser ignores now hidden XINCLUDE nodes. + - fixed SAX::Builder to recognize namespace declarations correctly + - compatibility fixes + - importNode() bug fix + - fixed library tests and output in Makefile.PL + - added setOwnerDocument() again + - XML::LibXML::Document::process_xincludes reintroduced + - global callbacks reintroduced + NOTE: the Interface changed here, read XML::LibXML manpage! + - code cleanings + - push parser interface + - basic native libxml2 SAX interface + THIS INTERFACE IS STILL EXPERIMENTAL + - cloneNode clones now within documents + - more documentation + +1.49 Unknown + - memory management has been completely rewritten. + now the module should not cause that many memory leaks + (special thanks to Merijn Broeren and Petr Pajas for providing + testcases) + - more libxml2 functions are used + - DOM API is more Level 3 conform + - ownerDocument fixed + - parser validation bug fixed (reported by Erik Ray) + - made parse_xml_chunk() report errors + - fixed the PI interface + - xpath.pl example + - better namespace support + - improved NamedNodeMap support + - restructured the interfaces + - HTML document nodes are recognized as HTML doc nodes instead of plain nodes + - XML::LibXML::SAX::Parser able to handle HTML docs now + (patch by D. Hageman [dhageman@dracken.com]) + - added serialization flags ($setTagCompression, $skipDtd and + $skipXMLDeclaration) + - more documentation + +1.40 Unknown + - new parsefunction: $parser->parse_xml_chunk($string); + - appendChild( $doc_fragment ) bug fixed + - removed obsolete files (parser.?) + - fixed getElementsByTagName and getElementsByTagNameNS to fit the spec + - new functions in XML::LibXML::Element: + getChildrenByTagName + getChildrenByTagNameNS + getElementsByLocalName + - minor fixes and extensions of the tests + - more docu ;) + - SAX added comment and PI support + - SAX added start_prefix_mapping/end_prefix_mapping + - Fixed find() bug with no results + - Added use IO::Handle so FH reads work + - A number of segfault fixes + - constants added without XML_ prefix + +1.31 Unknown + - Removed C-layer parser implementation. + - Added support for prefixes in find* functions + - More memory leak fixes (in custom DOMs) + - Allow global callbacks + +1.30 Unknown + - Full PI access + - New parser implementation (safer) + - Callbacks API changed to be on the object, not the class + - SAX uses XML::SAX now (required) + - Memory leak fixes + - applied a bunch of patches provided by T.J. Mather + +1.00 Unknown + - Added SAX serialisation + - Added a SAX builder module + - Fixed findnodes in scalar context to return a NodeList object + - Added findvalue($xpath) + - Added find(), which returns different things depending on the XPath + - Added Boolean, Number and Literal data types + +0.99 Unknown + - Added support for $doc->URI getter/setter + +0.98 Unknown + - New have_library implementation + +0.97 Unknown + - Addition of Dtd string parser + - Added support for namespace nodes (e.g. $element->getNamespaces()) + - Some memory leak and segfault fixes + - Added $doc->validate([$dtd]) which throws exceptions (augments + $doc->is_valid([$dtd])) + - Added doc files and test files to CPAN distro + +0.96 Unknown + - Addition of HTML parser + - getOwner method added + - Element->getAttributes() added + - Element->getAttributesNS(URI) added + - Documentation updates + - Memory leak fixes + - Bug Fixes + +0.94 Unknown + - Some DOM Level 2 cleanups + - getParentNode returns XML::LibXML::Document if we get the + document node + +0.93 Unknown + - Addition of DOM Level 2 APIs + - some more segfault fixes + - Document is now a Node (which makes lots of things easier) + +0.92 Unknown + - Many segfault and other bug fixes + - More DOM API methods added + +0.91 Unknown + - Removed from XML::LibXSLT distribution + - Added DOM API (phish) + +0.01 2001-03-03 + - original version; created by h2xs 1.19 + diff --git a/Devel.xs b/Devel.xs new file mode 100644 index 0000000..23e8beb --- /dev/null +++ b/Devel.xs @@ -0,0 +1,128 @@ +/* $Id: Devel.xs 20 2011-10-11 02:05:01Z jo $ + * + * This is free software, you may use it and distribute it under the same terms as + * Perl itself. + * + * Copyright 2011 Joachim Zobel + * + * This module gives external access to the functions needed to create + * and use XML::LibXML::Nodes from C functions. These functions are made + * accessible from Perl to have cleaner dependencies. + * The idea is to pass xmlNode * pointers (as typemapped void *) to and + * from Perl and call the functions that turns them to and from + * XML::LibXML::Nodes there. + * + * Be aware that using this module gives you the ability to easily create + * segfaults and memory leaks. + */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +#include "ppport.h" + +#include + +/* XML::LibXML stuff */ +#include +#include "perl-libxml-mm.h" + +#undef NDEBUG +#include + +static void * xmlMemMallocAtomic(size_t size) +{ + return xmlMallocAtomicLoc(size, "none", 0); +} + +static int debug_memory() +{ + return xmlGcMemSetup( xmlMemFree, + xmlMemMalloc, + xmlMemMallocAtomic, + xmlMemRealloc, + xmlMemStrdup); +} + +MODULE = XML::LibXML::Devel PACKAGE = XML::LibXML::Devel + +PROTOTYPES: DISABLE + +BOOT: + if (getenv("DEBUG_MEMORY")) { + debug_memory(); + } + + + +SV* +node_to_perl( n, o = NULL ) + void * n + void * o + PREINIT: + xmlNode *node = n; + xmlNode *owner = o; + CODE: + RETVAL = PmmNodeToSv(node , owner?owner->_private:NULL ); + OUTPUT: + RETVAL + +void * +node_from_perl( sv ) + SV *sv + PREINIT: + xmlNode *n = PmmSvNodeExt(sv, 0); + CODE: + RETVAL = n; + OUTPUT: + RETVAL + +void +refcnt_inc( n ) + void *n + PREINIT: + xmlNode *node = n; + CODE: + PmmREFCNT_inc(((ProxyNode *)(node->_private))); + +int +refcnt_dec( n ) + void *n + PREINIT: + xmlNode *node = n; + CODE: + RETVAL = PmmREFCNT_dec(((ProxyNode *)(node->_private))); + OUTPUT: + RETVAL + +int +refcnt( n ) + void *n + PREINIT: + xmlNode *node = n; + CODE: + RETVAL = PmmREFCNT(((ProxyNode *)(node->_private))); + OUTPUT: + RETVAL + +int +fix_owner( n, p ) + void * n + void * p + PREINIT: + xmlNode *node = n; + xmlNode *parent = p; + CODE: + RETVAL = PmmFixOwner(node->_private , parent->_private); + OUTPUT: + RETVAL + +int +mem_used() + CODE: + RETVAL = xmlMemUsed(); + OUTPUT: + RETVAL + + diff --git a/HACKING.txt b/HACKING.txt new file mode 100644 index 0000000..3667c02 --- /dev/null +++ b/HACKING.txt @@ -0,0 +1,331 @@ +Coding Style and Conventions for Shlomi Fish’s Projects +======================================================= +Shlomi Fish +:Date: 2012-05-14 +:Revision: $Id$ + +Perl Style Guidelines +--------------------- + +Use Test::More for test scripts while using Test::Count annotations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One should use Test::More for new test scripts, while using Test::Count +( https://metacpan.org/module/Test::Count ) "# TEST" annotations. Some +of the old test scripts under +t/*.t+ had used Test.pm, but they +have all been converted to Test::More, which should be used for new code. + +Any bug fixes or feature addition patches should be accompanied with +a test script to test the code. + +Avoid trailing statement modifiers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One should not use trailing "if"s "while"s "until"s, etc. + +Bad: + +---------------- +print "Hello\n" if $cond; +---------------- + +Good: + +---------------- +if ($cond) +{ + print "Hello\n"; +} +---------------- + +Avoid until and unless +~~~~~~~~~~~~~~~~~~~~~~ + +"until" and "unless" should be spelled using "if !" or "while !" or +alternatively "if not" or "while not". + +Make sure you update the "MANIFEST" file with any new source files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All the new source files should be places in the "MANIFEST" file in the core +distribution. Note that I am considering to make use of "MANIFEST.SKIP" +instead, which would not necessitate that in general. + +Make sure to update the "Changes" (or equivalently named) file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A patch should also patch the "Changes" file (whose name may vary) with the +explanation of the change. A Changes file should not be automatically +generated. Note that due to historical reasons, the exact format of the Changes +varies between different projects of mine and you should try to emulate the +style and format of the one of the CPAN distribution in question. + +Test programs should not connect to Internet resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a general rule, test programs should not connect to Internet resources +(such as global web-sites) using LWP or WWW::Mechanize or whatever, and +should rely only on local resources. The reasons for that are that relying +on such Internet resources: + +* May fail if the machine does not have a fully open Internet connection. + +* Will add load to the hosts in question. + +* Such Internet resources can fluctuate in their content and behaviour, +which may break the tests. + +Other elements to avoid +~~~~~~~~~~~~~~~~~~~~~~~ + +See http://perl-begin.org/tutorials/bad-elements/ . + +C Style Guidelines +------------------ + +Here are some style guidelines for new code to be accepted into XML-LibXML: + +4 Spaces for Indentation +~~~~~~~~~~~~~~~~~~~~~~~~ + +The source code should be kept free of horizontal +tabs (\t, HT, \x09) and use spaces alone. Furthermore, there should be +a 4 wide space indentation inside blocks: + +---------------- +if (COND()) +{ + int i; + + printf("%s\n", "COND() is successful!"); + + for (i=0 ; i < 10 ; i++) + { + ... + } +} +---------------- + +Curly Braces Alignment +~~~~~~~~~~~~~~~~~~~~~~ + +The opening curly brace of an if-statement or a for-statement should be +placed below the statement on the same level as the other line, and the +inner block indented by 4 spaces. A good example can be found in the previous +section. Here are some bad examples: + +---------------- +if ( COND() ) { + /* Bad because the opening brace is on the same line. +} +---------------- + +---------------- +if ( COND() ) + { + /* Bad because the left and right braces are indented along with + the block. */ + printf(....) + } +---------------- + +---------------- +/* GNU Style - fear and loathing. */ +if ( COND() ) + { + printf(....) + } +---------------- + +Comments should precede the lines performing the action +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Comments should come one line before the line that they explain: + +---------------- +/* Check if it can be moved to something on the same stack */ +for(dc=0;dc 'http://www.w3.org/2000/xmlns/'; +use constant XML_XML_NS => 'http://www.w3.org/XML/1998/namespace'; + +use XML::LibXML::Error; +use XML::LibXML::NodeList; +use XML::LibXML::XPathContext; +use IO::Handle; # for FH reads called as methods + +BEGIN { +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE +$ABI_VERSION = 2; +require Exporter; +use XSLoader (); +@ISA = qw(Exporter); + +use vars qw($__PROXY_NODE_REGISTRY $__threads_shared $__PROXY_NODE_REGISTRY_MUTEX $__loaded); + +sub VERSION { + my $class = shift; + my ($caller) = caller; + my $req_abi = $ABI_VERSION; + if (UNIVERSAL::can($caller,'REQUIRE_XML_LIBXML_ABI_VERSION')) { + $req_abi = $caller->REQUIRE_XML_LIBXML_ABI_VERSION(); + } elsif ($caller eq 'XML::LibXSLT') { + # XML::LibXSLT without REQUIRE_XML_LIBXML_ABI_VERSION is an old and incompatible version + $req_abi = 1; + } + unless ($req_abi == $ABI_VERSION) { + my $ver = @_ ? ' '.$_[0] : ''; + die ("This version of $caller requires XML::LibXML$ver (ABI $req_abi), which is incompatible with currently installed XML::LibXML $VERSION (ABI $ABI_VERSION). Please upgrade $caller, XML::LibXML, or both!"); + } + return $class->UNIVERSAL::VERSION(@_) +} + +#-------------------------------------------------------------------------# +# export information # +#-------------------------------------------------------------------------# +%EXPORT_TAGS = ( + all => [qw( + XML_ELEMENT_NODE + XML_ATTRIBUTE_NODE + XML_TEXT_NODE + XML_CDATA_SECTION_NODE + XML_ENTITY_REF_NODE + XML_ENTITY_NODE + XML_PI_NODE + XML_COMMENT_NODE + XML_DOCUMENT_NODE + XML_DOCUMENT_TYPE_NODE + XML_DOCUMENT_FRAG_NODE + XML_NOTATION_NODE + XML_HTML_DOCUMENT_NODE + XML_DTD_NODE + XML_ELEMENT_DECL + XML_ATTRIBUTE_DECL + XML_ENTITY_DECL + XML_NAMESPACE_DECL + XML_XINCLUDE_END + XML_XINCLUDE_START + encodeToUTF8 + decodeFromUTF8 + XML_XMLNS_NS + XML_XML_NS + )], + libxml => [qw( + XML_ELEMENT_NODE + XML_ATTRIBUTE_NODE + XML_TEXT_NODE + XML_CDATA_SECTION_NODE + XML_ENTITY_REF_NODE + XML_ENTITY_NODE + XML_PI_NODE + XML_COMMENT_NODE + XML_DOCUMENT_NODE + XML_DOCUMENT_TYPE_NODE + XML_DOCUMENT_FRAG_NODE + XML_NOTATION_NODE + XML_HTML_DOCUMENT_NODE + XML_DTD_NODE + XML_ELEMENT_DECL + XML_ATTRIBUTE_DECL + XML_ENTITY_DECL + XML_NAMESPACE_DECL + XML_XINCLUDE_END + XML_XINCLUDE_START + )], + encoding => [qw( + encodeToUTF8 + decodeFromUTF8 + )], + ns => [qw( + XML_XMLNS_NS + XML_XML_NS + )], + ); + +@EXPORT_OK = ( + @{$EXPORT_TAGS{all}}, + ); + +@EXPORT = ( + @{$EXPORT_TAGS{all}}, + ); + +#-------------------------------------------------------------------------# +# initialization of the global variables # +#-------------------------------------------------------------------------# +$skipDTD = 0; +$skipXMLDeclaration = 0; +$setTagCompression = 0; + +$MatchCB = undef; +$ReadCB = undef; +$OpenCB = undef; +$CloseCB = undef; + +# if ($threads::threads) { +# our $__THREADS_TID = 0; +# eval q{ +# use threads::shared; +# our $__PROXY_NODE_REGISTRY_MUTEX :shared = 0; +# }; +# die $@ if $@; +# } +#-------------------------------------------------------------------------# +# bootstrapping # +#-------------------------------------------------------------------------# +XSLoader::load( 'XML::LibXML', $VERSION ); +undef &AUTOLOAD; + +*encodeToUTF8 = \&XML::LibXML::Common::encodeToUTF8; +*decodeFromUTF8 = \&XML::LibXML::Common::decodeFromUTF8; + +} # BEGIN + + +#-------------------------------------------------------------------------# +# libxml2 node names (see also XML::LibXML::Common # +#-------------------------------------------------------------------------# +use constant XML_ELEMENT_NODE => 1; +use constant XML_ATTRIBUTE_NODE => 2; +use constant XML_TEXT_NODE => 3; +use constant XML_CDATA_SECTION_NODE => 4; +use constant XML_ENTITY_REF_NODE => 5; +use constant XML_ENTITY_NODE => 6; +use constant XML_PI_NODE => 7; +use constant XML_COMMENT_NODE => 8; +use constant XML_DOCUMENT_NODE => 9; +use constant XML_DOCUMENT_TYPE_NODE => 10; +use constant XML_DOCUMENT_FRAG_NODE => 11; +use constant XML_NOTATION_NODE => 12; +use constant XML_HTML_DOCUMENT_NODE => 13; +use constant XML_DTD_NODE => 14; +use constant XML_ELEMENT_DECL => 15; +use constant XML_ATTRIBUTE_DECL => 16; +use constant XML_ENTITY_DECL => 17; +use constant XML_NAMESPACE_DECL => 18; +use constant XML_XINCLUDE_START => 19; +use constant XML_XINCLUDE_END => 20; + + +sub import { + my $package=shift; + if (grep /^:threads_shared$/, @_) { + require threads; + if (!defined($__threads_shared)) { + if (INIT_THREAD_SUPPORT()) { + eval q{ + use threads::shared; + share($__PROXY_NODE_REGISTRY_MUTEX); + }; + if ($@) { # something went wrong + DISABLE_THREAD_SUPPORT(); # leave the library in a usable state + die $@; # and die + } + $__PROXY_NODE_REGISTRY = XML::LibXML::HashTable->new(); + $__threads_shared=1; + } else { + croak("XML::LibXML or Perl compiled without ithread support!"); + } + } elsif (!$__threads_shared) { + croak("XML::LibXML already loaded without thread support. Too late to enable thread support!"); + } + } elsif (defined $XML::LibXML::__loaded) { + $__threads_shared=0 if not defined $__threads_shared; + } + __PACKAGE__->export_to_level(1,$package,grep !/^:threads(_shared)?$/,@_); +} + +sub threads_shared_enabled { + return $__threads_shared ? 1 : 0; +} + +# if ($threads::threads) { +# our $__PROXY_NODE_REGISTRY = XML::LibXML::HashTable->new(); +# } + +#-------------------------------------------------------------------------# +# test exact version (up to patch-level) # +#-------------------------------------------------------------------------# +{ + my ($runtime_version) = LIBXML_RUNTIME_VERSION() =~ /^(\d+)/; + if ( $runtime_version < LIBXML_VERSION ) { + warn "Warning: XML::LibXML compiled against libxml2 ".LIBXML_VERSION. + ", but runtime libxml2 is older $runtime_version\n"; + } +} + + +#-------------------------------------------------------------------------# +# parser flags # +#-------------------------------------------------------------------------# + +# Copied directly from http://xmlsoft.org/html/libxml-parser.html#xmlParserOption +use constant { + XML_PARSE_RECOVER => 1, # recover on errors + XML_PARSE_NOENT => 2, # substitute entities + XML_PARSE_DTDLOAD => 4, # load the external subset + XML_PARSE_DTDATTR => 8, # default DTD attributes + XML_PARSE_DTDVALID => 16, # validate with the DTD + XML_PARSE_NOERROR => 32, # suppress error reports + XML_PARSE_NOWARNING => 64, # suppress warning reports + XML_PARSE_PEDANTIC => 128, # pedantic error reporting + XML_PARSE_NOBLANKS => 256, # remove blank nodes + XML_PARSE_SAX1 => 512, # use the SAX1 interface internally + XML_PARSE_XINCLUDE => 1024, # Implement XInclude substitution + XML_PARSE_NONET => 2048, # Forbid network access + XML_PARSE_NODICT => 4096, # Do not reuse the context dictionary + XML_PARSE_NSCLEAN => 8192, # remove redundant namespaces declarations + XML_PARSE_NOCDATA => 16384, # merge CDATA as text nodes + XML_PARSE_NOXINCNODE => 32768, # do not generate XINCLUDE START/END nodes + XML_PARSE_COMPACT => 65536, # compact small text nodes; no modification of the tree allowed afterwards + # (will possibly crash if you try to modify the tree) + XML_PARSE_OLD10 => 131072, # parse using XML-1.0 before update 5 + XML_PARSE_NOBASEFIX => 262144, # do not fixup XINCLUDE xml#base uris + XML_PARSE_HUGE => 524288, # relax any hardcoded limit from the parser + XML_PARSE_OLDSAX => 1048576, # parse using SAX2 interface from before 2.7.0 + HTML_PARSE_RECOVER => (1<<0), # suppress error reports + HTML_PARSE_NOERROR => (1<<5), # suppress error reports +}; + +$XML_LIBXML_PARSE_DEFAULTS = ( XML_PARSE_NODICT ); + +# this hash is made global so that applications can add names for new +# libxml2 parser flags as temporary workaround + +%PARSER_FLAGS = ( + recover => XML_PARSE_RECOVER, + expand_entities => XML_PARSE_NOENT, + load_ext_dtd => XML_PARSE_DTDLOAD, + complete_attributes => XML_PARSE_DTDATTR, + validation => XML_PARSE_DTDVALID, + suppress_errors => XML_PARSE_NOERROR, + suppress_warnings => XML_PARSE_NOWARNING, + pedantic_parser => XML_PARSE_PEDANTIC, + no_blanks => XML_PARSE_NOBLANKS, + expand_xinclude => XML_PARSE_XINCLUDE, + xinclude => XML_PARSE_XINCLUDE, + no_network => XML_PARSE_NONET, + clean_namespaces => XML_PARSE_NSCLEAN, + no_cdata => XML_PARSE_NOCDATA, + no_xinclude_nodes => XML_PARSE_NOXINCNODE, + old10 => XML_PARSE_OLD10, + no_base_fix => XML_PARSE_NOBASEFIX, + huge => XML_PARSE_HUGE, + oldsax => XML_PARSE_OLDSAX, +); + +my %OUR_FLAGS = ( + recover => 'XML_LIBXML_RECOVER', + line_numbers => 'XML_LIBXML_LINENUMBERS', + URI => 'XML_LIBXML_BASE_URI', + base_uri => 'XML_LIBXML_BASE_URI', + gdome => 'XML_LIBXML_GDOME', + ext_ent_handler => 'ext_ent_handler', +); + +sub _parser_options { + my ($self, $opts) = @_; + + # currently dictionaries break XML::LibXML memory management + + my $flags; + + if (ref($self)) { + $flags = ($self->{XML_LIBXML_PARSER_OPTIONS}||0); + } else { + $flags = $XML_LIBXML_PARSE_DEFAULTS; # safety precaution + } + + my ($key, $value); + while (($key,$value) = each %$opts) { + my $f = $PARSER_FLAGS{ $key }; + if (defined $f) { + if ($value) { + $flags |= $f + } else { + $flags &= ~$f; + } + } elsif ($key eq 'set_parser_flags') { # this can be used to pass flags XML::LibXML does not yet know about + $flags |= $value; + } elsif ($key eq 'unset_parser_flags') { + $flags &= ~$value; + } + + } + return $flags; +} + +my %compatibility_flags = ( + XML_LIBXML_VALIDATION => 'validation', + XML_LIBXML_EXPAND_ENTITIES => 'expand_entities', + XML_LIBXML_PEDANTIC => 'pedantic_parser', + XML_LIBXML_NONET => 'no_network', + XML_LIBXML_EXT_DTD => 'load_ext_dtd', + XML_LIBXML_COMPLETE_ATTR => 'complete_attributes', + XML_LIBXML_EXPAND_XINCLUDE => 'expand_xinclude', + XML_LIBXML_NSCLEAN => 'clean_namespaces', + XML_LIBXML_KEEP_BLANKS => 'keep_blanks', + XML_LIBXML_LINENUMBERS => 'line_numbers', +); + +#-------------------------------------------------------------------------# +# parser constructor # +#-------------------------------------------------------------------------# + + +sub new { + my $class = shift; + my $self = bless { + }, $class; + if (@_) { + my %opts = (); + if (ref($_[0]) eq 'HASH') { + %opts = %{$_[0]}; + } else { + # old interface + my %args = @_; + %opts=( + map { + (($compatibility_flags{ $_ }||$_) => $args{ $_ }) + } keys %args + ); + } + # parser flags + $opts{no_blanks} = !$opts{keep_blanks} if exists($opts{keep_blanks}) and !exists($opts{no_blanks}); + $opts{load_ext_dtd} = $opts{expand_entities} if exists($opts{expand_entities}) and !exists($opts{load_ext_dtd}); + + for (keys %OUR_FLAGS) { + $self->{$OUR_FLAGS{$_}} = delete $opts{$_}; + } + $class->load_catalog(delete($opts{catalog})) if $opts{catalog}; + + $self->{XML_LIBXML_PARSER_OPTIONS} = XML::LibXML->_parser_options(\%opts); + + # store remaining unknown options directly in $self + for (keys %opts) { + $self->{$_}=$opts{$_} unless exists $PARSER_FLAGS{$_}; + } + } else { + $self->{XML_LIBXML_PARSER_OPTIONS} = $XML_LIBXML_PARSE_DEFAULTS; + } + if ( defined $self->{Handler} ) { + $self->set_handler( $self->{Handler} ); + } + + $self->{_State_} = 0; + return $self; +} + +sub _clone { + my ($self)=@_; + my $new = ref($self)->new({ + recover => $self->{XML_LIBXML_RECOVER}, + line_numbers => $self->{XML_LIBXML_LINENUMBERS}, + base_uri => $self->{XML_LIBXML_BASE_URI}, + gdome => $self->{XML_LIBXML_GDOME}, + }); + # The parser options may contain some options that were zeroed from the + # defaults so set_parser_flags won't work here. We need to assign them + # explicitly. + $new->{XML_LIBXML_PARSER_OPTIONS} = $self->{XML_LIBXML_PARSER_OPTIONS}; + $new->input_callbacks($self->input_callbacks()); + return $new; +} + +#-------------------------------------------------------------------------# +# Threads support methods # +#-------------------------------------------------------------------------# + +# threads doc says CLONE's API may change in future, which would break +# an XS method prototype +sub CLONE { + if ($XML::LibXML::__threads_shared) { + XML::LibXML::_CLONE( $_[0] ); + } +} + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +sub __proxy_registry { + my ($class)=caller; + die "This version of $class uses API of XML::LibXML 1.66 which is not compatible with XML::LibXML $VERSION. Please upgrade $class!\n"; +} + +#-------------------------------------------------------------------------# +# DOM Level 2 document constructor # +#-------------------------------------------------------------------------# + +sub createDocument { + my $self = shift; + if (!@_ or $_[0] =~ m/^\d\.\d$/) { + # for backward compatibility + return XML::LibXML::Document->new(@_); + } + else { + # DOM API: createDocument(namespaceURI, qualifiedName, doctype?) + my $doc = XML::LibXML::Document-> new; + my $el = $doc->createElementNS(shift, shift); + $doc->setDocumentElement($el); + $doc->setExternalSubset(shift) if @_; + return $doc; + } +} + +#-------------------------------------------------------------------------# +# callback functions # +#-------------------------------------------------------------------------# + +sub externalEntityLoader(&) +{ + return _externalEntityLoader($_[0]); +} + +sub input_callbacks { + my $self = shift; + my $icbclass = shift; + + if ( defined $icbclass ) { + $self->{XML_LIBXML_CALLBACK_STACK} = $icbclass; + } + return $self->{XML_LIBXML_CALLBACK_STACK}; +} + +sub match_callback { + my $self = shift; + if ( ref $self ) { + if ( scalar @_ ) { + $self->{XML_LIBXML_MATCH_CB} = shift; + $self->{XML_LIBXML_CALLBACK_STACK} = undef; + } + return $self->{XML_LIBXML_MATCH_CB}; + } + else { + $MatchCB = shift if scalar @_; + return $MatchCB; + } +} + +sub read_callback { + my $self = shift; + if ( ref $self ) { + if ( scalar @_ ) { + $self->{XML_LIBXML_READ_CB} = shift; + $self->{XML_LIBXML_CALLBACK_STACK} = undef; + } + return $self->{XML_LIBXML_READ_CB}; + } + else { + $ReadCB = shift if scalar @_; + return $ReadCB; + } +} + +sub close_callback { + my $self = shift; + if ( ref $self ) { + if ( scalar @_ ) { + $self->{XML_LIBXML_CLOSE_CB} = shift; + $self->{XML_LIBXML_CALLBACK_STACK} = undef; + } + return $self->{XML_LIBXML_CLOSE_CB}; + } + else { + $CloseCB = shift if scalar @_; + return $CloseCB; + } +} + +sub open_callback { + my $self = shift; + if ( ref $self ) { + if ( scalar @_ ) { + $self->{XML_LIBXML_OPEN_CB} = shift; + $self->{XML_LIBXML_CALLBACK_STACK} = undef; + } + return $self->{XML_LIBXML_OPEN_CB}; + } + else { + $OpenCB = shift if scalar @_; + return $OpenCB; + } +} + +sub callbacks { + my $self = shift; + if ( ref $self ) { + if (@_) { + my ($match, $open, $read, $close) = @_; + @{$self}{qw(XML_LIBXML_MATCH_CB XML_LIBXML_OPEN_CB XML_LIBXML_READ_CB XML_LIBXML_CLOSE_CB)} = ($match, $open, $read, $close); + $self->{XML_LIBXML_CALLBACK_STACK} = undef; + } + else { + return @{$self}{qw(XML_LIBXML_MATCH_CB XML_LIBXML_OPEN_CB XML_LIBXML_READ_CB XML_LIBXML_CLOSE_CB)}; + } + } + else { + if (@_) { + ( $MatchCB, $OpenCB, $ReadCB, $CloseCB ) = @_; + } + else { + return ( $MatchCB, $OpenCB, $ReadCB, $CloseCB ); + } + } +} + +#-------------------------------------------------------------------------# +# internal member variable manipulation # +#-------------------------------------------------------------------------# +sub __parser_option { + my ($self, $opt) = @_; + if (@_>2) { + if ($_[2]) { + $self->{XML_LIBXML_PARSER_OPTIONS} |= $opt; + return 1; + } else { + $self->{XML_LIBXML_PARSER_OPTIONS} &= ~$opt; + return 0; + } + } else { + return ($self->{XML_LIBXML_PARSER_OPTIONS} & $opt) ? 1 : 0; + } +} + +sub option_exists { + my ($self,$name)=@_; + return ($PARSER_FLAGS{$name} || $OUR_FLAGS{$name}) ? 1 : 0; +} +sub get_option { + my ($self,$name)=@_; + my $flag = $OUR_FLAGS{$name}; + return $self->{$flag} if $flag; + $flag = $PARSER_FLAGS{$name}; + return $self->__parser_option($flag) if $flag; + warn "XML::LibXML::get_option: unknown parser option $name\n"; + return undef; +} +sub set_option { + my ($self,$name,$value)=@_; + my $flag = $OUR_FLAGS{$name}; + return ($self->{$flag}=$value) if $flag; + $flag = $PARSER_FLAGS{$name}; + return $self->__parser_option($flag,$value) if $flag; + warn "XML::LibXML::get_option: unknown parser option $name\n"; + return undef; +} +sub set_options { + my $self=shift; + my $opts; + if (@_==1 and ref($_[0]) eq 'HASH') { + $opts = $_[0]; + } elsif (@_ % 2 == 0) { + $opts={@_}; + } else { + croak("Odd number of elements passed to set_options"); + } + $self->set_option($_=>$opts->{$_}) foreach keys %$opts; + return; +} + +sub validation { + my $self = shift; + return $self->__parser_option(XML_PARSE_DTDVALID,@_); +} + +sub recover { + my $self = shift; + if (scalar @_) { + $self->{XML_LIBXML_RECOVER} = $_[0]; + $self->__parser_option(XML_PARSE_RECOVER,@_); + } + return $self->{XML_LIBXML_RECOVER}; +} + +sub recover_silently { + my $self = shift; + my $arg = shift; + if ( defined($arg) ) + { + $self->recover(($arg == 1) ? 2 : $arg); + } + return (($self->recover()||0) == 2) ? 1 : 0; +} + +sub expand_entities { + my $self = shift; + if (scalar(@_) and $_[0]) { + return $self->__parser_option(XML_PARSE_NOENT | XML_PARSE_DTDLOAD,1); + } + return $self->__parser_option(XML_PARSE_NOENT,@_); +} + +sub keep_blanks { + my $self = shift; + my @args; # we have to negate the argument and return negated value, since + # the actual flag is no_blanks + if (scalar @_) { + @args=($_[0] ? 0 : 1); + } + return $self->__parser_option(XML_PARSE_NOBLANKS,@args) ? 0 : 1; +} + +sub pedantic_parser { + my $self = shift; + return $self->__parser_option(XML_PARSE_PEDANTIC,@_); +} + +sub line_numbers { + my $self = shift; + $self->{XML_LIBXML_LINENUMBERS} = shift if scalar @_; + return $self->{XML_LIBXML_LINENUMBERS}; +} + +sub no_network { + my $self = shift; + return $self->__parser_option(XML_PARSE_NONET,@_); +} + +sub load_ext_dtd { + my $self = shift; + return $self->__parser_option(XML_PARSE_DTDLOAD,@_); +} + +sub complete_attributes { + my $self = shift; + return $self->__parser_option(XML_PARSE_DTDATTR,@_); +} + +sub expand_xinclude { + my $self = shift; + return $self->__parser_option(XML_PARSE_XINCLUDE,@_); +} + +sub base_uri { + my $self = shift; + $self->{XML_LIBXML_BASE_URI} = shift if scalar @_; + return $self->{XML_LIBXML_BASE_URI}; +} + +sub gdome_dom { + my $self = shift; + $self->{XML_LIBXML_GDOME} = shift if scalar @_; + return $self->{XML_LIBXML_GDOME}; +} + +sub clean_namespaces { + my $self = shift; + return $self->__parser_option(XML_PARSE_NSCLEAN,@_); +} + +#-------------------------------------------------------------------------# +# set the optional SAX(2) handler # +#-------------------------------------------------------------------------# +sub set_handler { + my $self = shift; + if ( defined $_[0] ) { + $self->{HANDLER} = $_[0]; + + $self->{SAX_ELSTACK} = []; + $self->{SAX} = {State => 0}; + } + else { + # undef SAX handling + $self->{SAX_ELSTACK} = []; + delete $self->{HANDLER}; + delete $self->{SAX}; + } +} + +#-------------------------------------------------------------------------# +# helper functions # +#-------------------------------------------------------------------------# +sub _auto_expand { + my ( $self, $result, $uri ) = @_; + + $result->setBaseURI( $uri ) if defined $uri; + + if ( $self->expand_xinclude ) { + $self->{_State_} = 1; + eval { $self->processXIncludes($result); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + $self->_cleanup_callbacks(); + $result = undef; + croak $err; + } + } + return $result; +} + +sub _init_callbacks { + my $self = shift; + my $icb = $self->{XML_LIBXML_CALLBACK_STACK}; + unless ( defined $icb ) { + $self->{XML_LIBXML_CALLBACK_STACK} = XML::LibXML::InputCallback->new(); + $icb = $self->{XML_LIBXML_CALLBACK_STACK}; + } + + $icb->init_callbacks($self); +} + +sub _cleanup_callbacks { + my $self = shift; + $self->{XML_LIBXML_CALLBACK_STACK}->cleanup_callbacks(); +} + +sub __read { + read($_[0], $_[1], $_[2]); +} + +sub __write { + if ( ref( $_[0] ) ) { + $_[0]->write( $_[1], $_[2] ); + } + else { + $_[0]->write( $_[1] ); + } +} + +sub load_xml { + my $class_or_self = shift; + my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_; + + my $URI = delete($args{URI}); + $URI = "$URI" if defined $URI; # stringify in case it is an URI object + my $parser; + if (ref($class_or_self)) { + $parser = $class_or_self->_clone(); + $parser->{XML_LIBXML_PARSER_OPTIONS} = $parser->_parser_options(\%args); + } else { + $parser = $class_or_self->new(\%args); + } + my $dom; + if ( defined $args{location} ) { + $dom = $parser->parse_file( "$args{location}" ); + } + elsif ( defined $args{string} ) { + $dom = $parser->parse_string( $args{string}, $URI ); + } + elsif ( defined $args{IO} ) { + $dom = $parser->parse_fh( $args{IO}, $URI ); + } + else { + croak("XML::LibXML->load: specify location, string, or IO"); + } + return $dom; +} + +sub load_html { + my ($class_or_self) = shift; + my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_; + my $URI = delete($args{URI}); + $URI = "$URI" if defined $URI; # stringify in case it is an URI object + my $parser; + if (ref($class_or_self)) { + $parser = $class_or_self->_clone(); + } else { + $parser = $class_or_self->new(); + } + my $dom; + if ( defined $args{location} ) { + $dom = $parser->parse_html_file( "$args{location}", \%args ); + } + elsif ( defined $args{string} ) { + $dom = $parser->parse_html_string( $args{string}, \%args ); + } + elsif ( defined $args{IO} ) { + $dom = $parser->parse_html_fh( $args{IO}, \%args ); + } + else { + croak("XML::LibXML->load: specify location, string, or IO"); + } + return $dom; +} + +#-------------------------------------------------------------------------# +# parsing functions # +#-------------------------------------------------------------------------# +# all parsing functions handle normal as SAX parsing at the same time. +# note that SAX parsing is handled incomplete! use XML::LibXML::SAX for +# complete parsing sequences +#-------------------------------------------------------------------------# +sub parse_string { + my $self = shift; + croak("parse_string is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; + + unless ( defined $_[0] and length $_[0] ) { + croak("Empty String"); + } + + $self->{_State_} = 1; + my $result; + + $self->_init_callbacks(); + + if ( defined $self->{SAX} ) { + my $string = shift; + $self->{SAX_ELSTACK} = []; + eval { $result = $self->_parse_sax_string($string); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + } + else { + eval { $result = $self->_parse_string( @_ ); }; + + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + + $result = $self->_auto_expand( $result, $self->{XML_LIBXML_BASE_URI} ); + } + $self->_cleanup_callbacks(); + + return $result; +} + +sub parse_fh { + my $self = shift; + croak("parse_fh is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; + $self->{_State_} = 1; + my $result; + + $self->_init_callbacks(); + + if ( defined $self->{SAX} ) { + $self->{SAX_ELSTACK} = []; + eval { $self->_parse_sax_fh( @_ ); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + } + else { + eval { $result = $self->_parse_fh( @_ ); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + + $result = $self->_auto_expand( $result, $self->{XML_LIBXML_BASE_URI} ); + } + + $self->_cleanup_callbacks(); + + return $result; +} + +sub parse_file { + my $self = shift; + croak("parse_file is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; + + $self->{_State_} = 1; + my $result; + + $self->_init_callbacks(); + + if ( defined $self->{SAX} ) { + $self->{SAX_ELSTACK} = []; + eval { $self->_parse_sax_file( @_ ); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + } + else { + eval { $result = $self->_parse_file(@_); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + + $result = $self->_auto_expand( $result ); + } + $self->_cleanup_callbacks(); + + return $result; +} + +sub parse_xml_chunk { + my $self = shift; + # max 2 parameter: + # 1: the chunk + # 2: the encoding of the string + croak("parse_xml_chunk is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; my $result; + + unless ( defined $_[0] and length $_[0] ) { + croak("Empty String"); + } + + $self->{_State_} = 1; + + $self->_init_callbacks(); + + if ( defined $self->{SAX} ) { + eval { + $self->_parse_sax_xml_chunk( @_ ); + + # this is required for XML::GenericChunk. + # in normal case is_filter is not defined, an thus the parsing + # will be terminated. in case of a SAX filter the parsing is not + # finished at that state. therefore we must not reset the parsing + unless ( $self->{IS_FILTER} ) { + $result = $self->{HANDLER}->end_document(); + } + }; + } + else { + eval { $result = $self->_parse_xml_chunk( @_ ); }; + } + + $self->_cleanup_callbacks(); + + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + croak $err; + } + + return $result; +} + +sub parse_balanced_chunk { + my $self = shift; + $self->_init_callbacks(); + my $rv; + eval { + $rv = $self->parse_xml_chunk( @_ ); + }; + my $err = $@; + $self->_cleanup_callbacks(); + if ( $err ) { + chomp $err unless ref $err; + croak $err; + } + return $rv +} + +# java style +sub processXIncludes { + my $self = shift; + my $doc = shift; + my $opts = shift; + my $options = $self->_parser_options($opts); + if ( $self->{_State_} != 1 ) { + $self->_init_callbacks(); + } + my $rv; + eval { + $rv = $self->_processXIncludes($doc || " ", $options); + }; + my $err = $@; + if ( $self->{_State_} != 1 ) { + $self->_cleanup_callbacks(); + } + + if ( $err ) { + chomp $err unless ref $err; + croak $err; + } + return $rv; +} + +# perl style +sub process_xincludes { + my $self = shift; + my $doc = shift; + my $opts = shift; + my $options = $self->_parser_options($opts); + + my $rv; + $self->_init_callbacks(); + eval { + $rv = $self->_processXIncludes($doc || " ", $options); + }; + my $err = $@; + $self->_cleanup_callbacks(); + if ( $err ) { + chomp $err unless ref $err; + croak $@; + } + return $rv; +} + +#-------------------------------------------------------------------------# +# HTML parsing functions # +#-------------------------------------------------------------------------# + +sub _html_options { + my ($self,$opts)=@_; + $opts = {} unless ref $opts; + # return (undef,undef) unless ref $opts; + my $flags = 0; + { + my $recover = exists $opts->{recover} ? $opts->{recover} : $self->recover; + + if ($recover) + { + $flags |= HTML_PARSE_RECOVER; + if ($recover == 2) + { + $flags |= HTML_PARSE_NOERROR; + } + } + } + + $flags |= 4 if $opts->{no_defdtd}; # default is ON: injects DTD as needed + $flags |= 32 if exists $opts->{suppress_errors} ? $opts->{suppress_errors} : $self->get_option('suppress_errors'); + # This is to fix https://rt.cpan.org/Ticket/Display.html?id=58024 : + # + # In XML::LibXML, warnings are not suppressed when specifying the recover + # or recover_silently flags as per the following excerpt from the manpage: + # + if ($self->recover_silently) + { + $flags |= 32; + } + $flags |= 64 if $opts->{suppress_warnings}; + $flags |= 128 if exists $opts->{pedantic_parser} ? $opts->{pedantic_parser} : $self->pedantic_parser; + $flags |= 256 if exists $opts->{no_blanks} ? $opts->{no_blanks} : !$self->keep_blanks; + $flags |= 2048 if exists $opts->{no_network} ? $opts->{no_network} : !$self->no_network; + $flags |= 16384 if $opts->{no_cdata}; + $flags |= 65536 if $opts->{compact}; # compact small text nodes; no modification + # of the tree allowed afterwards + # (WILL possibly CRASH IF YOU try to MODIFY THE TREE) + $flags |= 524288 if $opts->{huge}; # relax any hardcoded limit from the parser + $flags |= 1048576 if $opts->{oldsax}; # parse using SAX2 interface from before 2.7.0 + + return ($opts->{URI},$opts->{encoding},$flags); +} + +sub parse_html_string { + my ($self,$str,$opts) = @_; + croak("parse_html_string is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; + + unless ( defined $str and length $str ) { + croak("Empty String"); + } + $self->{_State_} = 1; + my $result; + + $self->_init_callbacks(); + eval { + $result = $self->_parse_html_string( $str, + $self->_html_options($opts) + ); + }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + + $self->_cleanup_callbacks(); + + return $result; +} + +sub parse_html_file { + my ($self,$file,$opts) = @_; + croak("parse_html_file is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; + $self->{_State_} = 1; + my $result; + + $self->_init_callbacks(); + eval { $result = $self->_parse_html_file($file, + $self->_html_options($opts) + ); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + + $self->_cleanup_callbacks(); + + return $result; +} + +sub parse_html_fh { + my ($self,$fh,$opts) = @_; + croak("parse_html_fh is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; + croak("parse already in progress") if $self->{_State_}; + $self->{_State_} = 1; + + my $result; + $self->_init_callbacks(); + eval { $result = $self->_parse_html_fh( $fh, + $self->_html_options($opts) + ); }; + my $err = $@; + $self->{_State_} = 0; + if ($err) { + chomp $err unless ref $err; + $self->_cleanup_callbacks(); + croak $err; + } + $self->_cleanup_callbacks(); + + return $result; +} + +#-------------------------------------------------------------------------# +# push parser interface # +#-------------------------------------------------------------------------# +sub init_push { + my $self = shift; + + if ( defined $self->{CONTEXT} ) { + delete $self->{CONTEXT}; + } + + if ( defined $self->{SAX} ) { + $self->{CONTEXT} = $self->_start_push(1); + } + else { + $self->{CONTEXT} = $self->_start_push(0); + } +} + +sub push { + my $self = shift; + + $self->_init_callbacks(); + + if ( not defined $self->{CONTEXT} ) { + $self->init_push(); + } + + eval { + foreach ( @_ ) { + $self->_push( $self->{CONTEXT}, $_ ); + } + }; + my $err = $@; + $self->_cleanup_callbacks(); + if ( $err ) { + chomp $err unless ref $err; + croak $err; + } +} + +# this function should be promoted! +# the reason is because libxml2 uses xmlParseChunk() for this purpose! +sub parse_chunk { + my $self = shift; + my $chunk = shift; + my $terminate = shift; + + if ( not defined $self->{CONTEXT} ) { + $self->init_push(); + } + + if ( defined $chunk and length $chunk ) { + $self->_push( $self->{CONTEXT}, $chunk ); + } + + if ( $terminate ) { + return $self->finish_push(); + } +} + + +sub finish_push { + my $self = shift; + my $restore = shift || 0; + return undef unless defined $self->{CONTEXT}; + + my $retval; + + if ( defined $self->{SAX} ) { + eval { + $self->_end_sax_push( $self->{CONTEXT} ); + $retval = $self->{HANDLER}->end_document( {} ); + }; + } + else { + eval { $retval = $self->_end_push( $self->{CONTEXT}, $restore ); }; + } + my $err = $@; + delete $self->{CONTEXT}; + if ( $err ) { + chomp $err unless ref $err; + croak( $err ); + } + return $retval; +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Node Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::Node; + +use Carp qw(croak); + +use overload + '""' => sub { $_[0]->toString() }, + 'bool' => sub { 1 }, + '0+' => sub { Scalar::Util::refaddr($_[0]) }, + fallback => 1, + ; + + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +sub isSupported { + my $self = shift; + my $feature = shift; + return $self->can($feature) ? 1 : 0; +} + +sub getChildNodes { my $self = shift; return $self->childNodes(); } + +sub childNodes { + my $self = shift; + my @children = $self->_childNodes(0); + return wantarray ? @children : XML::LibXML::NodeList->new_from_ref(\@children , 1); +} + +sub nonBlankChildNodes { + my $self = shift; + my @children = $self->_childNodes(1); + return wantarray ? @children : XML::LibXML::NodeList->new_from_ref(\@children , 1); +} + +sub attributes { + my $self = shift; + my @attr = $self->_attributes(); + return wantarray ? @attr : XML::LibXML::NamedNodeMap->new( @attr ); +} + + +sub findnodes { + my ($node, $xpath) = @_; + my @nodes = $node->_findnodes($xpath); + if (wantarray) { + return @nodes; + } + else { + return XML::LibXML::NodeList->new_from_ref(\@nodes, 1); + } +} + +sub exists { + my ($node, $xpath) = @_; + my (undef, $value) = $node->_find($xpath,1); + return $value; +} + +sub findvalue { + my ($node, $xpath) = @_; + my $res; + $res = $node->find($xpath); + return $res->to_literal->value; +} + +sub findbool { + my ($node, $xpath) = @_; + my ($type, @params) = $node->_find($xpath,1); + if ($type) { + return $type->new(@params); + } + return undef; +} + +sub find { + my ($node, $xpath) = @_; + my ($type, @params) = $node->_find($xpath,0); + if ($type) { + return $type->new(@params); + } + return undef; +} + +sub setOwnerDocument { + my ( $self, $doc ) = @_; + $doc->adoptNode( $self ); +} + +sub toStringC14N { + my ($self, $comments, $xpath, $xpc) = @_; + return $self->_toStringC14N( $comments || 0, + (defined $xpath ? $xpath : undef), + 0, + undef, + (defined $xpc ? $xpc : undef) + ); +} + +{ +my $C14N_version_1_dot_1_val = 2; + +sub toStringC14N_v1_1 { + my ($self, $comments, $xpath, $xpc) = @_; + + return $self->_toStringC14N( + $comments || 0, + (defined $xpath ? $xpath : undef), + $C14N_version_1_dot_1_val, + undef, + (defined $xpc ? $xpc : undef) + ); +} + +} + +sub toStringEC14N { + my ($self, $comments, $xpath, $xpc, $inc_prefix_list) = @_; + unless (UNIVERSAL::isa($xpc,'XML::LibXML::XPathContext')) { + if ($inc_prefix_list) { + croak("toStringEC14N: 3rd argument is not an XML::LibXML::XPathContext"); + } else { + $inc_prefix_list=$xpc; + $xpc=undef; + } + } + if (defined($inc_prefix_list) and !UNIVERSAL::isa($inc_prefix_list,'ARRAY')) { + croak("toStringEC14N: inclusive_prefix_list must be undefined or ARRAY"); + } + return $self->_toStringC14N( $comments || 0, + (defined $xpath ? $xpath : undef), + 1, + (defined $inc_prefix_list ? $inc_prefix_list : undef), + (defined $xpc ? $xpc : undef) + ); +} + +*serialize_c14n = \&toStringC14N; +*serialize_exc_c14n = \&toStringEC14N; + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Document Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::Document; + +use vars qw(@ISA); +@ISA = ('XML::LibXML::Node'); + +sub actualEncoding { + my $doc = shift; + my $enc = $doc->encoding; + return (defined $enc and length $enc) ? $enc : 'UTF-8'; +} + +sub setDocumentElement { + my $doc = shift; + my $element = shift; + + my $oldelem = $doc->documentElement; + if ( defined $oldelem ) { + $doc->removeChild($oldelem); + } + + $doc->_setDocumentElement($element); +} + +sub toString { + my $self = shift; + my $flag = shift; + + my $retval = ""; + + if ( defined $XML::LibXML::skipXMLDeclaration + and $XML::LibXML::skipXMLDeclaration == 1 ) { + foreach ( $self->childNodes ){ + next if $_->nodeType == XML::LibXML::XML_DTD_NODE() + and $XML::LibXML::skipDTD; + $retval .= $_->toString; + } + } + else { + $flag ||= 0 unless defined $flag; + $retval = $self->_toString($flag); + } + + return $retval; +} + +sub serialize { + my $self = shift; + return $self->toString( @_ ); +} + +#-------------------------------------------------------------------------# +# bad style xinclude processing # +#-------------------------------------------------------------------------# +sub process_xinclude { + my $self = shift; + my $opts = shift; + XML::LibXML->new->processXIncludes( $self, $opts ); +} + +sub insertProcessingInstruction { + my $self = shift; + my $target = shift; + my $data = shift; + + my $pi = $self->createPI( $target, $data ); + my $root = $self->documentElement; + + if ( defined $root ) { + # this is actually not correct, but i guess it's what the user + # intends + $self->insertBefore( $pi, $root ); + } + else { + # if no documentElement was found we just append the PI + $self->appendChild( $pi ); + } +} + +sub insertPI { + my $self = shift; + $self->insertProcessingInstruction( @_ ); +} + +#-------------------------------------------------------------------------# +# DOM L3 Document functions. +# added after robins implicit feature request +#-------------------------------------------------------------------------# +*getElementsByTagName = \&XML::LibXML::Element::getElementsByTagName; +*getElementsByTagNameNS = \&XML::LibXML::Element::getElementsByTagNameNS; +*getElementsByLocalName = \&XML::LibXML::Element::getElementsByLocalName; + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::DocumentFragment Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::DocumentFragment; + +use vars qw(@ISA); +@ISA = ('XML::LibXML::Node'); + +sub toString { + my $self = shift; + my $retval = ""; + if ( $self->hasChildNodes() ) { + foreach my $n ( $self->childNodes() ) { + $retval .= $n->toString(@_); + } + } + return $retval; +} + +*serialize = \&toString; + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Element Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::Element; + +use vars qw(@ISA); +@ISA = ('XML::LibXML::Node'); +use XML::LibXML qw(:ns :libxml); +use XML::LibXML::AttributeHash; +use Carp; + +use Scalar::Util qw(blessed); + +use overload + '%{}' => 'getAttributeHash', + 'eq' => '_isSameNodeLax', '==' => '_isSameNodeLax', + 'ne' => '_isNotSameNodeLax', '!=' => '_isNotSameNodeLax', + fallback => 1, + ; + +sub _isNotSameNodeLax { + my ($self, $other) = @_; + + return ((not $self->_isSameNodeLax($other)) ? 1 : ''); +} + +sub _isSameNodeLax { + my ($self, $other) = @_; + + if (blessed($other) and $other->isa('XML::LibXML::Element')) + { + return ($self->isSameNode($other) ? 1 : ''); + } + else + { + return ''; + } +} + +{ + my %tiecache; + + sub __destroy_tiecache + { + delete $tiecache{ 0+$_[0] }; + } + + sub getAttributeHash + { + my $self = shift; + if (!exists $tiecache{ 0+$self }) { + tie my %attr, 'XML::LibXML::AttributeHash', $self, weaken => 1; + $tiecache{ 0+$self } = \%attr; + } + return $tiecache{ 0+$self }; + } + sub DESTROY + { + my ($self) = @_; + $self->__destroy_tiecache; + $self->SUPER::DESTROY; + } +} + +sub setNamespace { + my $self = shift; + my $n = $self->localname; + if ( $self->_setNamespace(@_) ){ + if ( scalar @_ < 3 || $_[2] == 1 ){ + $self->setNodeName( $n ); + } + return 1; + } + return 0; +} + +sub getAttribute { + my $self = shift; + my $name = $_[0]; + if ( $name =~ /^xmlns(?::|$)/ ) { + # user wants to get a namespace ... + (my $prefix = $name )=~s/^xmlns:?//; + $self->_getNamespaceDeclURI($prefix); + } + else { + $self->_getAttribute(@_); + } +} + +sub setAttribute { + my ( $self, $name, $value ) = @_; + if ( $name =~ /^xmlns(?::|$)/ ) { + # user wants to set the special attribute for declaring XML namespace ... + + # this is fine but not exactly DOM conformant behavior, btw (according to DOM we should + # probably declare an attribute which looks like XML namespace declaration + # but isn't) + (my $nsprefix = $name )=~s/^xmlns:?//; + my $nn = $self->nodeName; + if ( $nn =~ /^\Q${nsprefix}\E:/ ) { + # the element has the same prefix + $self->setNamespaceDeclURI($nsprefix,$value) || + $self->setNamespace($value,$nsprefix,1); + ## + ## We set the namespace here. + ## This is helpful, as in: + ## + ## | $e = XML::LibXML::Element->new('foo:bar'); + ## | $e->setAttribute('xmlns:foo','http://yoyodine') + ## + } + else { + # just modify the namespace + $self->setNamespaceDeclURI($nsprefix, $value) || + $self->setNamespace($value,$nsprefix,0); + } + } + else { + $self->_setAttribute($name, $value); + } +} + +sub getAttributeNS { + my $self = shift; + my ($nsURI, $name) = @_; + croak("invalid attribute name") if !defined($name) or $name eq q{}; + if ( defined($nsURI) and $nsURI eq XML_XMLNS_NS ) { + $self->_getNamespaceDeclURI($name eq 'xmlns' ? undef : $name); + } + else { + $self->_getAttributeNS(@_); + } +} + +sub setAttributeNS { + my ($self, $nsURI, $qname, $value)=@_; + unless (defined $qname and length $qname) { + croak("bad name"); + } + if (defined($nsURI) and $nsURI eq XML_XMLNS_NS) { + if ($qname !~ /^xmlns(?::|$)/) { + croak("NAMESPACE ERROR: Namespace declarations must have the prefix 'xmlns'"); + } + $self->setAttribute($qname,$value); # see implementation above + return; + } + if ($qname=~/:/ and not (defined($nsURI) and length($nsURI))) { + croak("NAMESPACE ERROR: Attribute without a prefix cannot be in a namespace"); + } + if ($qname=~/^xmlns(?:$|:)/) { + croak("NAMESPACE ERROR: 'xmlns' prefix and qualified-name are reserved for the namespace ".XML_XMLNS_NS); + } + if ($qname=~/^xml:/ and not (defined $nsURI and $nsURI eq XML_XML_NS)) { + croak("NAMESPACE ERROR: 'xml' prefix is reserved for the namespace ".XML_XML_NS); + } + $self->_setAttributeNS( defined $nsURI ? $nsURI : undef, $qname, $value ); +} + +sub getElementsByTagName { + my ( $node , $name ) = @_; + my $xpath = $name eq '*' ? "descendant::*" : "descendant::*[name()='$name']"; + my @nodes = $node->_findnodes($xpath); + return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); +} + +sub getElementsByTagNameNS { + my ( $node, $nsURI, $name ) = @_; + my $xpath; + if ( $name eq '*' ) { + if ( $nsURI eq '*' ) { + $xpath = "descendant::*"; + } else { + $xpath = "descendant::*[namespace-uri()='$nsURI']"; + } + } elsif ( $nsURI eq '*' ) { + $xpath = "descendant::*[local-name()='$name']"; + } else { + $xpath = "descendant::*[local-name()='$name' and namespace-uri()='$nsURI']"; + } + my @nodes = $node->_findnodes($xpath); + return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); +} + +sub getElementsByLocalName { + my ( $node,$name ) = @_; + my $xpath; + if ($name eq '*') { + $xpath = "descendant::*"; + } else { + $xpath = "descendant::*[local-name()='$name']"; + } + my @nodes = $node->_findnodes($xpath); + return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); +} + +sub getChildrenByTagName { + my ( $node, $name ) = @_; + my @nodes; + if ($name eq '*') { + @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() } + $node->childNodes(); + } else { + @nodes = grep { $_->nodeName eq $name } $node->childNodes(); + } + return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); +} + +sub getChildrenByLocalName { + my ( $node, $name ) = @_; + # my @nodes; + # if ($name eq '*') { + # @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() } + # $node->childNodes(); + # } else { + # @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() and + # $_->localName eq $name } $node->childNodes(); + # } + # return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); + my @nodes = $node->_getChildrenByTagNameNS('*',$name); + return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); +} + +sub getChildrenByTagNameNS { + my ( $node, $nsURI, $name ) = @_; + my @nodes = $node->_getChildrenByTagNameNS($nsURI,$name); + return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); +} + +sub appendWellBalancedChunk { + my ( $self, $chunk ) = @_; + + my $local_parser = XML::LibXML->new(); + my $frag = $local_parser->parse_xml_chunk( $chunk ); + + $self->appendChild( $frag ); +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Text Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::Text; + +use vars qw(@ISA); +@ISA = ('XML::LibXML::Node'); + +sub attributes { return; } + +sub deleteDataString { + my ($node, $string, $all) = @_; + + return $node->replaceDataString($string, '', $all); +} + +sub replaceDataString { + my ( $node, $left_proto, $right,$all ) = @_; + + # Assure we exchange the strings and not expressions! + my $left = quotemeta($left_proto); + + my $datastr = $node->nodeValue(); + if ( $all ) { + $datastr =~ s/$left/$right/g; + } + else{ + $datastr =~ s/$left/$right/; + } + $node->setData( $datastr ); +} + +sub replaceDataRegEx { + my ( $node, $leftre, $rightre, $flags ) = @_; + return unless defined $leftre; + $rightre ||= ""; + + my $datastr = $node->nodeValue(); + my $restr = "s/" . $leftre . "/" . $rightre . "/"; + $restr .= $flags if defined $flags; + + eval '$datastr =~ '. $restr; + + $node->setData( $datastr ); +} + +1; + +package XML::LibXML::Comment; + +use vars qw(@ISA); +@ISA = ('XML::LibXML::Text'); + +1; + +package XML::LibXML::CDATASection; + +use vars qw(@ISA); +@ISA = ('XML::LibXML::Text'); + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Attribute Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::Attr; +use vars qw( @ISA ) ; +@ISA = ('XML::LibXML::Node') ; + +sub setNamespace { + my ($self,$href,$prefix) = @_; + my $n = $self->localname; + if ( $self->_setNamespace($href,$prefix) ) { + $self->setNodeName($n); + return 1; + } + + return 0; +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Dtd Interface # +#-------------------------------------------------------------------------# +# this is still under construction +# +package XML::LibXML::Dtd; +use vars qw( @ISA ); +@ISA = ('XML::LibXML::Node'); + +# at least DESTROY and CLONE_SKIP must be inherited + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::PI Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::PI; +use vars qw( @ISA ); +@ISA = ('XML::LibXML::Node'); + +sub setData { + my $pi = shift; + + my $string = ""; + if ( scalar @_ == 1 ) { + $string = shift; + } + else { + my %h = @_; + $string = join " ", map {$_.'="'.$h{$_}.'"'} keys %h; + } + + # the spec says any char but "?>" [17] + $pi->_setData( $string ) unless $string =~ /\?>/; +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Namespace Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::Namespace; + +sub CLONE_SKIP { 1 } + +# In fact, this is not a node! +sub prefix { return "xmlns"; } +sub getPrefix { return "xmlns"; } +sub getNamespaceURI { return "http://www.w3.org/2000/xmlns/" }; + +sub getNamespaces { return (); } + +sub nodeName { + my $self = shift; + my $nsP = $self->localname; + return ( defined($nsP) && length($nsP) ) ? "xmlns:$nsP" : "xmlns"; +} +sub name { goto &nodeName } +sub getName { goto &nodeName } + +sub isEqualNode { + my ( $self, $ref ) = @_; + if ( ref($ref) eq "XML::LibXML::Namespace" ) { + return $self->_isEqual($ref); + } + return 0; +} + +sub isSameNode { + my ( $self, $ref ) = @_; + if ( $$self == $$ref ){ + return 1; + } + return 0; +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::NamedNodeMap Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::NamedNodeMap; + +use XML::LibXML qw(:libxml); + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +sub new { + my $class = shift; + my $self = bless { Nodes => [@_] }, $class; + $self->{NodeMap} = { map { $_->nodeName => $_ } @_ }; + return $self; +} + +sub length { return scalar( @{$_[0]->{Nodes}} ); } +sub nodes { return $_[0]->{Nodes}; } +sub item { $_[0]->{Nodes}->[$_[1]]; } + +sub getNamedItem { + my $self = shift; + my $name = shift; + + return $self->{NodeMap}->{$name}; +} + +sub setNamedItem { + my $self = shift; + my $node = shift; + + my $retval; + if ( defined $node ) { + if ( scalar @{$self->{Nodes}} ) { + my $name = $node->nodeName(); + if ( $node->nodeType() == XML_NAMESPACE_DECL ) { + return; + } + if ( defined $self->{NodeMap}->{$name} ) { + if ( $node->isSameNode( $self->{NodeMap}->{$name} ) ) { + return; + } + $retval = $self->{NodeMap}->{$name}->replaceNode( $node ); + } + else { + $self->{Nodes}->[0]->addSibling($node); + } + + $self->{NodeMap}->{$name} = $node; + push @{$self->{Nodes}}, $node; + } + else { + # not done yet + # can this be properly be done??? + warn "not done yet\n"; + } + } + return $retval; +} + +sub removeNamedItem { + my $self = shift; + my $name = shift; + my $retval; + if ( $name =~ /^xmlns/ ) { + warn "not done yet\n"; + } + elsif ( exists $self->{NodeMap}->{$name} ) { + $retval = $self->{NodeMap}->{$name}; + $retval->unbindNode; + delete $self->{NodeMap}->{$name}; + $self->{Nodes} = [grep {not($retval->isSameNode($_))} @{$self->{Nodes}}]; + } + + return $retval; +} + +sub getNamedItemNS { + my $self = shift; + my $nsURI = shift; + my $name = shift; + return undef; +} + +sub setNamedItemNS { + my $self = shift; + my $nsURI = shift; + my $node = shift; + return undef; +} + +sub removeNamedItemNS { + my $self = shift; + my $nsURI = shift; + my $name = shift; + return undef; +} + +1; + +package XML::LibXML::_SAXParser; + +# this is pseudo class!!! and it will be removed as soon all functions +# moved to XS level + +use XML::SAX::Exception; + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +# these functions will use SAX exceptions as soon i know how things really work +sub warning { + my ( $parser, $message, $line, $col ) = @_; + my $error = XML::SAX::Exception::Parse->new( LineNumber => $line, + ColumnNumber => $col, + Message => $message, ); + $parser->{HANDLER}->warning( $error ); +} + +sub error { + my ( $parser, $message, $line, $col ) = @_; + + my $error = XML::SAX::Exception::Parse->new( LineNumber => $line, + ColumnNumber => $col, + Message => $message, ); + $parser->{HANDLER}->error( $error ); +} + +sub fatal_error { + my ( $parser, $message, $line, $col ) = @_; + my $error = XML::SAX::Exception::Parse->new( LineNumber => $line, + ColumnNumber => $col, + Message => $message, ); + $parser->{HANDLER}->fatal_error( $error ); +} + +1; + +package XML::LibXML::RelaxNG; + +sub CLONE_SKIP { 1 } + +sub new { + my $class = shift; + my %args = @_; + + my $self = undef; + if ( defined $args{location} ) { + $self = $class->parse_location( $args{location}, XML::LibXML->_parser_options(\%args), $args{recover} ); + } + elsif ( defined $args{string} ) { + $self = $class->parse_buffer( $args{string}, XML::LibXML->_parser_options(\%args), $args{recover} ); + } + elsif ( defined $args{DOM} ) { + $self = $class->parse_document( $args{DOM}, XML::LibXML->_parser_options(\%args), $args{recover} ); + } + + return $self; +} + +1; + +package XML::LibXML::Schema; + +sub CLONE_SKIP { 1 } + +sub new { + my $class = shift; + my %args = @_; + + my $self = undef; + if ( defined $args{location} ) { + $self = $class->parse_location( $args{location}, XML::LibXML->_parser_options(\%args), $args{recover} ); + } + elsif ( defined $args{string} ) { + $self = $class->parse_buffer( $args{string}, XML::LibXML->_parser_options(\%args), $args{recover} ); + } + + return $self; +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::Pattern Interface # +#-------------------------------------------------------------------------# + +package XML::LibXML::Pattern; + +sub CLONE_SKIP { 1 } + +sub new { + my $class = shift; + my ($pattern,$ns_map)=@_; + my $self = undef; + + unless (UNIVERSAL::can($class,'_compilePattern')) { + croak("Cannot create XML::LibXML::Pattern - ". + "your libxml2 is compiled without pattern support!"); + } + + if (ref($ns_map) eq 'HASH') { + # translate prefix=>URL hash to a (URL,prefix) list + $self = $class->_compilePattern($pattern,0,[reverse %$ns_map]); + } else { + $self = $class->_compilePattern($pattern,0); + } + return $self; +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::RegExp Interface # +#-------------------------------------------------------------------------# + +package XML::LibXML::RegExp; + +sub CLONE_SKIP { 1 } + +sub new { + my $class = shift; + my ($regexp)=@_; + unless (UNIVERSAL::can($class,'_compile')) { + croak("Cannot create XML::LibXML::RegExp - ". + "your libxml2 is compiled without regexp support!"); + } + return $class->_compile($regexp); +} + +1; + +#-------------------------------------------------------------------------# +# XML::LibXML::XPathExpression Interface # +#-------------------------------------------------------------------------# + +package XML::LibXML::XPathExpression; + +sub CLONE_SKIP { 1 } + +1; + + +#-------------------------------------------------------------------------# +# XML::LibXML::InputCallback Interface # +#-------------------------------------------------------------------------# +package XML::LibXML::InputCallback; + +use vars qw($_CUR_CB @_GLOBAL_CALLBACKS @_CB_STACK $_CB_NESTED_DEPTH @_CB_NESTED_STACK); + +BEGIN { + $_CUR_CB = undef; + @_GLOBAL_CALLBACKS = (); + @_CB_STACK = (); + $_CB_NESTED_DEPTH = 0; + @_CB_NESTED_STACK = (); +} + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +#-------------------------------------------------------------------------# +# global callbacks # +#-------------------------------------------------------------------------# +sub _callback_match { + my $uri = shift; + my $retval = 0; + + # loop through the callbacks, and find the first matching one. + # The callbacks are stored in execution order (reverse stack order). + # Any new global callbacks are shifted to the callback stack. + foreach my $cb ( @_GLOBAL_CALLBACKS ) { + + # callbacks have to return 1, 0 or undef, while 0 and undef + # are handled the same way. + # in fact, if callbacks return other values, the global match + # assumes silently that the callback failed. + + $retval = $cb->[0]->($uri); + + if ( defined $retval and $retval == 1 ) { + # make the other callbacks use this callback + $_CUR_CB = $cb; + unshift @_CB_STACK, $cb; + last; + } + } + + return $retval; +} + +sub _callback_open { + my $uri = shift; + my $retval = undef; + + # the open callback has to return a defined value. + # if one works on files this can be a file handle. But + # depending on the needs of the callback it also can be a + # database handle or a integer labeling a certain dataset. + + if ( defined $_CUR_CB ) { + $retval = $_CUR_CB->[1]->( $uri ); + + # reset the callbacks, if one callback cannot open an uri + if ( not defined $retval or $retval == 0 ) { + shift @_CB_STACK; + $_CUR_CB = $_CB_STACK[0]; + } + } + + return $retval; +} + +sub _callback_read { + my $fh = shift; + my $buflen = shift; + + my $retval = undef; + + if ( defined $_CUR_CB ) { + $retval = $_CUR_CB->[2]->( $fh, $buflen ); + } + + return $retval; +} + +sub _callback_close { + my $fh = shift; + my $retval = 0; + + if ( defined $_CUR_CB ) { + $retval = $_CUR_CB->[3]->( $fh ); + shift @_CB_STACK; + $_CUR_CB = $_CB_STACK[0]; + } + + return $retval; +} + +#-------------------------------------------------------------------------# +# member functions and methods # +#-------------------------------------------------------------------------# + +sub new { + my $CLASS = shift; + return bless {'_CALLBACKS' => []}, $CLASS; +} + +# add a callback set to the callback stack +# synopsis: $icb->register_callbacks( [$match_cb, $open_cb, $read_cb, $close_cb] ); +sub register_callbacks { + my $self = shift; + my $cbset = shift; + + # test if callback set is complete + if ( ref $cbset eq "ARRAY" and scalar( @$cbset ) == 4 ) { + unshift @{$self->{_CALLBACKS}}, $cbset; + } +} + +# remove a callback set to the callback stack +# if a callback set is passed, this function will check for the match function +sub unregister_callbacks { + my $self = shift; + my $cbset = shift; + if ( ref $cbset eq "ARRAY" and scalar( @$cbset ) == 4 ) { + $self->{_CALLBACKS} = [grep { $_->[0] != $cbset->[0] } @{$self->{_CALLBACKS}}]; + } + else { + shift @{$self->{_CALLBACKS}}; + } +} + +# make libxml2 use the callbacks +sub init_callbacks { + my $self = shift; + my $parser = shift; + + #initialize the libxml2 callbacks unless this is a nested callback + $self->lib_init_callbacks() unless($_CB_NESTED_DEPTH); + + #store the callbacks for any outer executing parser instance + $_CB_NESTED_DEPTH++; + push @_CB_NESTED_STACK, [ + $_CUR_CB, + [@_CB_STACK], + [@_GLOBAL_CALLBACKS], + ]; + + #initialize the callback variables for the current parser + $_CUR_CB = undef; + @_CB_STACK = (); + @_GLOBAL_CALLBACKS = @{ $self->{_CALLBACKS} }; + + #attach parser specific callbacks + if($parser) { + my $mcb = $parser->match_callback(); + my $ocb = $parser->open_callback(); + my $rcb = $parser->read_callback(); + my $ccb = $parser->close_callback(); + if ( defined $mcb and defined $ocb and defined $rcb and defined $ccb ) { + unshift @_GLOBAL_CALLBACKS, [$mcb, $ocb, $rcb, $ccb]; + } + } + + #attach global callbacks + if ( defined $XML::LibXML::match_cb and + defined $XML::LibXML::open_cb and + defined $XML::LibXML::read_cb and + defined $XML::LibXML::close_cb ) { + push @_GLOBAL_CALLBACKS, [$XML::LibXML::match_cb, + $XML::LibXML::open_cb, + $XML::LibXML::read_cb, + $XML::LibXML::close_cb]; + } +} + +# reset libxml2's callbacks +sub cleanup_callbacks { + my $self = shift; + + #restore the callbacks for the outer parser instance + $_CB_NESTED_DEPTH--; + my $saved = pop @_CB_NESTED_STACK; + $_CUR_CB = $saved->[0]; + @_CB_STACK = (@{$saved->[1]}); + @_GLOBAL_CALLBACKS = (@{$saved->[2]}); + + #clean up the libxml2 callbacks unless there are still outer parsing instances + $self->lib_cleanup_callbacks() unless($_CB_NESTED_DEPTH); +} + +$XML::LibXML::__loaded=1; + +1; + +__END__ diff --git a/LibXML.pod b/LibXML.pod new file mode 100644 index 0000000..73a9e50 --- /dev/null +++ b/LibXML.pod @@ -0,0 +1,527 @@ +=head1 NAME + +XML::LibXML - Perl Binding for libxml2 + +=head1 SYNOPSIS + + + + use XML::LibXML; + my $dom = XML::LibXML->load_xml(string => <<'EOT'); + + EOT + + $Version_String = XML::LibXML::LIBXML_DOTTED_VERSION; + $Version_ID = XML::LibXML::LIBXML_VERSION; + $DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION; + $libxmlnode = XML::LibXML->import_GDOME( $node, $deep ); + $gdomenode = XML::LibXML->export_GDOME( $node, $deep ); + +=head1 DESCRIPTION + +This module is an interface to libxml2, providing XML and HTML parsers with +DOM, SAX and XMLReader interfaces, a large subset of DOM Layer 3 interface and +a XML::XPath-like interface to XPath API of libxml2. The module is split into +several packages which are not described in this section; unless stated +otherwise, you only need to C<<<<<< use XML::LibXML; >>>>>> in your programs. + +Check out XML::LibXML by Example (L<<<<<< http://grantm.github.io/perl-libxml-by-example/ >>>>>>) for a tutorial. + +For further information, please check the following documentation: + +=over 4 + +=item L<<<<<< XML::LibXML::Parser >>>>>> + +Parsing XML files with XML::LibXML + + +=item L<<<<<< XML::LibXML::DOM >>>>>> + +XML::LibXML Document Object Model (DOM) Implementation + + +=item L<<<<<< XML::LibXML::SAX >>>>>> + +XML::LibXML direct SAX parser + + +=item L<<<<<< XML::LibXML::Reader >>>>>> + +Reading XML with a pull-parser + + +=item L<<<<<< XML::LibXML::Dtd >>>>>> + +XML::LibXML frontend for DTD validation + + +=item L<<<<<< XML::LibXML::RelaxNG >>>>>> + +XML::LibXML frontend for RelaxNG schema validation + + +=item L<<<<<< XML::LibXML::Schema >>>>>> + +XML::LibXML frontend for W3C Schema schema validation + + +=item L<<<<<< XML::LibXML::XPathContext >>>>>> + +API for evaluating XPath expressions with enhanced support for the evaluation +context + + +=item L<<<<<< XML::LibXML::InputCallback >>>>>> + +Implementing custom URI Resolver and input callbacks + + +=item L<<<<<< XML::LibXML::Common >>>>>> + +Common functions for XML::LibXML related Classes + + + +=back + +The nodes in the Document Object Model (DOM) are represented by the following +classes (most of which "inherit" from L<<<<<< XML::LibXML::Node >>>>>>): + +=over 4 + +=item L<<<<<< XML::LibXML::Document >>>>>> + +XML::LibXML class for DOM document nodes + + +=item L<<<<<< XML::LibXML::Node >>>>>> + +Abstract base class for XML::LibXML DOM nodes + + +=item L<<<<<< XML::LibXML::Element >>>>>> + +XML::LibXML class for DOM element nodes + + +=item L<<<<<< XML::LibXML::Text >>>>>> + +XML::LibXML class for DOM text nodes + + +=item L<<<<<< XML::LibXML::Comment >>>>>> + +XML::LibXML class for comment DOM nodes + + +=item L<<<<<< XML::LibXML::CDATASection >>>>>> + +XML::LibXML class for DOM CDATA sections + + +=item L<<<<<< XML::LibXML::Attr >>>>>> + +XML::LibXML DOM attribute class + + +=item L<<<<<< XML::LibXML::DocumentFragment >>>>>> + +XML::LibXML's DOM L2 Document Fragment implementation + + +=item L<<<<<< XML::LibXML::Namespace >>>>>> + +XML::LibXML DOM namespace nodes + + +=item L<<<<<< XML::LibXML::PI >>>>>> + +XML::LibXML DOM processing instruction nodes + + + +=back + + +=head1 ENCODINGS SUPPORT IN XML::LIBXML + +Recall that since version 5.6.1, Perl distinguishes between character strings +(internally encoded in UTF-8) and so called binary data and, accordingly, +applies either character or byte semantics to them. A scalar representing a +character string is distinguished from a byte string by special flag (UTF8). +Please refer to I<<<<<< perlunicode >>>>>> for details. + +XML::LibXML's API is designed to deal with many encodings of XML documents +completely transparently, so that the application using XML::LibXML can be +completely ignorant about the encoding of the XML documents it works with. On +the other hand, functions like C<<<<<< XML::LibXML::Document-EsetEncoding >>>>>> give the user control over the document encoding. + +To ensure the aforementioned transparency and uniformity, most functions of +XML::LibXML that work with in-memory trees accept and return data as character +strings (i.e. UTF-8 encoded with the UTF8 flag on) regardless of the original +document encoding; however, the functions related to I/O operations (i.e. +parsing and saving) operate with binary data (in the original document +encoding) obeying the encoding declaration of the XML documents. + +Below we summarize basic rules and principles regarding encoding: + + +=over 4 + +=item 1. + +Do NOT apply any encoding-related PerlIO layers (C<<<<<< :utf8 >>>>>> or C<<<<<< :encoding(...) >>>>>>) to file handles that are an input for the parses or an output for a +serializer of (full) XML documents. This is because the conversion of the data +to/from the internal character representation is provided by libxml2 itself +which must be able to enforce the encoding specified by the C<<<<<< E?xml version="1.0" encoding="..."?E >>>>>> declaration. Here is an example to follow: + + use XML::LibXML; + # load + open my $fh, '<', 'file.xml'; + binmode $fh; # drop all PerlIO layers possibly created by a use open pragma + $doc = XML::LibXML->load_xml(IO => $fh); + + # save + open my $out, '>', 'out.xml'; + binmode $out; # as above + $doc->toFH($out); + # or + print {$out} $doc->toString(); + + + + + +=item 2. + +All functions working with DOM accept and return character strings (UTF-8 +encoded with UTF8 flag on). E.g. + + my $doc = XML::LibXML::Document->new('1.0',$some_encoding); + my $element = $doc->createElement($name); + $element->appendText($text); + $xml_fragment = $element->toString(); # returns a character string + $xml_document = $doc->toString(); # returns a byte string + +where C<<<<<< $some_encoding >>>>>> is the document encoding that will be used when saving the document, and C<<<<<< $name >>>>>> and C<<<<<< $text >>>>>> contain character strings (UTF-8 encoded with UTF8 flag on). Note that the +method C<<<<<< toString >>>>>> returns XML as a character string if applied to other node than the Document +node and a byte string containing the appropriate + + + +declaration if applied to a L<<<<<< XML::LibXML::Document >>>>>>. + + + +=item 3. + +DOM methods also accept binary strings in the original encoding of the document +to which the node belongs (UTF-8 is assumed if the node is not attached to any +document). Exploiting this feature is NOT RECOMMENDED since it is considered +bad practice. + + + + my $doc = XML::LibXML::Document->new('1.0','iso-8859-2'); + my $text = $doc->createTextNode($some_latin2_encoded_byte_string); + # WORKS, BUT NOT RECOMMENDED! + + + +=back + +I<<<<<< NOTE: >>>>>> libxml2 support for many encodings is based on the iconv library. The actual +list of supported encodings may vary from platform to platform. To test if your +platform works correctly with your language encoding, build a simple document +in the particular encoding and try to parse it with XML::LibXML to see if the +parser produces any errors. Occasional crashes were reported on rare platforms +that ship with a broken version of iconv. + + +=head1 THREAD SUPPORT + +XML::LibXML since 1.67 partially supports Perl threads in Perl >= 5.8.8. +XML::LibXML can be used with threads in two ways: + +By default, all XML::LibXML classes use CLONE_SKIP class method to prevent Perl +from copying XML::LibXML::* objects when a new thread is spawn. In this mode, +all XML::LibXML::* objects are thread specific. This is the safest way to work +with XML::LibXML in threads. + +Alternatively, one may use + + + + use threads; + use XML::LibXML qw(:threads_shared); + +to indicate, that all XML::LibXML node and parser objects should be shared +between the main thread and any thread spawn from there. For example, in + + + + my $doc = XML::LibXML->load_xml(location => $filename); + my $thr = threads->new(sub{ + # code working with $doc + 1; + }); + $thr->join; + +the variable C<<<<<< $doc >>>>>> refers to the exact same XML::LibXML::Document in the spawned thread as in the +main thread. + +Without using mutex locks, parallel threads may read the same document (i.e. +any node that belongs to the document), parse files, and modify different +documents. + +However, if there is a chance that some of the threads will attempt to modify a +document (or even create new nodes based on that document, e.g. with C<<<<<< $doc-EcreateElement >>>>>>) that other threads may be reading at the same time, the user is responsible +for creating a mutex lock and using it in I<<<<<< both >>>>>> in the thread that modifies and the thread that reads: + + + + my $doc = XML::LibXML->load_xml(location => $filename); + my $mutex : shared; + my $thr = threads->new(sub{ + lock $mutex; + my $el = $doc->createElement('foo'); + # ... + 1; + }); + { + lock $mutex; + my $root = $doc->documentElement; + say $root->name; + } + $thr->join; + +Note that libxml2 uses dictionaries to store short strings and these +dictionaries are kept on a document node. Without mutex locks, it could happen +in the previous example that the thread modifies the dictionary while other +threads attempt to read from it, which could easily lead to a crash. + + +=head1 VERSION INFORMATION + +Sometimes it is useful to figure out, for which version XML::LibXML was +compiled for. In most cases this is for debugging or to check if a given +installation meets all functionality for the package. The functions +XML::LibXML::LIBXML_DOTTED_VERSION and XML::LibXML::LIBXML_VERSION provide this +version information. Both functions simply pass through the values of the +similar named macros of libxml2. Similarly, XML::LibXML::LIBXML_RUNTIME_VERSION +returns the version of the (usually dynamically) linked libxml2. + +=over 4 + +=item XML::LibXML::LIBXML_DOTTED_VERSION + + $Version_String = XML::LibXML::LIBXML_DOTTED_VERSION; + +Returns the version string of the libxml2 version XML::LibXML was compiled for. +This will be "2.6.2" for "libxml2 2.6.2". + + +=item XML::LibXML::LIBXML_VERSION + + $Version_ID = XML::LibXML::LIBXML_VERSION; + +Returns the version id of the libxml2 version XML::LibXML was compiled for. +This will be "20602" for "libxml2 2.6.2". Don't mix this version id with +$XML::LibXML::VERSION. The latter contains the version of XML::LibXML itself +while the first contains the version of libxml2 XML::LibXML was compiled for. + + +=item XML::LibXML::LIBXML_RUNTIME_VERSION + + $DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION; + +Returns a version string of the libxml2 which is (usually dynamically) linked +by XML::LibXML. This will be "20602" for libxml2 released as "2.6.2" and +something like "20602-CVS2032" for a CVS build of libxml2. + +XML::LibXML issues a warning if the version of libxml2 dynamically linked to it +is less than the version of libxml2 which it was compiled against. + + + +=back + + +=head1 EXPORTS + +By default the module exports all constants and functions listed in the :all +tag, described below. + + +=head1 EXPORT TAGS + +=over 4 + +=item C<<<<<< :all >>>>>> + +Includes the tags C<<<<<< :libxml >>>>>>, C<<<<<< :encoding >>>>>>, and C<<<<<< :ns >>>>>> described below. + + +=item C<<<<<< :libxml >>>>>> + +Exports integer constants for DOM node types. + + + + XML_ELEMENT_NODE => 1 + XML_ATTRIBUTE_NODE => 2 + XML_TEXT_NODE => 3 + XML_CDATA_SECTION_NODE => 4 + XML_ENTITY_REF_NODE => 5 + XML_ENTITY_NODE => 6 + XML_PI_NODE => 7 + XML_COMMENT_NODE => 8 + XML_DOCUMENT_NODE => 9 + XML_DOCUMENT_TYPE_NODE => 10 + XML_DOCUMENT_FRAG_NODE => 11 + XML_NOTATION_NODE => 12 + XML_HTML_DOCUMENT_NODE => 13 + XML_DTD_NODE => 14 + XML_ELEMENT_DECL => 15 + XML_ATTRIBUTE_DECL => 16 + XML_ENTITY_DECL => 17 + XML_NAMESPACE_DECL => 18 + XML_XINCLUDE_START => 19 + XML_XINCLUDE_END => 20 + + +=item C<<<<<< :encoding >>>>>> + +Exports two encoding conversion functions from XML::LibXML::Common. + + + + encodeToUTF8() + decodeFromUTF8() + + +=item C<<<<<< :ns >>>>>> + +Exports two convenience constants: the implicit namespace of the reserved C<<<<<< xml: >>>>>> prefix, and the implicit namespace for the reserved C<<<<<< xmlns: >>>>>> prefix. + + + + XML_XML_NS => 'http://www.w3.org/XML/1998/namespace' + XML_XMLNS_NS => 'http://www.w3.org/2000/xmlns/' + + + +=back + + +=head1 RELATED MODULES + +The modules described in this section are not part of the XML::LibXML package +itself. As they support some additional features, they are mentioned here. + +=over 4 + +=item L<<<<<< XML::LibXSLT >>>>>> + +XSLT 1.0 Processor using libxslt and XML::LibXML + + +=item L<<<<<< XML::LibXML::Iterator >>>>>> + +XML::LibXML Implementation of the DOM Traversal Specification + + +=item L<<<<<< XML::CompactTree::XS >>>>>> + +Uses XML::LibXML::Reader to very efficiently to parse XML document or element +into native Perl data structures, which are less flexible but significantly +faster to process then DOM. + + + +=back + + +=head1 XML::LIBXML AND XML::GDOME + +Note: I<<<<<< THE FUNCTIONS DESCRIBED HERE ARE STILL EXPERIMENTAL >>>>>> + +Although both modules make use of libxml2's XML capabilities, the DOM +implementation of both modules are not compatible. But still it is possible to +exchange nodes from one DOM to the other. The concept of this exchange is +pretty similar to the function cloneNode(): The particular node is copied on +the low-level to the opposite DOM implementation. + +Since the DOM implementations cannot coexist within one document, one is forced +to copy each node that should be used. Because you are always keeping two nodes +this may cause quite an impact on a machines memory usage. + +XML::LibXML provides two functions to export or import GDOME nodes: +import_GDOME() and export_GDOME(). Both function have two parameters: the node +and a flag for recursive import. The flag works as in cloneNode(). + +The two functions allow one to export and import XML::GDOME nodes explicitly, +however, XML::LibXML also allows the transparent import of XML::GDOME nodes in +functions such as appendChild(), insertAfter() and so on. While native nodes +are automatically adopted in most functions XML::GDOME nodes are always cloned +in advance. Thus if the original node is modified after the operation, the node +in the XML::LibXML document will not have this information. + +=over 4 + +=item import_GDOME + + $libxmlnode = XML::LibXML->import_GDOME( $node, $deep ); + +This clones an XML::GDOME node to an XML::LibXML node explicitly. + + +=item export_GDOME + + $gdomenode = XML::LibXML->export_GDOME( $node, $deep ); + +Allows one to clone an XML::LibXML node into an XML::GDOME node. + + + +=back + + +=head1 CONTACTS + +For bug reports, please use the CPAN request tracker on +http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-LibXML + +For suggestions etc., and other issues related to XML::LibXML you may use the +perl XML mailing list (C<<<<<< perl-xml@listserv.ActiveState.com >>>>>>), where most XML-related Perl modules are discussed. In case of problems you +should check the archives of that list first. Many problems are already +discussed there. You can find the list's archives and subscription options at L<<<<<< http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml >>>>>>. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/LibXML.xs b/LibXML.xs new file mode 100644 index 0000000..51bb3be --- /dev/null +++ b/LibXML.xs @@ -0,0 +1,9713 @@ +/* $Id$ + * + * This is free software, you may use it and distribute it under the same terms as + * Perl itself. + * + * Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_MSC_VER) +#define _CRT_SECURE_NO_DEPRECATE 1 +#define _CRT_NONSTDC_NO_DEPRECATE 1 +#endif + +/* perl stuff */ +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#define NEED_newRV_noinc_GLOBAL +#define NEED_sv_2pv_flags +#include "ppport.h" +#include "Av_CharPtrPtr.h" /* XS_*_charPtrPtr() */ + +#include + +#ifndef WIN32 +#include +#endif + +/* libxml2 configuration properties */ +#include + +#define DEBUG_C14N + +/* libxml2 stuff */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* #include */ +#include +#include +#include + +#ifdef LIBXML_PATTERN_ENABLED +#include +#endif + +#ifdef LIBXML_REGEXP_ENABLED +#include +#endif + +#if LIBXML_VERSION >= 20510 +#define HAVE_SCHEMAS +#include +#include +#endif + +#if LIBXML_VERSION >= 20621 +#define WITH_SERRORS +#ifdef LIBXML_READER_ENABLED +#define HAVE_READER_SUPPORT +#include +#endif +#endif + +#ifdef LIBXML_CATALOG_ENABLED +#include +#endif + +#ifdef HAVE_READER_SUPPORT + +typedef enum { + XML_TEXTREADER_NONE = -1, + XML_TEXTREADER_START= 0, + XML_TEXTREADER_ELEMENT= 1, + XML_TEXTREADER_END= 2, + XML_TEXTREADER_EMPTY= 3, + XML_TEXTREADER_BACKTRACK= 4, + XML_TEXTREADER_DONE= 5, + XML_TEXTREADER_ERROR= 6 +} xmlTextReaderState; + +typedef enum { + XML_TEXTREADER_NOT_VALIDATE = 0, + XML_TEXTREADER_VALIDATE_DTD = 1, + XML_TEXTREADER_VALIDATE_RNG = 2, + XML_TEXTREADER_VALIDATE_XSD = 4 +} xmlTextReaderValidate; + +#endif /* HAVE_READER_SUPPORT */ + +/* GDOME support + * libgdome installs only the core functions to the system. + * this is not enough for XML::LibXML <-> XML::GDOME conversion. + * therefore there is the need to ship as well the GDOME core headers. + */ +#ifdef XML_LIBXML_GDOME_SUPPORT + +#include +#include + +#endif + + +#if LIBXML_VERSION < 20621 +/* HTML_PARSE_RECOVER was added in libxml2 2.6.21 */ +# define HTML_PARSE_RECOVER XML_PARSE_RECOVER +#endif + + +/* XML::LibXML stuff */ +#include "perl-libxml-mm.h" +#include "perl-libxml-sax.h" + +#include "dom.h" +#include "xpath.h" +#include "xpathcontext.h" + +#ifdef __cplusplus +} +#endif + + +#define TEST_PERL_FLAG(flag) \ + SvTRUE(get_sv(flag, FALSE)) ? 1 : 0 + +#ifdef HAVE_READER_SUPPORT +#define LIBXML_READER_TEST_ELEMENT(reader,name,nsURI) \ + (xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) && \ + ((!nsURI && !name) \ + || \ + (!nsURI && xmlStrcmp((const xmlChar*)name, xmlTextReaderConstName(reader) ) == 0 ) \ + || \ + (nsURI && xmlStrcmp((const xmlChar*)nsURI, xmlTextReaderConstNamespaceUri(reader))==0 \ + && \ + (!name || xmlStrcmp((const xmlChar*)name, xmlTextReaderConstLocalName(reader)) == 0))) +#endif + +/* this should keep the default */ +static xmlExternalEntityLoader LibXML_old_ext_ent_loader = NULL; + +/* global external entity loader */ +SV *EXTERNAL_ENTITY_LOADER_FUNC = (SV *)NULL; + +SV* PROXY_NODE_REGISTRY_MUTEX = NULL; + +/* **************************************************************** + * Error handler + * **************************************************************** */ + +#ifdef WITH_SERRORS + +#define INIT_READER_ERROR_HANDLER(reader) + +#define PREINIT_SAVED_ERROR SV* saved_error = sv_2mortal(newSV(0)); + +#define INIT_ERROR_HANDLER \ + xmlSetGenericErrorFunc((void *)saved_error, \ + (xmlGenericErrorFunc) LibXML_flat_handler); \ + xmlSetStructuredErrorFunc((void *)saved_error, \ + (xmlStructuredErrorFunc)LibXML_struct_error_handler) + +#define REPORT_ERROR(recover) LibXML_report_error_ctx(saved_error, recover) + +#define CLEANUP_ERROR_HANDLER xmlSetGenericErrorFunc(NULL,NULL); \ + xmlSetStructuredErrorFunc(NULL,NULL) + +#else /* WITH_SERRORS */ + +#define INIT_READER_ERROR_HANDLER(reader) \ + if (reader) \ + xmlTextReaderSetErrorHandler(reader, LibXML_reader_error_handler, \ + sv_2mortal(newSVpv("",0))); + +#define PREINIT_SAVED_ERROR SV* saved_error = sv_2mortal(newSVpv("",0)); + +#define INIT_ERROR_HANDLER \ + xmlSetGenericErrorFunc((void *) saved_error, \ + (xmlGenericErrorFunc) LibXML_error_handler_ctx) + +#define REPORT_ERROR(recover) LibXML_report_error_ctx(saved_error, recover) + +#define CLEANUP_ERROR_HANDLER xmlSetGenericErrorFunc(NULL,NULL); + + +#endif /* WITH_SERRORS */ + +#ifdef WITH_SERRORS +void +LibXML_struct_error_callback(SV * saved_error, SV * libErr ) +{ + + dTHX; + dSP; + + if ( saved_error == NULL ) { + warn( "have no save_error\n" ); + } + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(libErr)); + if ( saved_error != NULL && SvOK(saved_error) ) { + XPUSHs(saved_error); + } + PUTBACK; + + if ( saved_error != NULL ) { + call_pv( "XML::LibXML::Error::_callback_error", G_SCALAR | G_EVAL ); + } else { + call_pv( "XML::LibXML::Error::_instant_error_callback", G_SCALAR ); + } + SPAGAIN; + + if ( SvTRUE(ERRSV) ) { + (void) POPs; + croak_obj; + } else { + sv_setsv(saved_error, POPs); + } + + PUTBACK; + FREETMPS; + LEAVE; +} + +void +LibXML_struct_error_handler(SV * saved_error, xmlErrorPtr error ) +{ + const char * CLASS = "XML::LibXML::LibError"; + SV* libErr; + + libErr = NEWSV(0,0); + sv_setref_pv( libErr, CLASS, (void*)error ); + LibXML_struct_error_callback( saved_error, libErr); +} + + +void +LibXML_flat_handler(SV * saved_error, const char * msg, ...) +{ + SV* sv; + va_list args; + + sv = newSVpv("",0); + va_start(args, msg); + sv_vcatpvf(sv, msg, &args); + va_end(args); + xs_warn("flat error\n"); + LibXML_struct_error_callback( saved_error, sv); +} + +#endif /* WITH_SERRORS */ + + +/* If threads-support is working correctly in libxml2 then + * this method will be called with the correct thread-context */ +void +LibXML_error_handler_ctx(void * ctxt, const char * msg, ...) +{ + va_list args; + SV * saved_error = (SV *) ctxt; + + /* If saved_error is null we croak with the error */ + if( NULL == saved_error ) { + SV * sv = sv_2mortal(newSV(0)); + va_start(args, msg); + /* vfprintf(stderr, msg, args); */ + sv_vsetpvfn(sv, msg, strlen(msg), &args, NULL, 0, NULL); + va_end(args); + croak("%s", SvPV_nolen(sv)); + /* Otherwise, save the error */ + } else { + va_start(args, msg); + /* vfprintf(stderr, msg, args); */ + sv_vcatpvfn(saved_error, msg, strlen(msg), &args, NULL, 0, NULL); + va_end(args); + } +} + +static void +LibXML_validity_error_ctx(void * ctxt, const char *msg, ...) +{ + va_list args; + SV * saved_error = (SV *) ctxt; + + /* If saved_error is null we croak with the error */ + if( NULL == saved_error ) { + SV * sv = sv_2mortal(newSV(0)); + va_start(args, msg); + sv_vsetpvfn(sv, msg, strlen(msg), &args, NULL, 0, NULL); + va_end(args); + croak("%s", SvPV_nolen(sv)); + /* Otherwise, save the error */ + } else { + va_start(args, msg); + sv_vcatpvfn(saved_error, msg, strlen(msg), &args, NULL, 0, NULL); + va_end(args); + } +} + +static void +LibXML_validity_warning_ctx(void * ctxt, const char *msg, ...) +{ + va_list args; + SV * saved_error = (SV *) ctxt; + STRLEN len; + + /* If saved_error is null we croak with the error */ + if( NULL == saved_error ) { + SV * sv = sv_2mortal(newSV(0)); + va_start(args, msg); + sv_vsetpvfn(sv, msg, strlen(msg), &args, NULL, 0, NULL); + va_end(args); + croak("LibXML_validity_warning_ctx internal error: context was null (%s)", SvPV_nolen(sv)); + /* Otherwise, give the warning */ + } else { + va_start(args, msg); + sv_vcatpvfn(saved_error, msg, strlen(msg), &args, NULL, 0, NULL); + va_end(args); + warn("validation error: %s", SvPV(saved_error, len)); + } +} + +static int +LibXML_will_die_ctx(SV * saved_error, int recover) +{ +#ifdef WITH_SERRORS + if( saved_error!=NULL && SvOK(saved_error) ) { + if ( recover == 0 ) { + return 1; + } + } +#else + if( 0 < SvCUR( saved_error ) ) { + if ( recover == 0 ) { + return 1; + } + } +#endif + return 0; +} + + +static void +LibXML_report_error_ctx(SV * saved_error, int recover) +{ +#ifdef WITH_SERRORS + if( saved_error!=NULL && SvOK( saved_error ) ) { + if (!recover || recover==1) { + dTHX; + dSP; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + EXTEND(SP, 1); + PUSHs(saved_error); + PUTBACK; + if (recover==1) { + call_pv( "XML::LibXML::Error::_report_warning", G_SCALAR | G_DISCARD); + } else { + call_pv( "XML::LibXML::Error::_report_error", G_SCALAR | G_DISCARD); + } + SPAGAIN; + + PUTBACK; + FREETMPS; + LEAVE; + } + } +#else + if( 0 < SvCUR( saved_error ) ) { + if( recover ) { + if ( recover == 1 ) { + warn("%s", SvPV_nolen(saved_error)); + } /* else recover silently */ + } else { + croak("%s", SvPV_nolen(saved_error)); + } + } +#endif +} + +#ifdef HAVE_READER_SUPPORT + +#ifndef WITH_SERRORS +static void +LibXML_reader_error_handler(void * ctxt, + const char * msg, + xmlParserSeverities severity, + xmlTextReaderLocatorPtr locator) +{ + int line = xmlTextReaderLocatorLineNumber(locator); + xmlChar * filename = xmlTextReaderLocatorBaseURI(locator); + SV * msg_sv = sv_2mortal(C2Sv((xmlChar*) msg,NULL)); + SV * error = sv_2mortal(newSVpv("", 0)); + + switch (severity) { + case XML_PARSER_SEVERITY_VALIDITY_WARNING: + sv_catpv(error, "Validity WARNING"); + break; + case XML_PARSER_SEVERITY_WARNING: + sv_catpv(error, "Reader WARNING"); + break; + case XML_PARSER_SEVERITY_VALIDITY_ERROR: + sv_catpv(error, "Validity ERROR"); + break; + case XML_PARSER_SEVERITY_ERROR: + sv_catpv(error, "Reader ERROR"); + break; + } + if (filename) { + sv_catpvf(error, " in %s", filename); + xmlFree(filename); + } + if (line >= 0) { + sv_catpvf(error, " at line %d", line); + } + sv_catpvf(error, ": %s", SvPV_nolen(msg_sv)); + if (severity == XML_PARSER_SEVERITY_VALIDITY_WARNING || + severity == XML_PARSER_SEVERITY_WARNING ) { + warn("%s", SvPV_nolen(error)); + } else { + SV * error_sv = (SV*) ctxt; + if (error_sv) { + sv_catpvf(error_sv, "%s ", SvPV_nolen(error)); + } else { + croak("%s",SvPV_nolen(error)); + } + } +} +#endif /* !defined WITH_SERRORS */ + +SV * +LibXML_get_reader_error_data(xmlTextReaderPtr reader) +{ + SV * saved_error = NULL; + xmlTextReaderErrorFunc f = NULL; + xmlTextReaderGetErrorHandler(reader, &f, (void **) &saved_error); + return saved_error; +} + +#ifndef WITH_SERRORS +static void +LibXML_report_reader_error(xmlTextReaderPtr reader) +{ + SV * saved_error = NULL; + xmlTextReaderErrorFunc f = NULL; + xmlTextReaderGetErrorHandler(reader, &f, (void **) &saved_error); + if ( saved_error && SvOK( saved_error) && 0 < SvCUR( saved_error ) ) { + croak("%s", SvPV_nolen(saved_error)); + } +} +#endif /* !defined WITH_SERRORS */ + +#endif /* HAVE_READER_SUPPORT */ + +static int +LibXML_get_recover(HV * real_obj) +{ + SV** item = hv_fetch( real_obj, "XML_LIBXML_RECOVER", 18, 0 ); + return ( item != NULL && SvTRUE(*item) ) ? SvIV(*item) : 0; +} + +static SV * +LibXML_NodeToSv(HV * real_obj, xmlNodePtr real_doc) +{ + SV** item = hv_fetch( real_obj, "XML_LIBXML_GDOME", 16, 0 ); + + if ( item != NULL && SvTRUE(*item) ) { + return PmmNodeToGdomeSv(real_doc); + } + else { + return PmmNodeToSv(real_doc, NULL); + } +} + +/* **************************************************************** + * IO callbacks + * **************************************************************** */ + +int +LibXML_read_perl (SV * ioref, char * buffer, int len) +{ + dTHX; + dSP; + + int cnt; + SV * read_results; + IV read_results_iv; + STRLEN read_length; + char * chars; + SV * tbuff = NEWSV(0,len); + SV * tsize = newSViv(len); + + ENTER; + SAVETMPS; + + PUSHMARK(SP); + EXTEND(SP, 3); + PUSHs(ioref); + PUSHs(sv_2mortal(tbuff)); + PUSHs(sv_2mortal(tsize)); + PUTBACK; + + if (sv_isobject(ioref)) { + cnt = call_method("read", G_SCALAR | G_EVAL); + } + else { + cnt = call_pv("XML::LibXML::__read", G_SCALAR | G_EVAL); + } + + SPAGAIN; + + if (cnt != 1) { + croak("read method call failed"); + } + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + + read_results = POPs; + + if (!SvOK(read_results)) { + croak("read error"); + } + + read_results_iv = SvIV(read_results); + + chars = SvPV(tbuff, read_length); + + /* + * If the file handle uses an encoding layer, the length parameter is + * interpreted as character count, not as byte count. So it's possible + * that more than len bytes are read which would overflow the buffer. + * Check for this condition also by comparing the return value. + */ + if (read_results_iv != read_length || read_length > len) { + croak("Read more bytes than requested. Do you use an encoding-related" + " PerlIO layer?"); + } + strncpy(buffer, chars, read_length); + + PUTBACK; + FREETMPS; + LEAVE; + + return read_length; +} + +/* used only by Reader */ +int +LibXML_close_perl (SV * ioref) +{ + SvREFCNT_dec(ioref); + return 0; +} + +int +LibXML_input_match(char const * filename) +{ + int results; + int count; + SV * res; + + results = 0; + + { + dTHX; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP); + EXTEND(SP, 1); + PUSHs(sv_2mortal(newSVpv((char*)filename, 0))); + PUTBACK; + + count = call_pv("XML::LibXML::InputCallback::_callback_match", + G_SCALAR | G_EVAL); + + SPAGAIN; + + if (count != 1) { + croak("match callback must return a single value"); + } + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + + res = POPs; + + if (SvTRUE(res)) { + results = 1; + } + + PUTBACK; + FREETMPS; + LEAVE; + } + return results; +} + +void * +LibXML_input_open(char const * filename) +{ + SV * results; + int count; + + dTHX; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP); + EXTEND(SP, 1); + PUSHs(sv_2mortal(newSVpv((char*)filename, 0))); + PUTBACK; + + count = call_pv("XML::LibXML::InputCallback::_callback_open", + G_SCALAR | G_EVAL); + + SPAGAIN; + + if (count != 1) { + croak("open callback must return a single value"); + } + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + + results = POPs; + + (void)SvREFCNT_inc(results); + + PUTBACK; + FREETMPS; + LEAVE; + + return (void *)results; +} + +int +LibXML_input_read(void * context, char * buffer, int len) +{ + STRLEN res_len; + const char * output; + SV * ctxt; + SV * output_sv; + + res_len = 0; + ctxt = (SV *)context; + + { + int count; + + dTHX; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP); + EXTEND(SP, 2); + PUSHs(ctxt); + PUSHs(sv_2mortal(newSViv(len))); + PUTBACK; + + count = call_pv("XML::LibXML::InputCallback::_callback_read", + G_SCALAR | G_EVAL); + + SPAGAIN; + + if (count != 1) { + croak("read callback must return a single value"); + } + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + + /* + * Handle undef()s gracefully, to avoid using POPpx which warns upon $^W + * being set. See t/49callbacks_returning_undef.t and: + * https://rt.cpan.org/Ticket/Display.html?id=70321 + * */ + + output_sv = POPs; + output = SvOK(output_sv) ? SvPV_nolen(output_sv) : NULL; + + if (output != NULL) { + res_len = strlen(output); + if (res_len) { + strncpy(buffer, output, res_len); + } + else { + buffer[0] = 0; + } + } + + PUTBACK; + FREETMPS; + LEAVE; + } + return res_len; +} + +void +LibXML_input_close(void * context) +{ + SV * ctxt; + + ctxt = (SV *)context; + + { + dTHX; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP); + EXTEND(SP, 1); + PUSHs(ctxt); + PUTBACK; + + call_pv("XML::LibXML::InputCallback::_callback_close", + G_SCALAR | G_EVAL | G_DISCARD); + + SvREFCNT_dec(ctxt); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS; + LEAVE; + } +} + +int +LibXML_output_write_handler(void * ioref, char * buffer, int len) +{ + if ( buffer != NULL && len > 0) { + dTHX; + dSP; + + SV * tbuff = newSVpv(buffer,len); + SV * tsize = newSViv(len); + + + ENTER; + SAVETMPS; + + PUSHMARK(SP); + EXTEND(SP, 3); + PUSHs((SV*)ioref); + PUSHs(sv_2mortal(tbuff)); + PUSHs(sv_2mortal(tsize)); + PUTBACK; + + call_pv("XML::LibXML::__write", G_SCALAR | G_EVAL | G_DISCARD ); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS; + LEAVE; + } + return len; +} + +int +LibXML_output_close_handler( void * handler ) +{ + return 1; +} + +xmlParserInputPtr +LibXML_load_external_entity( + const char * URL, + const char * ID, + xmlParserCtxtPtr ctxt) +{ + SV ** func; + int count; + SV * results; + STRLEN results_len; + const char * results_pv; + xmlParserInputBufferPtr input_buf; + + if (ctxt->_private == NULL && EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { + return xmlNewInputFromFile(ctxt, URL); + } + + if (URL == NULL) { + URL = ""; + } + if (ID == NULL) { + ID = ""; + } + + /* fetch entity loader function */ + if(EXTERNAL_ENTITY_LOADER_FUNC != NULL) + { + func = &EXTERNAL_ENTITY_LOADER_FUNC; + } + else + { + SV * self; + HV * real_obj; + + self = (SV *)ctxt->_private; + real_obj = (HV *)SvRV(self); + func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + } + + if (func != NULL && SvTRUE(*func)) { + dTHX; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(sv_2mortal(newSVpv((char*)URL, 0))); + XPUSHs(sv_2mortal(newSVpv((char*)ID, 0))); + PUTBACK; + + count = call_sv(*func, G_SCALAR | G_EVAL); + + SPAGAIN; + + if (count == 0) { + croak("external entity handler did not return a value"); + } + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + + results = POPs; + + results_pv = SvPV(results, results_len); + input_buf = xmlParserInputBufferCreateMem( + results_pv, + results_len, + XML_CHAR_ENCODING_NONE + ); + + PUTBACK; + FREETMPS; + LEAVE; + + return xmlNewIOInputStream(ctxt, input_buf, XML_CHAR_ENCODING_NONE); + } + else { + if (URL == NULL) { + return NULL; + } + return xmlNewInputFromFile(ctxt, URL); + } +} + +/* **************************************************************** + * Helper functions + * **************************************************************** */ + +HV* +LibXML_init_parser( SV * self, xmlParserCtxtPtr ctxt ) { + /* we fetch all switches and callbacks from the hash */ + HV* real_obj = NULL; + SV** item = NULL; + int parserOptions = XML_PARSE_NODICT; + + /* A NOTE ABOUT xmlInitParser(); */ + /* xmlInitParser() should be used only at startup and*/ + /* not for initializing a single parser. libxml2's */ + /* documentation is quite clear about this. If */ + /* something fails it is a problem elsewhere. Simply */ + /* resetting the entire module will lead to unwanted */ + /* results in server environments, such as if */ + /* mod_perl is used together with php's xml module. */ + /* calling xmlInitParser() here is definitely wrong! */ + /* xmlInitParser(); */ + +#ifndef WITH_SERRORS + xmlGetWarningsDefaultValue = 0; +#endif + if ( self != NULL ) { + /* first fetch the values from the hash */ + real_obj = (HV *)SvRV(self); + + item = hv_fetch( real_obj, "XML_LIBXML_PARSER_OPTIONS", 25, 0 ); + if (item != NULL && SvOK(*item)) parserOptions = sv_2iv(*item); + + /* compatibility with old implementation: + absence of XML_PARSE_DTDLOAD (load_ext_dtd) implies absence of + all DTD related flags + */ + if ((parserOptions & XML_PARSE_DTDLOAD) == 0) { + parserOptions &= ~(XML_PARSE_DTDVALID | XML_PARSE_DTDATTR | XML_PARSE_NOENT ); + } + if (ctxt) xmlCtxtUseOptions(ctxt, parserOptions ); /* Note: sets ctxt->linenumbers = 1 */ + + /* + * Without this if/else conditional, NOBLANKS has no effect. + * + * For more information, see: + * + * https://rt.cpan.org/Ticket/Display.html?id=76696 + * + * */ + if (parserOptions & XML_PARSE_NOBLANKS) { + xmlKeepBlanksDefault(0); + } + else { + xmlKeepBlanksDefault(1); + } + + item = hv_fetch( real_obj, "XML_LIBXML_LINENUMBERS", 22, 0 ); + if ( item != NULL && SvTRUE(*item) ) { + if (ctxt) ctxt->linenumbers = 1; + } + else { + if (ctxt) ctxt->linenumbers = 0; + } + + if(EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { + item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + if (item != NULL && SvTRUE(*item)) { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); + } + else + { + if (parserOptions & XML_PARSE_NONET) + { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + /* LibXML_old_ext_ent_loader = NULL; */ + } + } + } + + return real_obj; +} + +void +LibXML_cleanup_parser() { +#ifndef WITH_SERRORS + xmlGetWarningsDefaultValue = 0; +#endif + if (EXTERNAL_ENTITY_LOADER_FUNC == NULL && LibXML_old_ext_ent_loader != NULL) + { + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_old_ext_ent_loader ); + } +} + +int +LibXML_test_node_name( xmlChar * name ) +{ + xmlChar * cur = name; + int tc = 0; + int len = 0; + + if ( cur == NULL || *cur == 0 ) { + /* warn("name is empty" ); */ + return(0); + } + + tc = domParseChar( cur, &len ); + + if ( !( IS_LETTER( tc ) || (tc == '_') || (tc == ':')) ) { + /* warn( "is not a letter\n" ); */ + return(0); + } + + tc = 0; + cur += len; + + while (*cur != 0 ) { + tc = domParseChar( cur, &len ); + + if (!(IS_LETTER(tc) || IS_DIGIT(tc) || (tc == '_') || + (tc == '-') || (tc == ':') || (tc == '.') || + IS_COMBINING(tc) || IS_EXTENDER(tc)) ) { + /* warn( "is not a letter\n" ); */ + return(0); + } + tc = 0; + cur += len; + } + + /* warn("name is ok"); */ + return(1); +} + +/* Assumes that the node has a proxy. */ +static void +LibXML_reparent_removed_node(xmlNodePtr node) { + /* + * Attribute nodes can't be added to document fragments. Adding + * DTD nodes would cause a memory leak. + */ + if (node->type != XML_ATTRIBUTE_NODE + && node->type != XML_DTD_NODE) { + ProxyNodePtr docfrag = PmmNewFragment(node->doc); + xmlAddChild(PmmNODE(docfrag), node); + PmmFixOwner(PmmPROXYNODE(node), docfrag); + } +} + +static void +LibXML_set_int_subset(xmlDocPtr doc, xmlNodePtr dtd) { + xmlNodePtr old_dtd = (xmlNodePtr)doc->intSubset; + if (old_dtd == dtd) { + return; + } + + if (old_dtd != NULL) { + xmlUnlinkNode(old_dtd); + + if (PmmPROXYNODE(old_dtd) == NULL) { + xmlFreeDtd((xmlDtdPtr)old_dtd); + } + } + + doc->intSubset = (xmlDtdPtr)dtd; +} + +/* **************************************************************** + * XPathContext helper functions + * **************************************************************** */ + +/* Temporary node pool: * + * Stores pnode in context node-pool hash table in order to preserve * + * at least one reference. * + * If pnode is NULL, only return current value for hashkey */ +static SV* +LibXML_XPathContext_pool ( xmlXPathContextPtr ctxt, void * hashkey, SV * pnode ) { + SV ** value; + SV * key; + STRLEN len; + char * strkey; + dTHX; + + if (XPathContextDATA(ctxt)->pool == NULL) { + if (pnode == NULL) { + return &PL_sv_undef; + } else { + xs_warn("initializing node pool"); + XPathContextDATA(ctxt)->pool = newHV(); + } + } + + key = newSViv(PTR2IV(hashkey)); + strkey = SvPV(key, len); + if (pnode != NULL && !hv_exists(XPathContextDATA(ctxt)->pool,strkey,len)) { + value = hv_store(XPathContextDATA(ctxt)->pool,strkey,len, SvREFCNT_inc(pnode),0); + } else { + value = hv_fetch(XPathContextDATA(ctxt)->pool,strkey,len, 0); + } + SvREFCNT_dec(key); + + if (value == NULL) { + return &PL_sv_undef; + } else { + return *value; + } +} + +/* convert perl result structures to LibXML structures */ +static xmlXPathObjectPtr +LibXML_perldata_to_LibXMLdata(xmlXPathParserContextPtr ctxt, + SV* perl_result) { + dTHX; + + if (!SvOK(perl_result)) { + return (xmlXPathObjectPtr)xmlXPathNewCString(""); + } + if (SvROK(perl_result) && + SvTYPE(SvRV(perl_result)) == SVt_PVAV) { + /* consider any array ref to be a nodelist */ + int i; + int length; + SV ** pnode; + AV * array_result; + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlXPathNewNodeSet(INT2PTR(xmlNodePtr,NULL)); + array_result = (AV*)SvRV(perl_result); + length = av_len(array_result); + for( i = 0; i <= length ; i++ ) { + pnode = av_fetch(array_result,i,0); + if (pnode != NULL && sv_isobject(*pnode) && + sv_derived_from(*pnode,"XML::LibXML::Node")) { + xmlXPathNodeSetAdd(ret->nodesetval, + INT2PTR(xmlNodePtr,PmmSvNode(*pnode))); + if(ctxt) { + LibXML_XPathContext_pool(ctxt->context, + PmmSvNode(*pnode), *pnode); + } + } else { + warn("XPathContext: ignoring non-node member of a nodelist"); + } + } + return ret; + } else if (sv_isobject(perl_result) && + (SvTYPE(SvRV(perl_result)) == SVt_PVMG)) + { + if (sv_derived_from(perl_result, "XML::LibXML::Node")) { + xmlNodePtr tmp_node; + xmlXPathObjectPtr ret; + + ret = INT2PTR(xmlXPathObjectPtr,xmlXPathNewNodeSet(NULL)); + tmp_node = INT2PTR(xmlNodePtr,PmmSvNode(perl_result)); + xmlXPathNodeSetAdd(ret->nodesetval,tmp_node); + if(ctxt) { + LibXML_XPathContext_pool(ctxt->context, PmmSvNode(perl_result), + perl_result); + } + + return ret; + } + else if (sv_isa(perl_result, "XML::LibXML::Boolean")) { + return (xmlXPathObjectPtr) + xmlXPathNewBoolean(SvIV(SvRV(perl_result))); + } + else if (sv_isa(perl_result, "XML::LibXML::Literal")) { + return (xmlXPathObjectPtr) + xmlXPathNewCString(SvPV_nolen(SvRV(perl_result))); + } + else if (sv_isa(perl_result, "XML::LibXML::Number")) { + return (xmlXPathObjectPtr) + xmlXPathNewFloat(SvNV(SvRV(perl_result))); + } + } else if (SvNOK(perl_result) || SvIOK(perl_result)) { + return (xmlXPathObjectPtr)xmlXPathNewFloat(SvNV(perl_result)); + } else { + return (xmlXPathObjectPtr) + xmlXPathNewCString(SvPV_nolen(perl_result)); + } + return NULL; +} + + +/* save XPath context and XPathContextDATA for recursion */ +static xmlXPathContextPtr +LibXML_save_context(xmlXPathContextPtr ctxt) +{ + xmlXPathContextPtr copy; + copy = xmlMalloc(sizeof(xmlXPathContext)); + if (copy) { + /* backup ctxt */ + memcpy(copy, ctxt, sizeof(xmlXPathContext)); + /* clear namespaces so that they are not freed and overwritten + by configure_namespaces */ + ctxt->namespaces = NULL; + /* backup data */ + copy->user = xmlMalloc(sizeof(XPathContextData)); + if (XPathContextDATA(copy)) { + memcpy(XPathContextDATA(copy), XPathContextDATA(ctxt),sizeof(XPathContextData)); + /* clear ctxt->pool, so that it is not used freed during re-entrance */ + XPathContextDATA(ctxt)->pool = NULL; + } + } + return copy; +} + +/* restore XPath context and XPathContextDATA from a saved copy */ +static void +LibXML_restore_context(xmlXPathContextPtr ctxt, xmlXPathContextPtr copy) +{ + dTHX; + /* cleanup */ + if (XPathContextDATA(ctxt)) { + /* cleanup newly created pool */ + if (XPathContextDATA(ctxt)->pool != NULL && + SvOK(XPathContextDATA(ctxt)->pool)) { + SvREFCNT_dec((SV *)XPathContextDATA(ctxt)->pool); + } + } + if (ctxt->namespaces) { + /* free namespaces allocated during recursion */ + xmlFree( ctxt->namespaces ); + } + + /* restore context */ + if (copy) { + /* 1st restore our data */ + if (XPathContextDATA(copy)) { + memcpy(XPathContextDATA(ctxt),XPathContextDATA(copy),sizeof(XPathContextData)); + xmlFree(XPathContextDATA(copy)); + copy->user = XPathContextDATA(ctxt); + } + /* now copy the rest */ + memcpy(ctxt, copy, sizeof(xmlXPathContext)); + xmlFree(copy); + } +} + + +/* **************************************************************** + * Variable Lookup + * **************************************************************** */ +/* Much of the code is borrowed from Matt Sergeant's XML::LibXSLT */ +static xmlXPathObjectPtr +LibXML_generic_variable_lookup(void* varLookupData, + const xmlChar *name, + const xmlChar *ns_uri) +{ + xmlXPathObjectPtr ret; + xmlXPathContextPtr ctxt; + xmlXPathContextPtr copy; + XPathContextDataPtr data; + I32 count; + dTHX; + dSP; + + ctxt = (xmlXPathContextPtr) varLookupData; + if ( ctxt == NULL ) + croak("XPathContext: missing xpath context"); + data = XPathContextDATA(ctxt); + if ( data == NULL ) + croak("XPathContext: missing xpath context private data"); + if ( data->varLookup == NULL || !SvROK(data->varLookup) || + SvTYPE(SvRV(data->varLookup)) != SVt_PVCV ) + croak("XPathContext: lost variable lookup function!"); + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs( (data->varData != NULL) ? data->varData : &PL_sv_undef ); + XPUSHs(sv_2mortal(C2Sv(name,NULL))); + XPUSHs(sv_2mortal(C2Sv(ns_uri,NULL))); + + /* save context to allow recursive usage of XPathContext */ + copy = LibXML_save_context(ctxt); + + PUTBACK ; + count = call_sv(data->varLookup, G_SCALAR|G_EVAL); + SPAGAIN; + + /* restore the xpath context */ + LibXML_restore_context(ctxt, copy); + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + if (count != 1) croak("XPathContext: variable lookup function returned none or more than one argument!"); + + ret = LibXML_perldata_to_LibXMLdata(NULL, POPs); + + PUTBACK; + FREETMPS; + LEAVE; + return ret; +} + +/* **************************************************************** + * Generic Extension Function + * **************************************************************** */ +/* Much of the code is borrowed from Matt Sergeant's XML::LibXSLT */ +static void +LibXML_generic_extension_function(xmlXPathParserContextPtr ctxt, int nargs) +{ + xmlXPathObjectPtr obj,ret; + xmlNodeSetPtr nodelist = NULL; + int count; + SV * perl_dispatch; + int i; + STRLEN len; + ProxyNodePtr owner = NULL; + SV *key; + char *strkey; + const char *function, *uri; + SV **perl_function; + dTHX; + dSP; + SV * data; + xmlXPathContextPtr copy; + + /* warn("entered LibXML_generic_extension_function for %s\n",ctxt->context->function); */ + data = (SV *) ctxt->context->funcLookupData; + if (ctxt->context->funcLookupData == NULL || !SvROK(data) || + SvTYPE(SvRV(data)) != SVt_PVHV) { + croak("XPathContext: lost function lookup data structure!"); + } + + function = (char*) ctxt->context->function; + uri = (char*) ctxt->context->functionURI; + + key = newSVpvn("",0); + if (uri && *uri) { + sv_catpv(key, "{"); + sv_catpv(key, (const char*)uri); + sv_catpv(key, "}"); + } + sv_catpv(key, (const char*)function); + strkey = SvPV(key, len); + perl_function = + hv_fetch((HV*)SvRV(data), strkey, len, 0); + if ( perl_function == NULL || !SvOK(*perl_function) || + !(SvPOK(*perl_function) || + (SvROK(*perl_function) && + SvTYPE(SvRV(*perl_function)) == SVt_PVCV))) { + croak("XPathContext: lost perl extension function!"); + } + SvREFCNT_dec(key); + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(*perl_function); + + /* set up call to perl dispatcher function */ + for (i = 0; i < nargs; i++) { + obj = (xmlXPathObjectPtr)valuePop(ctxt); + switch (obj->type) { + case XPATH_XSLT_TREE: + case XPATH_NODESET: + nodelist = obj->nodesetval; + if ( nodelist ) { + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::NodeList", 0))); + XPUSHs(sv_2mortal(newSViv(nodelist->nodeNr))); + if ( nodelist->nodeNr > 0 ) { + int j; + const char * cls = "XML::LibXML::Node"; + xmlNodePtr tnode; + SV * element; + int l = nodelist->nodeNr; + + for( j = 0 ; j < l; j++){ + tnode = nodelist->nodeTab[j]; + if( tnode != NULL && tnode->doc != NULL) { + owner = PmmOWNERPO(PmmNewNode(INT2PTR(xmlNodePtr,tnode->doc))); + } else { + owner = NULL; + } + if (tnode->type == XML_NAMESPACE_DECL) { + element = NEWSV(0,0); + cls = PmmNodeTypeName( tnode ); + element = sv_setref_pv( element, + (const char *)cls, + (void *)xmlCopyNamespace((xmlNsPtr)tnode) + ); + } + else { + element = PmmNodeToSv(tnode, owner); + } + XPUSHs( sv_2mortal(element) ); + } + } + } else { + /* PP: We can't simply leave out an empty nodelist as Matt does! */ + /* PP: The number of arguments must match! */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::NodeList", 0))); + XPUSHs(sv_2mortal(newSViv(0))); + } + /* prevent libxml2 from freeing the actual nodes */ + if (obj->boolval) obj->boolval=0; + break; + case XPATH_BOOLEAN: + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Boolean", 0))); + XPUSHs(sv_2mortal(newSViv(obj->boolval))); + break; + case XPATH_NUMBER: + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Number", 0))); + XPUSHs(sv_2mortal(newSVnv(obj->floatval))); + break; + case XPATH_STRING: + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Literal", 0))); + XPUSHs(sv_2mortal(C2Sv(obj->stringval, 0))); + break; + default: + warn("Unknown XPath return type (%d) in call to {%s}%s - assuming string", obj->type, uri, function); + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Literal", 0))); + XPUSHs(sv_2mortal(C2Sv(xmlXPathCastToString(obj), 0))); + } + xmlXPathFreeObject(obj); + } + + /* save context to allow recursive usage of XPathContext */ + copy = LibXML_save_context(ctxt->context); + + /* call perl dispatcher */ + PUTBACK; + perl_dispatch = sv_2mortal(newSVpv("XML::LibXML::XPathContext::_perl_dispatcher",0)); + count = call_sv(perl_dispatch, G_SCALAR|G_EVAL); + SPAGAIN; + + /* restore the xpath context */ + LibXML_restore_context(ctxt->context, copy); + + if (SvTRUE(ERRSV)) { + (void) POPs; + croak_obj; + } + + if (count != 1) croak("XPathContext: perl-dispatcher in pm file returned none or more than one argument!"); + + ret = LibXML_perldata_to_LibXMLdata(ctxt, POPs); + + valuePush(ctxt, ret); + PUTBACK; + FREETMPS; + LEAVE; +} + +static void +LibXML_configure_namespaces( xmlXPathContextPtr ctxt ) { + xmlNodePtr node = ctxt->node; + + if (ctxt->namespaces != NULL) { + xmlFree( ctxt->namespaces ); + ctxt->namespaces = NULL; + } + if (node != NULL) { + if (node->type == XML_DOCUMENT_NODE) { + ctxt->namespaces = xmlGetNsList( node->doc, + xmlDocGetRootElement( node->doc ) ); + } else { + ctxt->namespaces = xmlGetNsList(node->doc, node); + } + ctxt->nsNr = 0; + if (ctxt->namespaces != NULL) { + int cur=0; + xmlNsPtr ns; + /* we now walk through the list and + drop every ns that was declared via registration */ + while (ctxt->namespaces[cur] != NULL) { + ns = ctxt->namespaces[cur]; + if (ns->prefix==NULL || + xmlHashLookup(ctxt->nsHash, ns->prefix) != NULL) { + /* drop it */ + ctxt->namespaces[cur]=NULL; + } else { + if (cur != ctxt->nsNr) { + /* move the item to the new tail */ + ctxt->namespaces[ctxt->nsNr]=ns; + ctxt->namespaces[cur]=NULL; + } + ctxt->nsNr++; + } + cur++; + } + } + } +} + +static void +LibXML_configure_xpathcontext( xmlXPathContextPtr ctxt ) { + xmlNodePtr node = PmmSvNode(XPathContextDATA(ctxt)->node); + + if (node != NULL) { + ctxt->doc = node->doc; + } else { + ctxt->doc = NULL; + } + ctxt->node = node; + LibXML_configure_namespaces(ctxt); +} + +#ifdef HAVE_READER_SUPPORT + +static void +LibXML_set_reader_preserve_flag( xmlTextReaderPtr reader ) { + HV *hash; + char key[32]; + + hash = get_hv("XML::LibXML::Reader::_preserve_flag", 0); + if (!hash) { + return; + } + + (void) snprintf(key, sizeof(key), "%p", reader); + (void) hv_store(hash, key, strlen(key), newSV(0), 0); +} + +static int +LibXML_get_reader_preserve_flag( xmlTextReaderPtr reader ) { + HV *hash; + char key[32]; + + hash = get_hv("XML::LibXML::Reader::_preserve_flag", 0); + if (!hash) { + return 0; + } + + (void) snprintf(key, sizeof(key), "%p", reader); + if ( hv_exists(hash, key, strlen(key)) ) { + (void) hv_delete(hash, key, strlen(key), G_DISCARD); + return 1; + } + + return 0; +} + +#endif /* HAVE_READER_SUPPORT */ + +extern void boot_XML__LibXML__Devel(pTHX_ CV*); + +MODULE = XML::LibXML PACKAGE = XML::LibXML + +PROTOTYPES: DISABLE + +BOOT: + /* Load Devel first, so debug_memory can + be called before any allocation. */ + + /* The ++ is a bit hacky, but boot_blahblah_Devel, being an + * XSUB body, will try to pop once more the mark we have just + * (implicitly) popped, this boot sector also being an XSUB body */ + PL_markstack_ptr++; + boot_XML__LibXML__Devel(aTHX_ cv); + LIBXML_TEST_VERSION + xmlInitParser(); + PmmSAXInitialize(aTHX); +#ifndef WITH_SERRORS + xmlGetWarningsDefaultValue = 0; +#endif +#ifdef LIBXML_CATALOG_ENABLED + /* xmlCatalogSetDebug(10); */ + xmlInitializeCatalog(); /* use catalog data */ +#endif + + +void +_CLONE( class ) + CODE: +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + PmmCloneProxyNodes(); +#endif + +int +_leaked_nodes() + CODE: + RETVAL = 0; +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + RETVAL = PmmProxyNodeRegistrySize(); +#endif + OUTPUT: + RETVAL + +void +_dump_registry() + PPCODE: +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + PmmDumpRegistry(PmmREGISTRY); +#endif + +const char * +LIBXML_DOTTED_VERSION() + CODE: + RETVAL = LIBXML_DOTTED_VERSION; + OUTPUT: + RETVAL + + +int +LIBXML_VERSION() + CODE: + RETVAL = LIBXML_VERSION; + OUTPUT: + RETVAL + +int +HAVE_STRUCT_ERRORS() + CODE: +#ifdef WITH_SERRORS + RETVAL = 1; +#else + RETVAL = 0; +#endif + OUTPUT: + RETVAL + +int +HAVE_SCHEMAS() + CODE: +#ifdef HAVE_SCHEMAS + RETVAL = 1; +# if LIBXML_VERSION == 20904 + /* exists but broken https://github.com/shlomif/libxml2-2.9.4-reader-schema-regression */ + RETVAL = 0; +# endif +#else + RETVAL = 0; +#endif + OUTPUT: + RETVAL + +int +HAVE_READER() + CODE: +#ifdef HAVE_READER_SUPPORT + RETVAL = 1; +#else + RETVAL = 0; +#endif + OUTPUT: + RETVAL + +int +HAVE_THREAD_SUPPORT() + CODE: +#ifdef XML_LIBXML_THREADS + RETVAL = (PmmUSEREGISTRY ? 1 : 0); +#else + RETVAL = 0; +#endif + OUTPUT: + RETVAL + + +const char * +LIBXML_RUNTIME_VERSION() + CODE: + RETVAL = xmlParserVersion; + OUTPUT: + RETVAL + +void +END() + CODE: + xmlCleanupParser(); + +int +INIT_THREAD_SUPPORT() + CODE: +#ifdef XML_LIBXML_THREADS + SV *threads = get_sv("threads::threads", 0); /* no create */ + if( threads && SvOK(threads) && SvTRUE(threads) ) { + PROXY_NODE_REGISTRY_MUTEX = get_sv("XML::LibXML::__PROXY_NODE_REGISTRY_MUTEX",0); + RETVAL = 1; + } else { + croak("XML::LibXML ':threads_shared' can only be used after 'use threads'"); + } +#else + RETVAL = 0; +#endif + OUTPUT: + RETVAL + +void +DISABLE_THREAD_SUPPORT() + CODE: +#ifdef XML_LIBXML_THREADS + PROXY_NODE_REGISTRY_MUTEX = NULL; +#else + croak("XML::LibXML compiled without threads!"); +#endif + +SV* +_parse_string(self, string, dir = &PL_sv_undef) + SV * self + SV * string + SV * dir + PREINIT: + char * directory = NULL; + STRLEN len; + const char * ptr; + HV * real_obj; + int well_formed; + int valid; + int validate; + xmlDocPtr real_doc; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + if (SvPOK(dir)) { + directory = SvPV(dir, len); + if (len <= 0) { + directory = NULL; + } + } + /* If string is a reference to a string - dereference it. + * See: https://rt.cpan.org/Ticket/Display.html?id=64051 (broke it) + * https://rt.cpan.org/Ticket/Display.html?id=77864 (fixed it) */ + if (SvROK(string) && !SvOBJECT(SvRV(string))) { + string = SvRV(string); + } + ptr = SvPV_const(string, len); + if (len <= 0) { + croak("Empty string\n"); + XSRETURN_UNDEF; + } + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + { + xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(ptr, len); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(1); + croak("Could not create memory parser context!\n"); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self, ctxt); + recover = LibXML_get_recover(real_obj); + + + if ( directory != NULL ) { + ctxt->directory = directory; + } + ctxt->_private = (void*)self; + + /* make libxml2-2.6 display line number on error */ + if ( ctxt->input != NULL ) { + if (directory != NULL) { + ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) directory); + } else { + ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) ""); + } + } + + xs_warn( "context initialized\n" ); + + xmlParseDocument(ctxt); + xs_warn( "document parsed \n"); + + ctxt->directory = NULL; + well_formed = ctxt->wellFormed; + valid = ctxt->valid; + validate = ctxt->validate; + real_doc = ctxt->myDoc; + ctxt->myDoc = NULL; + xmlFreeParserCtxt(ctxt); + } + if ( real_doc != NULL ) { + if (real_doc->URL != NULL) { /* free "" assigned above */ + xmlFree((char*) real_doc->URL); + real_doc->URL = NULL; + } + + if ( directory == NULL ) { + SV * newURI = sv_2mortal(newSVpvf("unknown-%p", (void*)real_doc)); + real_doc->URL = xmlStrdup((const xmlChar*)SvPV_nolen(newURI)); + } else { + real_doc->URL = xmlStrdup((const xmlChar*)directory); + } + if ( ! LibXML_will_die_ctx(saved_error, recover) && + (recover || ( well_formed && + ( !validate + || ( valid || ( real_doc->intSubset == NULL + && real_doc->extSubset == NULL )))))) { + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + } else { + xmlFreeDoc(real_doc); + real_doc=NULL; + } + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +int +_parse_sax_string(self, string) + SV * self + SV * string + PREINIT: + STRLEN len; + char * ptr; + HV * real_obj; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + ptr = SvPV(string, len); + if (len <= 0) { + croak("Empty string\n"); + XSRETURN_UNDEF; + } + CODE: + RETVAL = 0; + INIT_ERROR_HANDLER; + + { + xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt((const char*)ptr, len); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover ? recover : 1); + croak("Could not create memory parser context!\n"); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self, ctxt); + recover = LibXML_get_recover(real_obj); + + PmmSAXInitContext( ctxt, self, saved_error ); + xs_warn( "context initialized \n"); + { + RETVAL = xmlParseDocument(ctxt); + xs_warn( "document parsed \n"); + } + + PmmSAXCloseContext(ctxt); + xmlFreeParserCtxt(ctxt); + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +SV* +_parse_fh(self, fh, dir = &PL_sv_undef) + SV * self + SV * fh + SV * dir + PREINIT: + STRLEN len; + char * directory = NULL; + HV * real_obj; + int well_formed; + int valid; + int validate; + xmlDocPtr real_doc; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + if (SvPOK(dir)) { + directory = SvPV(dir, len); + if (len <= 0) { + directory = NULL; + } + } + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + + { + int read_length; + char buffer[1024]; + xmlParserCtxtPtr ctxt; + + read_length = LibXML_read_perl(fh, buffer, 4); + if (read_length <= 0) { + CLEANUP_ERROR_HANDLER; + croak( "Empty Stream\n" ); + } + + ctxt = xmlCreatePushParserCtxt(NULL, NULL, buffer, read_length, NULL); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(1); + croak("Could not create xml push parser context!\n"); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self, ctxt); + recover = LibXML_get_recover(real_obj); +#if LIBXML_VERSION > 20600 + /* dictionaries not support yet */ + ctxt->dictNames = 0; +#endif + if ( directory != NULL ) { + ctxt->directory = directory; + } + ctxt->_private = (void*)self; + xs_warn( "context initialized \n"); + { + int ret; + while ((read_length = LibXML_read_perl(fh, buffer, 1024))) { + ret = xmlParseChunk(ctxt, buffer, read_length, 0); + if ( ret != 0 ) { + break; + } + } + ret = xmlParseChunk(ctxt, buffer, 0, 1); + xs_warn( "document parsed \n"); + } + + ctxt->directory = NULL; + well_formed = ctxt->wellFormed; + valid = ctxt->valid; + validate = ctxt->validate; + real_doc = ctxt->myDoc; + ctxt->myDoc = NULL; + xmlFreeParserCtxt(ctxt); + } + + if ( real_doc != NULL ) { + + if ( directory == NULL ) { + SV * newURI = sv_2mortal(newSVpvf("unknown-%p", (void*)real_doc)); + real_doc->URL = xmlStrdup((const xmlChar*)SvPV_nolen(newURI)); + } else { + real_doc->URL = xmlStrdup((const xmlChar*)directory); + } + + if ( ! LibXML_will_die_ctx(saved_error, recover) && + (recover || ( well_formed && + ( !validate + || ( valid || ( real_doc->intSubset == NULL + && real_doc->extSubset == NULL )))))) { + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + } else { + xmlFreeDoc(real_doc); + real_doc=NULL; + } + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +void +_parse_sax_fh(self, fh, dir = &PL_sv_undef) + SV * self + SV * fh + SV * dir + PREINIT: + STRLEN len; + char * directory = NULL; + HV * real_obj; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + if (SvPOK(dir)) { + directory = SvPV(dir, len); + if (len <= 0) { + directory = NULL; + } + } + CODE: + INIT_ERROR_HANDLER; + { + int read_length; + char buffer[1024]; + xmlSAXHandlerPtr sax; + xmlParserCtxtPtr ctxt; + + read_length = LibXML_read_perl(fh, buffer, 4); + if (read_length <= 0) { + CLEANUP_ERROR_HANDLER; + croak( "Empty Stream\n" ); + } + + sax = PSaxGetHandler(); + ctxt = xmlCreatePushParserCtxt(sax, NULL, buffer, read_length, NULL); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover ? recover : 1); + croak("Could not create xml push parser context!\n"); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self, ctxt); + recover = LibXML_get_recover(real_obj); + + if ( directory != NULL ) { + ctxt->directory = directory; + } + PmmSAXInitContext( ctxt, self, saved_error ); + xs_warn( "context initialized \n"); + + { + int ret; + while ((read_length = LibXML_read_perl(fh, buffer, 1024))) { + ret = xmlParseChunk(ctxt, buffer, read_length, 0); + if ( ret != 0 ) { + break; + } + } + ret = xmlParseChunk(ctxt, buffer, 0, 1); + xs_warn( "document parsed \n"); + } + + ctxt->directory = NULL; + xmlFree(ctxt->sax); + ctxt->sax = NULL; + xmlFree(sax); + PmmSAXCloseContext(ctxt); + xmlFreeParserCtxt(ctxt); + } + CLEANUP_ERROR_HANDLER; + LibXML_cleanup_parser(); + REPORT_ERROR(recover); + +SV* +_parse_file(self, filename_sv) + SV * self + SV * filename_sv + PREINIT: + STRLEN len; + char * filename; + HV * real_obj; + int well_formed; + int valid; + int validate; + xmlDocPtr real_doc; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + filename = SvPV(filename_sv, len); + if (len <= 0) { + croak("Empty filename\n"); + XSRETURN_UNDEF; + } + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + + { + xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(1); + croak("Could not create file parser context for file \"%s\": %s\n", + filename, strerror(errno)); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self, ctxt); + recover = LibXML_get_recover(real_obj); + + ctxt->_private = (void*)self; + + xs_warn( "context initialized\n" ); + xmlParseDocument(ctxt); + xs_warn( "document parsed \n"); + + well_formed = ctxt->wellFormed; + valid = ctxt->valid; + validate = ctxt->validate; + real_doc = ctxt->myDoc; + ctxt->myDoc = NULL; + xmlFreeParserCtxt(ctxt); + } + + if ( real_doc != NULL ) { + if ( ! LibXML_will_die_ctx(saved_error, recover) && + (recover || ( well_formed && + ( !validate + || ( valid || ( real_doc->intSubset == NULL + && real_doc->extSubset == NULL )))))) { + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + } else { + xmlFreeDoc(real_doc); + real_doc=NULL; + } + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +void +_parse_sax_file(self, filename_sv) + SV * self + SV * filename_sv + PREINIT: + STRLEN len; + char * filename; + HV * real_obj; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + filename = SvPV(filename_sv, len); + if (len <= 0) { + croak("Empty filename\n"); + XSRETURN_UNDEF; + } + CODE: + INIT_ERROR_HANDLER; + + { + xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover ? recover : 1); + croak("Could not create file parser context for file \"%s\": %s\n", + filename, strerror(errno)); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self, ctxt); + recover = LibXML_get_recover(real_obj); + + ctxt->sax = PSaxGetHandler(); + PmmSAXInitContext( ctxt, self, saved_error ); + xs_warn( "context initialized \n"); + + { + xmlParseDocument(ctxt); + xs_warn( "document parsed \n"); + } + + PmmSAXCloseContext(ctxt); + xmlFreeParserCtxt(ctxt); + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + +SV* +_parse_html_string(self, string, svURL, svEncoding, options = 0) + SV * self + SV * string + SV * svURL + SV * svEncoding + int options + PREINIT: + STRLEN len; + char * ptr; + char* URL = NULL; + const char * encoding = NULL; + HV * real_obj; + htmlDocPtr real_doc; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + /* If string is a reference to a string - dereference it. + * See: https://rt.cpan.org/Ticket/Display.html?id=64051 (broke it) + * https://rt.cpan.org/Ticket/Display.html?id=77864 (fixed it) */ + if (SvROK(string) && !SvOBJECT(SvRV(string))) { + string = SvRV(string); + } + ptr = SvPV(string, len); + if (len <= 0) { + croak("Empty string\n"); + XSRETURN_UNDEF; + } + if (SvOK(svURL)) + URL = SvPV_nolen( svURL ); + if (SvOK(svEncoding)) + encoding = SvPV_nolen( svEncoding ); + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + if (encoding == NULL && SvUTF8( string )) { + encoding = "UTF-8"; + } + if (options & HTML_PARSE_RECOVER) { + recover = ((options & HTML_PARSE_NOERROR) ? 2 : 1); + } +#if LIBXML_VERSION >= 20627 + real_doc = htmlReadDoc((xmlChar*)ptr, URL, encoding, options); +#else + real_doc = htmlParseDoc((xmlChar*)ptr, encoding); + if ( real_doc ) { + if (real_doc->URL) xmlFree((xmlChar *)real_doc->URL); + if (URL) { + real_doc->URL = xmlStrdup((const xmlChar*) URL); + } + } +#endif + if ( real_doc ) { + if (URL==NULL) { + SV * newURI = sv_2mortal(newSVpvf("unknown-%p", (void*)real_doc)); + real_doc->URL = xmlStrdup((const xmlChar*)SvPV_nolen(newURI)); + } + /* This HTML memory parser doesn't use a ctxt; there is no "well-formed" + * distinction, and if it manages to parse the HTML, it returns non-null. */ + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + + +SV* +_parse_html_file(self, filename_sv, svURL, svEncoding, options = 0) + SV * self + SV * filename_sv + SV * svURL + SV * svEncoding + int options + PREINIT: + STRLEN len; + char * filename; + char * URL = NULL; + char * encoding = NULL; + HV * real_obj; + htmlDocPtr real_doc; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + filename = SvPV(filename_sv, len); + if (len <= 0) { + croak("Empty filename\n"); + XSRETURN_UNDEF; + } + if (SvOK(svURL)) + URL = SvPV_nolen( svURL ); + if (SvOK(svEncoding)) + encoding = SvPV_nolen( svEncoding ); + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + if (options & HTML_PARSE_RECOVER) { + recover = ((options & HTML_PARSE_NOERROR) ? 2 : 1); + } +#if LIBXML_VERSION >= 20627 + real_doc = htmlReadFile((const char *)filename, + encoding, + options); +#else + real_doc = htmlParseFile((const char *)filename, encoding); +#endif + if ( real_doc != NULL ) { + + /* This HTML file parser doesn't use a ctxt; there is no "well-formed" + * distinction, and if it manages to parse the HTML, it returns non-null. */ + if (URL) { + if (real_doc->URL) xmlFree((xmlChar*) real_doc->URL); + real_doc->URL = xmlStrdup((const xmlChar*) URL); + } + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + + } + CLEANUP_ERROR_HANDLER; + LibXML_cleanup_parser(); + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +SV* +_parse_html_fh(self, fh, svURL, svEncoding, options = 0) + SV * self + SV * fh + SV * svURL + SV * svEncoding + int options + PREINIT: + HV * real_obj; + htmlDocPtr real_doc; + int recover = 0; + char * URL = NULL; + PREINIT_SAVED_ERROR +#if LIBXML_VERSION >= 20627 + char * encoding = NULL; +#else + xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; +#endif + INIT: + if (SvOK(svURL)) + URL = SvPV_nolen( svURL ); +#if LIBXML_VERSION >= 20627 + if (SvOK(svEncoding)) + encoding = SvPV_nolen( svEncoding ); +#else + if (SvOK(svEncoding)) + enc = xmlParseCharEncoding(SvPV_nolen( svEncoding )); +#endif + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + if (options & HTML_PARSE_RECOVER) { + recover = ((options & HTML_PARSE_NOERROR) ? 2 : 1); + } +#if LIBXML_VERSION >= 20627 + + real_doc = htmlReadIO((xmlInputReadCallback) LibXML_read_perl, + NULL, + (void *) fh, + URL, + encoding, + options); +#else /* LIBXML_VERSION >= 20627 */ + { + int read_length; + int well_formed; + char buffer[1024]; + htmlParserCtxtPtr ctxt; + + read_length = LibXML_read_perl(fh, buffer, 4); + if (read_length <= 0) { + CLEANUP_ERROR_HANDLER; + croak( "Empty Stream\n" ); + } + ctxt = htmlCreatePushParserCtxt(NULL, NULL, buffer, read_length, + URL, enc); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover ? recover : 1); + croak("Could not create html push parser context!\n"); + } + ctxt->_private = (void*)self; + { + int ret; + while ((read_length = LibXML_read_perl(fh, buffer, 1024))) { + ret = htmlParseChunk(ctxt, buffer, read_length, 0); + if ( ret != 0 ) { + break; + } + } + ret = htmlParseChunk(ctxt, buffer, 0, 1); + } + well_formed = ctxt->wellFormed; + real_doc = ctxt->myDoc; + ctxt->myDoc = NULL; + htmlFreeParserCtxt(ctxt); + } +#endif /* LIBXML_VERSION >= 20627 */ + if ( real_doc != NULL ) { + if (real_doc->URL) xmlFree((xmlChar*) real_doc->URL); + if (URL) { + real_doc->URL = xmlStrdup((const xmlChar*) URL); + } else { + SV * newURI = sv_2mortal(newSVpvf("unknown-%p", (void*)real_doc)); + real_doc->URL = xmlStrdup((const xmlChar*)SvPV_nolen(newURI)); + } + + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +SV* +_parse_xml_chunk(self, svchunk, enc = &PL_sv_undef) + SV * self + SV * svchunk + SV * enc + PREINIT: + STRLEN len; + const char * encoding = "UTF-8"; + HV * real_obj; + int recover = 0; + xmlChar * chunk; + xmlNodePtr rv = NULL; + PREINIT_SAVED_ERROR + INIT: + if (SvPOK(enc)) { + encoding = SvPV(enc, len); + if (len <= 0) { + encoding = "UTF-8"; + } + } + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + + chunk = Sv2C(svchunk, (const xmlChar*)encoding); + + if ( chunk != NULL ) { + recover = LibXML_get_recover(real_obj); + + rv = domReadWellBalancedString( NULL, chunk, recover ); + + if ( rv != NULL ) { + xmlNodePtr fragment= NULL; + xmlNodePtr rv_end = NULL; + + /* now we append the nodelist to a document + fragment which is unbound to a Document!!!! */ + + /* step 1: create the fragment */ + fragment = xmlNewDocFragment( NULL ); + RETVAL = LibXML_NodeToSv(real_obj, fragment); + + /* step 2: set the node list to the fragment */ + fragment->children = rv; + rv_end = rv; + while ( rv_end->next != NULL ) { + rv_end->parent = fragment; + rv_end = rv_end->next; + } + /* the following line is important, otherwise we'll have + occasional segmentation faults + */ + rv_end->parent = fragment; + fragment->last = rv_end; + } + + /* free the chunk we created */ + xmlFree( chunk ); + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + + if (rv == NULL) { + croak("_parse_xml_chunk: chunk parsing failed\n"); + } + OUTPUT: + RETVAL + +void +_parse_sax_xml_chunk(self, svchunk, enc = &PL_sv_undef) + SV * self + SV * svchunk + SV * enc + PREINIT: + STRLEN len; + char * ptr; + const char * encoding = "UTF-8"; + HV * real_obj; + int recover = 0; + xmlChar * chunk; + int retCode = -1; + xmlNodePtr nodes = NULL; + xmlSAXHandlerPtr handler = NULL; + PREINIT_SAVED_ERROR + INIT: + if (SvPOK(enc)) { + encoding = SvPV(enc, len); + if (len <= 0) { + encoding = "UTF-8"; + } + } + ptr = SvPV(svchunk, len); + if (len <= 0) { + croak("Empty string\n"); + } + CODE: + INIT_ERROR_HANDLER; + + chunk = Sv2C(svchunk, (const xmlChar*)encoding); + + if ( chunk != NULL ) { + xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt((const char*)ptr, len); + if (ctxt == NULL) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover ? recover : 1); + croak("Could not create memory parser context!\n"); + } + xs_warn( "context created\n"); + real_obj = LibXML_init_parser(self,ctxt); + recover = LibXML_get_recover(real_obj); + + PmmSAXInitContext( ctxt, self, saved_error ); + handler = PSaxGetHandler(); + + retCode = xmlParseBalancedChunkMemory( NULL, + handler, + ctxt, + 0, + chunk, + &nodes ); + + xmlFree( handler ); + PmmSAXCloseContext(ctxt); + xmlFreeParserCtxt(ctxt); + + /* free the chunk we created */ + xmlFree( chunk ); + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + + if (retCode == -1) { + croak("_parse_sax_xml_chunk: chunk parsing failed\n"); + } + +int +_processXIncludes(self, doc, options=0) + SV * self + SV * doc + int options + PREINIT: + xmlDocPtr real_doc; + HV * real_obj; + int recover = 0; + PREINIT_SAVED_ERROR + INIT: + real_doc = (xmlDocPtr) PmmSvNode(doc); + if (real_doc == NULL) { + croak("No document to process!\n"); + XSRETURN_UNDEF; + } + CODE: + RETVAL = 0; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + recover = LibXML_get_recover(real_obj); + + RETVAL = xmlXIncludeProcessFlags(real_doc,options); + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + + if ( RETVAL < 0 ) { + croak( "unknown error during XInclude processing\n" ); + XSRETURN_UNDEF; + } else if ( RETVAL == 0 ) { + RETVAL = 1; + } + OUTPUT: + RETVAL + +SV* +_start_push(self, with_sax=0) + SV * self + int with_sax + PREINIT: + HV * real_obj; + int recover = 0; + xmlParserCtxtPtr ctxt = NULL; + PREINIT_SAVED_ERROR + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + + /* create empty context */ + ctxt = xmlCreatePushParserCtxt( NULL, NULL, NULL, 0, NULL ); + real_obj = LibXML_init_parser(self,ctxt); + recover = LibXML_get_recover(real_obj); + if ( with_sax == 1 ) { + PmmSAXInitContext( ctxt, self, saved_error ); + } + + RETVAL = PmmContextSv( ctxt ); + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + OUTPUT: + RETVAL + +int +_push(self, pctxt, data) + SV * self + SV * pctxt + SV * data + PREINIT: + HV * real_obj; + int recover = 0; + xmlParserCtxtPtr ctxt = NULL; + STRLEN len = 0; + char * chunk = NULL; + PREINIT_SAVED_ERROR + INIT: + ctxt = PmmSvContext( pctxt ); + if ( ctxt == NULL ) { + croak( "parser context already freed\n" ); + XSRETURN_UNDEF; + } + if ( data == &PL_sv_undef ) { + XSRETURN_UNDEF; + } + chunk = SvPV( data, len ); + if ( len <= 0 ) { + xs_warn( "empty string" ); + XSRETURN_UNDEF; + } + CODE: + RETVAL = 0; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + recover = LibXML_get_recover(real_obj); + + xmlParseChunk(ctxt, (const char *)chunk, len, 0); + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(recover); + + if ( ctxt->wellFormed == 0 ) { + croak( "XML not well-formed in xmlParseChunk\n" ); + XSRETURN_UNDEF; + } + RETVAL = 1; + OUTPUT: + RETVAL + +SV* +_end_push(self, pctxt, restore) + SV * self + SV * pctxt + int restore + PREINIT: + HV * real_obj; + int well_formed; + xmlParserCtxtPtr ctxt = NULL; + xmlDocPtr real_doc = NULL; + PREINIT_SAVED_ERROR + INIT: + ctxt = PmmSvContext( pctxt ); + if ( ctxt == NULL ) { + croak( "parser context already freed\n" ); + XSRETURN_UNDEF; + } + CODE: + RETVAL = &PL_sv_undef; + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + + xmlParseChunk(ctxt, "", 0, 1); /* finish the parse */ + xs_warn( "Finished with push parser\n" ); + + well_formed = ctxt->wellFormed; + real_doc = ctxt->myDoc; + ctxt->myDoc = NULL; + xmlFreeParserCtxt(ctxt); + PmmNODE( SvPROXYNODE( pctxt ) ) = NULL; + + if ( real_doc != NULL ) { + if ( restore || well_formed ) { + RETVAL = LibXML_NodeToSv( real_obj, INT2PTR(xmlNodePtr,real_doc) ); + } else { + xmlFreeDoc(real_doc); + real_doc = NULL; + } + } + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(restore); + + if ( real_doc == NULL ){ + croak( "no document found!\n" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +void +_end_sax_push(self, pctxt) + SV * self + SV * pctxt + PREINIT: + HV * real_obj; + xmlParserCtxtPtr ctxt = NULL; + PREINIT_SAVED_ERROR + INIT: + ctxt = PmmSvContext( pctxt ); + if ( ctxt == NULL ) { + croak( "parser context already freed\n" ); + } + CODE: + INIT_ERROR_HANDLER; + real_obj = LibXML_init_parser(self,NULL); + + xmlParseChunk(ctxt, "", 0, 1); /* finish the parse */ + xs_warn( "Finished with SAX push parser\n" ); + + xmlFree(ctxt->sax); + ctxt->sax = NULL; + PmmSAXCloseContext(ctxt); + xmlFreeParserCtxt(ctxt); + PmmNODE( SvPROXYNODE( pctxt ) ) = NULL; + + LibXML_cleanup_parser(); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + +SV* +import_GDOME( CLASS, sv_gdome, deep=1 ) + SV * sv_gdome + int deep + PREINIT: + xmlNodePtr node = NULL; + INIT: + RETVAL = &PL_sv_undef; +#ifndef XML_LIBXML_GDOME_SUPPORT + croak( "GDOME Support not compiled" ); +#endif + if ( sv_gdome == NULL || sv_gdome == &PL_sv_undef ) { + croak( "no XML::GDOME data found" ); + } +#ifdef XML_LIBXML_GDOME_SUPPORT + else { + GdomeNode* gnode = NULL; + gnode = (GdomeNode*)SvIV((SV*)SvRV( sv_gdome )); + if ( gnode == NULL ) { + croak( "no XML::GDOME data found (datastructure empty)" ); + } + + node = gdome_xml_n_get_xmlNode( gnode ); + if ( node == NULL ) { + croak( "no XML::LibXML node found in GDOME object" ); + } + } +#endif + CODE: + if ( node->type == XML_NAMESPACE_DECL ) { + const char * CLASS = "XML::LibXML::Namespace"; + RETVAL = NEWSV(0,0); + RETVAL = sv_setref_pv( RETVAL, + CLASS, + (void*)xmlCopyNamespace((xmlNsPtr)node) ); + } + else { + RETVAL = PmmNodeToSv( PmmCloneNode( node, deep ), NULL ); + } + OUTPUT: + RETVAL + + +SV* +export_GDOME( CLASS, sv_libxml, deep=1 ) + SV * sv_libxml + int deep + PREINIT: + xmlNodePtr node = NULL, retnode = NULL; + INIT: + RETVAL = &PL_sv_undef; +#ifndef XML_LIBXML_GDOME_SUPPORT + croak( "GDOME Support not configured!" ); +#endif + if ( sv_libxml == NULL || sv_libxml == &PL_sv_undef ) { + croak( "no XML::LibXML data found" ); + } + node = PmmSvNode( sv_libxml ); + if ( node == NULL ) { + croak( "no XML::LibXML data found (empty structure)" ); + } + CODE: + retnode = PmmCloneNode( node, deep ); + if ( retnode == NULL ) { + croak( "Copy node failed" ); + } + + RETVAL = PmmNodeToGdomeSv( retnode ); + OUTPUT: + RETVAL + + +int +load_catalog( self, filename ) + SV * filename + PREINIT: + const char * fn = (const char *) Sv2C(filename, NULL); + INIT: + if ( fn == NULL || xmlStrlen( (xmlChar *)fn ) == 0 ) { + croak( "cannot load catalog" ); + } + CODE: +#ifdef LIBXML_CATALOG_ENABLED + RETVAL = xmlLoadCatalog( fn ); +#else + XSRETURN_UNDEF; +#endif + OUTPUT: + RETVAL + + + +int +_default_catalog( self, catalog ) + SV * catalog + PREINIT: +#ifdef LIBXML_CATALOG_ENABLED + xmlCatalogPtr catal = INT2PTR(xmlCatalogPtr,SvIV(SvRV(catalog))); +#endif + INIT: + if ( catal == NULL ) { + croak( "empty catalog\n" ); + } + CODE: + warn( "this feature is not implemented" ); + RETVAL = 0; + OUTPUT: + RETVAL + +SV* +_externalEntityLoader( loader ) + SV* loader + CODE: + { + RETVAL = EXTERNAL_ENTITY_LOADER_FUNC; + if(EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { + EXTERNAL_ENTITY_LOADER_FUNC = newSVsv(loader); + } + + if (LibXML_old_ext_ent_loader == NULL ) + { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader((xmlExternalEntityLoader)LibXML_load_external_entity); + } + } + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::HashTable + +xmlHashTablePtr +new(CLASS) + const char * CLASS + CODE: + RETVAL = xmlHashCreate(8); + OUTPUT: + RETVAL + +void +DESTROY( table ) + xmlHashTablePtr table + CODE: + xs_warn("DESTROY XMLHASHTABLE\n"); + PmmFreeHashTable(table); + +MODULE = XML::LibXML PACKAGE = XML::LibXML::ParserContext + +void +DESTROY( self ) + SV * self + CODE: + xs_warn( "DROP PARSER CONTEXT!" ); + PmmContextREFCNT_dec( SvPROXYNODE( self ) ); + + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Document + +SV * +_toString(self, format=0) + xmlDocPtr self + int format + PREINIT: + xmlChar *result=NULL; + int len=0; + SV* internalFlag = NULL; + int oldTagFlag = xmlSaveNoEmptyTags; + xmlDtdPtr intSubset = NULL; + /* PREINIT_SAVED_ERROR */ + CODE: + RETVAL = &PL_sv_undef; + internalFlag = get_sv("XML::LibXML::setTagCompression", 0); + if( internalFlag ) { + xmlSaveNoEmptyTags = SvTRUE(internalFlag); + } + + internalFlag = get_sv("XML::LibXML::skipDTD", 0); + if ( internalFlag && SvTRUE(internalFlag) ) { + intSubset = xmlGetIntSubset( self ); + if ( intSubset ) + xmlUnlinkNode( INT2PTR(xmlNodePtr,intSubset) ); + } + + /* INIT_ERROR_HANDLER; */ + + if ( format <= 0 ) { + xs_warn( "use no formated toString!" ); + xmlDocDumpMemory(self, &result, &len); + } + else { + int t_indent_var = xmlIndentTreeOutput; + xs_warn( "use formated toString!" ); + xmlIndentTreeOutput = 1; + xmlDocDumpFormatMemory( self, &result, &len, format ); + xmlIndentTreeOutput = t_indent_var; + } + + if ( intSubset != NULL ) { + if (self->children == NULL) { + xmlAddChild(INT2PTR(xmlNodePtr,self), INT2PTR(xmlNodePtr,intSubset)); + } + else { + xmlAddPrevSibling(self->children, INT2PTR(xmlNodePtr,intSubset)); + } + } + + xmlSaveNoEmptyTags = oldTagFlag; + + /* REPORT_ERROR(0); */ + + if (result == NULL) { + xs_warn("Failed to convert doc to string"); + XSRETURN_UNDEF; + } else { + /* warn("%s, %d\n",result, len); */ + RETVAL = newSVpvn( (const char *)result, len ); + /* C2Sv( result, self->encoding ); */ + xmlFree(result); + } + OUTPUT: + RETVAL + +int +toFH( self, filehandler, format=0 ) + xmlDocPtr self + SV * filehandler + int format + PREINIT: + xmlOutputBufferPtr buffer; + const xmlChar * encoding = NULL; + xmlCharEncodingHandlerPtr handler = NULL; + SV* internalFlag = NULL; + int oldTagFlag = xmlSaveNoEmptyTags; + xmlDtdPtr intSubset = NULL; + int t_indent_var = xmlIndentTreeOutput; + PREINIT_SAVED_ERROR + CODE: + internalFlag = get_sv("XML::LibXML::setTagCompression", 0); + if( internalFlag ) { + xmlSaveNoEmptyTags = SvTRUE(internalFlag); + } + + internalFlag = get_sv("XML::LibXML::skipDTD", 0); + if ( internalFlag && SvTRUE(internalFlag) ) { + intSubset = xmlGetIntSubset( self ); + if ( intSubset ) + xmlUnlinkNode( INT2PTR(xmlNodePtr,intSubset) ); + } + + xmlRegisterDefaultOutputCallbacks(); + encoding = (self)->encoding; + if ( encoding != NULL ) { + if ( xmlParseCharEncoding((const char*)encoding) != XML_CHAR_ENCODING_UTF8) { + handler = xmlFindCharEncodingHandler((const char*)encoding); + } + + } + else { + xs_warn("no encoding?"); + } + + buffer = xmlOutputBufferCreateIO( (xmlOutputWriteCallback) &LibXML_output_write_handler, + (xmlOutputCloseCallback)&LibXML_output_close_handler, + filehandler, + handler ); + + if ( format <= 0 ) { + format = 0; + xmlIndentTreeOutput = 0; + } + else { + xmlIndentTreeOutput = 1; + } + + INIT_ERROR_HANDLER; + + RETVAL = xmlSaveFormatFileTo( buffer, + self, + (const char *) encoding, + format); + + if ( intSubset != NULL ) { + if (self->children == NULL) { + xmlAddChild(INT2PTR(xmlNodePtr,self), INT2PTR(xmlNodePtr,intSubset)); + } + else { + xmlAddPrevSibling(self->children, INT2PTR(xmlNodePtr,intSubset)); + } + } + + xmlIndentTreeOutput = t_indent_var; + xmlSaveNoEmptyTags = oldTagFlag; + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +toFile( self, filename, format=0 ) + xmlDocPtr self + char * filename + int format + PREINIT: + SV* internalFlag = NULL; + int oldTagFlag = xmlSaveNoEmptyTags; + PREINIT_SAVED_ERROR + CODE: + internalFlag = get_sv("XML::LibXML::setTagCompression", 0); + if( internalFlag ) { + xmlSaveNoEmptyTags = SvTRUE(internalFlag); + } + + INIT_ERROR_HANDLER; + + if ( format <= 0 ) { + xs_warn( "use no formated toFile!" ); + RETVAL = xmlSaveFile( filename, self ); + } + else { + int t_indent_var = xmlIndentTreeOutput; + xmlIndentTreeOutput = 1; + RETVAL =xmlSaveFormatFile( filename, + self, + format); + xmlIndentTreeOutput = t_indent_var; + } + + xmlSaveNoEmptyTags = oldTagFlag; + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + + if ( RETVAL > 0 ) + RETVAL = 1; + else + XSRETURN_UNDEF; + OUTPUT: + RETVAL + +SV * +toStringHTML(self) + xmlDocPtr self + ALIAS: + XML::LibXML::Document::serialize_html = 1 + PREINIT: + xmlChar *result=NULL; + int len = 0; + PREINIT_SAVED_ERROR + CODE: + PERL_UNUSED_VAR(ix); + xs_warn( "use no formated toString!" ); + INIT_ERROR_HANDLER; + htmlDocDumpMemory(self, &result, &len); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + + if (result == NULL) { + XSRETURN_UNDEF; + } else { + /* warn("%s, %d\n",result, len); */ + RETVAL = newSVpvn((char *)result, (STRLEN)len); + xmlFree(result); + } + OUTPUT: + RETVAL + + +const char * +URI( self ) + xmlDocPtr self + ALIAS: + XML::LibXML::Document::documentURI = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = (const char*)xmlStrdup(self->URL ); + OUTPUT: + RETVAL + +void +setURI( self, new_URI ) + xmlDocPtr self + char * new_URI + CODE: + if (new_URI) { + xmlFree((xmlChar*)self->URL ); + self->URL = xmlStrdup((const xmlChar*)new_URI); + } + +SV* +createDocument( CLASS, version="1.0", encoding=NULL ) + char * version + char * encoding + ALIAS: + XML::LibXML::Document::new = 1 + PREINIT: + xmlDocPtr doc=NULL; + CODE: + PERL_UNUSED_VAR(ix); + doc = xmlNewDoc((const xmlChar*)version); + if (encoding && *encoding != 0) { + doc->encoding = (const xmlChar*)xmlStrdup((const xmlChar*)encoding); + } + RETVAL = PmmNodeToSv(INT2PTR(xmlNodePtr,doc),NULL); + OUTPUT: + RETVAL + +SV* +createInternalSubset( self, Pname, extID, sysID ) + xmlDocPtr self + SV * Pname + SV * extID + SV * sysID + PREINIT: + xmlDtdPtr dtd = NULL; + xmlChar * name = NULL; + xmlChar * externalID = NULL; + xmlChar * systemID = NULL; + CODE: + name = Sv2C( Pname, NULL ); + if ( name == NULL ) { + XSRETURN_UNDEF; + } + + externalID = Sv2C(extID, NULL); + systemID = Sv2C(sysID, NULL); + + dtd = xmlCreateIntSubset( self, name, externalID, systemID ); + xmlFree(externalID); + xmlFree(systemID); + xmlFree(name); + if ( dtd ) { + RETVAL = PmmNodeToSv( INT2PTR(xmlNodePtr,dtd), PmmPROXYNODE(self) ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +createExternalSubset( self, Pname, extID, sysID ) + xmlDocPtr self + SV * Pname + SV * extID + SV * sysID + PREINIT: + xmlDtdPtr dtd = NULL; + xmlChar * name = NULL; + xmlChar * externalID = NULL; + xmlChar * systemID = NULL; + CODE: + name = Sv2C( Pname, NULL ); + if ( name == NULL ) { + XSRETURN_UNDEF; + } + + externalID = Sv2C(extID, NULL); + systemID = Sv2C(sysID, NULL); + + dtd = xmlNewDtd( self, name, externalID, systemID ); + + xmlFree(externalID); + xmlFree(systemID); + xmlFree(name); + if ( dtd ) { + RETVAL = PmmNodeToSv( (xmlNodePtr)dtd, PmmPROXYNODE(self) ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +createDTD( self, Pname, extID, sysID ) + xmlDocPtr self + SV * Pname + SV * extID + SV * sysID + PREINIT: + xmlDtdPtr dtd = NULL; + xmlChar * name = NULL; + xmlChar * externalID = NULL; + xmlChar * systemID = NULL; + CODE: + name = Sv2C( Pname, NULL ); + if ( name == NULL ) { + XSRETURN_UNDEF; + } + + externalID = Sv2C(extID, NULL); + systemID = Sv2C(sysID, NULL); + + dtd = xmlNewDtd( NULL, name, externalID, systemID ); + dtd->doc = self; + + xmlFree(externalID); + xmlFree(systemID); + xmlFree(name); + if ( dtd ) { + RETVAL = PmmNodeToSv( (xmlNodePtr)dtd, PmmPROXYNODE(self) ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +createDocumentFragment( self ) + xmlDocPtr self + CODE: + RETVAL = PmmNodeToSv(xmlNewDocFragment(self), PmmPROXYNODE(self)); + OUTPUT: + RETVAL + +SV* +createElement( self, name ) + xmlDocPtr self + SV* name + PREINIT: + xmlNodePtr newNode; + xmlChar * elname = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + elname = nodeSv2C( name , (xmlNodePtr) self); + if ( !LibXML_test_node_name( elname ) ) { + xmlFree( elname ); + croak( "bad name" ); + } + + newNode = xmlNewNode(NULL , elname); + xmlFree(elname); + if ( newNode != NULL ) { + docfrag = PmmNewFragment( self ); + newNode->doc = self; + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + xs_warn( "no node created!" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +createRawElement( self, name ) + xmlDocPtr self + SV* name + PREINIT: + xmlNodePtr newNode; + xmlChar * elname = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + elname = nodeSv2C( name , (xmlNodePtr) self); + if ( !elname || xmlStrlen(elname) <= 0 ) { + xmlFree( elname ); + croak( "bad name" ); + } + + newNode = xmlNewDocNode(self,NULL , elname, NULL); + xmlFree(elname); + if ( newNode != NULL ) { + docfrag = PmmNewFragment( self ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + xs_warn( "no node created!" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +createElementNS( self, nsURI, name ) + xmlDocPtr self + SV * nsURI + SV * name + PREINIT: + xmlChar * ename = NULL; + xmlChar * prefix = NULL; + xmlChar * localname = NULL; + xmlChar * eURI = NULL; + xmlNsPtr ns = NULL; + ProxyNodePtr docfrag = NULL; + xmlNodePtr newNode = NULL; + CODE: + ename = nodeSv2C( name , (xmlNodePtr) self ); + if ( !LibXML_test_node_name( ename ) ) { + xmlFree( ename ); + croak( "bad name" ); + } + + eURI = Sv2C( nsURI , NULL ); + + if ( eURI != NULL && xmlStrlen(eURI)!=0 ){ + localname = xmlSplitQName2(ename, &prefix); + if ( localname == NULL ) { + localname = xmlStrdup( ename ); + } + + ns = xmlNewNs( NULL, eURI, prefix ); + newNode = xmlNewDocNode( self, ns, localname, NULL ); + newNode->nsDef = ns; + + xmlFree(localname); + } + else { + xs_warn( " ordinary element " ); + /* ordinary element */ + localname = ename; + + newNode = xmlNewDocNode( self, NULL , localname, NULL ); + } + + docfrag = PmmNewFragment( self ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode, docfrag); + + if ( prefix != NULL ) { + xmlFree(prefix); + } + if ( eURI != NULL ) { + xmlFree(eURI); + } + xmlFree(ename); + OUTPUT: + RETVAL + +SV* +createRawElementNS( self, nsURI, name ) + xmlDocPtr self + SV * nsURI + SV * name + PREINIT: + xmlChar * ename = NULL; + xmlChar * prefix = NULL; + xmlChar * localname = NULL; + xmlChar * eURI = NULL; + xmlNsPtr ns = NULL; + ProxyNodePtr docfrag = NULL; + xmlNodePtr newNode = NULL; + CODE: + ename = nodeSv2C( name , (xmlNodePtr) self ); + if ( !LibXML_test_node_name( ename ) ) { + xmlFree( ename ); + croak( "bad name" ); + } + + eURI = Sv2C( nsURI , NULL ); + + if ( eURI != NULL && xmlStrlen(eURI)!=0 ){ + localname = xmlSplitQName2(ename, &prefix); + if ( localname == NULL ) { + localname = xmlStrdup( ename ); + } + + newNode = xmlNewDocNode( self,NULL , localname, NULL ); + + ns = xmlSearchNsByHref( self, newNode, eURI ); + if ( ns == NULL ) { + /* create a new NS if the NS does not already exists */ + ns = xmlNewNs(newNode, eURI , prefix ); + } + + if ( ns == NULL ) { + xmlFreeNode( newNode ); + xmlFree(eURI); + xmlFree(localname); + if ( prefix != NULL ) { + xmlFree(prefix); + } + xmlFree(ename); + XSRETURN_UNDEF; + } + + xmlFree(localname); + } + else { + xs_warn( " ordinary element " ); + /* ordinary element */ + localname = ename; + + newNode = xmlNewDocNode( self, NULL , localname, NULL ); + } + + xmlSetNs(newNode, ns); + docfrag = PmmNewFragment( self ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode, docfrag); + + if ( prefix != NULL ) { + xmlFree(prefix); + } + if ( eURI != NULL ) { + xmlFree(eURI); + } + xmlFree(ename); + OUTPUT: + RETVAL + +SV * +createTextNode( self, content ) + xmlDocPtr self + SV * content + PREINIT: + xmlNodePtr newNode; + xmlChar * elname = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + elname = nodeSv2C( content , (xmlNodePtr) self ); + if ( elname != NULL || xmlStrlen(elname) > 0 ) { + newNode = xmlNewDocText( self, elname ); + xmlFree(elname); + if ( newNode != NULL ) { + docfrag = PmmNewFragment( self ); + newNode->doc = self; + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + xs_warn( "no node created!" ); + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +createComment( self , content ) + xmlDocPtr self + SV * content + PREINIT: + xmlNodePtr newNode; + xmlChar * elname = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + elname = nodeSv2C( content , (xmlNodePtr) self ); + if ( elname != NULL || xmlStrlen(elname) > 0 ) { + newNode = xmlNewDocComment( self, elname ); + xmlFree(elname); + if ( newNode != NULL ) { + docfrag = PmmNewFragment( self ); + newNode->doc = self; + xmlAddChild(PmmNODE(docfrag), newNode); + xs_warn( newNode->name ); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + xs_warn( "no node created!" ); + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +createCDATASection( self, content ) + xmlDocPtr self + SV * content + PREINIT: + xmlNodePtr newNode; + xmlChar * elname = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + elname = nodeSv2C( content , (xmlNodePtr)self ); + if ( elname != NULL || xmlStrlen(elname) > 0 ) { + newNode = xmlNewCDataBlock( self, elname, xmlStrlen(elname) ); + xmlFree(elname); + if ( newNode != NULL ) { + docfrag = PmmNewFragment( self ); + newNode->doc = self; + xmlAddChild(PmmNODE(docfrag), newNode); + xs_warn( "[CDATA section]" ); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + xs_warn( "no node created!" ); + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +createEntityReference( self , pname ) + xmlDocPtr self + SV * pname + PREINIT: + xmlNodePtr newNode; + xmlChar * name = Sv2C( pname, NULL ); + ProxyNodePtr docfrag = NULL; + CODE: + if ( name == NULL ) { + XSRETURN_UNDEF; + } + newNode = xmlNewReference( self, name ); + xmlFree(name); + if ( newNode == NULL ) { + XSRETURN_UNDEF; + } + docfrag = PmmNewFragment( self ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv( newNode, docfrag ); + OUTPUT: + RETVAL + +SV* +createAttribute( self, pname, pvalue=&PL_sv_undef ) + xmlDocPtr self + SV * pname + SV * pvalue + PREINIT: + xmlChar * name = NULL; + xmlChar * value = NULL; + xmlAttrPtr newAttr = NULL; + xmlChar * buffer = NULL; + CODE: + name = nodeSv2C( pname , (xmlNodePtr) self ); + if ( !LibXML_test_node_name( name ) ) { + xmlFree(name); + XSRETURN_UNDEF; + } + + value = nodeSv2C( pvalue , (xmlNodePtr) self ); + /* unlike xmlSetProp, xmlNewDocProp does not encode entities in value */ + buffer = xmlEncodeEntitiesReentrant(self, value); + newAttr = xmlNewDocProp( self, name, buffer ); + RETVAL = PmmNodeToSv((xmlNodePtr)newAttr, PmmPROXYNODE(self)); + + xmlFree(name); + xmlFree(buffer); + if ( value ) { + xmlFree(value); + } + OUTPUT: + RETVAL + +SV* +createAttributeNS( self, URI, pname, pvalue=&PL_sv_undef ) + xmlDocPtr self + SV * URI + SV * pname + SV * pvalue + PREINIT: + xmlChar * name = NULL; + xmlChar * value = NULL; + xmlChar * prefix = NULL; + const xmlChar * pchar = NULL; + xmlChar * localname = NULL; + xmlChar * nsURI = NULL; + xmlAttrPtr newAttr = NULL; + xmlNsPtr ns = NULL; + CODE: + name = nodeSv2C( pname , (xmlNodePtr) self ); + if ( !LibXML_test_node_name( name ) ) { + xmlFree(name); + XSRETURN_UNDEF; + } + + nsURI = Sv2C( URI , NULL ); + value = nodeSv2C( pvalue, (xmlNodePtr) self ); + + if ( nsURI != NULL && xmlStrlen(nsURI) > 0 ) { + xmlNodePtr root = xmlDocGetRootElement(self ); + if ( root ) { + pchar = xmlStrchr(name, ':'); + if ( pchar != NULL ) { + localname = xmlSplitQName2(name, &prefix); + } + else { + localname = xmlStrdup( name ); + } + ns = xmlSearchNsByHref( self, root, nsURI ); + if ( ns == NULL ) { + /* create a new NS if the NS does not already exists */ + ns = xmlNewNs(root, nsURI , prefix ); + } + + if ( ns == NULL ) { + xmlFree(nsURI); + xmlFree(localname); + if ( prefix ) { + xmlFree(prefix); + } + xmlFree(name); + if ( value ) { + xmlFree(value); + } + XSRETURN_UNDEF; + } + + newAttr = xmlNewDocProp( self, localname, value ); + xmlSetNs((xmlNodePtr)newAttr, ns); + + RETVAL = PmmNodeToSv((xmlNodePtr)newAttr, PmmPROXYNODE(self) ); + + xmlFree(nsURI); + xmlFree(name); + if ( prefix ) { + xmlFree(prefix); + } + xmlFree(localname); + if ( value ) { + xmlFree(value); + } + } + else { + croak( "can't create a new namespace on an attribute!" ); + xmlFree(name); + if ( value ) { + xmlFree(value); + } + XSRETURN_UNDEF; + } + } + else { + xmlChar *buffer; + /* unlike xmlSetProp, xmlNewDocProp does not encode entities in value */ + buffer = xmlEncodeEntitiesReentrant(self, value); + newAttr = xmlNewDocProp( self, name, buffer ); + RETVAL = PmmNodeToSv((xmlNodePtr)newAttr,PmmPROXYNODE(self)); + xmlFree(name); + xmlFree(buffer); + if ( value ) { + xmlFree(value); + } + } + OUTPUT: + RETVAL + +SV* +createProcessingInstruction(self, name, value=&PL_sv_undef) + xmlDocPtr self + SV * name + SV * value + ALIAS: + createPI = 1 + PREINIT: + xmlChar * n = NULL; + xmlChar * v = NULL; + xmlNodePtr newNode = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + PERL_UNUSED_VAR(ix); + n = nodeSv2C(name, (xmlNodePtr)self); + if ( !n ) { + XSRETURN_UNDEF; + } + v = nodeSv2C(value, (xmlNodePtr)self); + newNode = xmlNewPI(n,v); + xmlFree(v); + xmlFree(n); + if ( newNode != NULL ) { + docfrag = PmmNewFragment( self ); + newNode->doc = self; + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } else { + xs_warn( "no node created!" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +void +_setDocumentElement( self , proxy ) + xmlDocPtr self + SV * proxy + PREINIT: + xmlNodePtr elem, oelem; + INIT: + elem = PmmSvNode(proxy); + if ( elem == NULL ) { + XSRETURN_UNDEF; + } + CODE: + /* please correct me if i am wrong: the document element HAS to be + * an ELEMENT NODE + */ + if ( elem->type == XML_ELEMENT_NODE ) { + if ( self != elem->doc ) { + domImportNode( self, elem, 1, 1 ); + } + + oelem = xmlDocGetRootElement( self ); + if ( oelem == NULL || oelem->_private == NULL ) { + xmlDocSetRootElement( self, elem ); + } + else { + ProxyNodePtr docfrag = PmmNewFragment( self ); + xmlReplaceNode( oelem, elem ); + xmlAddChild( PmmNODE(docfrag), oelem ); + PmmFixOwner( ((ProxyNodePtr)oelem->_private), docfrag); + } + + if ( elem->_private != NULL ) { + PmmFixOwner( SvPROXYNODE(proxy), PmmPROXYNODE(self)); + } + } else { + croak("setDocumentElement: ELEMENT node required"); + } + +SV * +documentElement( self ) + xmlDocPtr self + ALIAS: + XML::LibXML::Document::getDocumentElement = 1 + PREINIT: + xmlNodePtr elem; + CODE: + PERL_UNUSED_VAR(ix); + elem = xmlDocGetRootElement( self ); + if ( elem ) { + RETVAL = PmmNodeToSv(elem, PmmPROXYNODE(self)); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +externalSubset( self ) + xmlDocPtr self + PREINIT: + xmlDtdPtr dtd; + CODE: + if ( self->extSubset == NULL ) { + XSRETURN_UNDEF; + } + + dtd = self->extSubset; + RETVAL = PmmNodeToSv((xmlNodePtr)dtd, PmmPROXYNODE(self)); + OUTPUT: + RETVAL + +SV * +internalSubset( self ) + xmlDocPtr self + PREINIT: + xmlDtdPtr dtd; + CODE: + if ( self->intSubset == NULL ) { + XSRETURN_UNDEF; + } + + dtd = self->intSubset; +RETVAL = PmmNodeToSv(INT2PTR(xmlNodePtr,dtd), PmmPROXYNODE(self)); + OUTPUT: + RETVAL + +void +setExternalSubset( self, extdtd ) + xmlDocPtr self + SV * extdtd + PREINIT: + xmlDtdPtr dtd = NULL; + xmlDtdPtr olddtd = NULL; + INIT: + dtd = (xmlDtdPtr)PmmSvNode(extdtd); + if ( dtd == NULL ) { + croak( "lost DTD node" ); + } + CODE: + if ( dtd && dtd != self->extSubset ) { + if ( dtd->doc == NULL ) { + xmlSetTreeDoc( (xmlNodePtr) dtd, self ); + } else if ( dtd->doc != self ) { + domImportNode( self, (xmlNodePtr) dtd,1,1); + } + + if ( dtd == self->intSubset ) { + xmlUnlinkNode( (xmlNodePtr)dtd ); + self->intSubset = NULL; + } + + olddtd = self->extSubset; + if ( olddtd && olddtd->_private == NULL ) { + xmlFreeDtd( olddtd ); + } + self->extSubset = dtd; + } + +void +setInternalSubset( self, extdtd ) + xmlDocPtr self + SV * extdtd + PREINIT: + xmlDtdPtr dtd = NULL; + xmlDtdPtr olddtd = NULL; + INIT: + dtd = (xmlDtdPtr)PmmSvNode(extdtd); + if ( dtd == NULL ) { + croak( "lost DTD node" ); + } + CODE: + if ( dtd && dtd != self->intSubset ) { + if ( dtd->doc != self ) { + croak( "can't import DTDs" ); + domImportNode( self, (xmlNodePtr) dtd,1,1); + } + + if ( dtd == self->extSubset ) { + self->extSubset = NULL; + } + + olddtd = xmlGetIntSubset( self ); + if( olddtd ) { + xmlReplaceNode( (xmlNodePtr)olddtd, (xmlNodePtr) dtd ); + if ( olddtd->_private == NULL ) { + xmlFreeDtd( olddtd ); + } + } + else { + if (self->children == NULL) + xmlAddChild((xmlNodePtr) self, (xmlNodePtr) dtd); + else + xmlAddPrevSibling(self->children, (xmlNodePtr) dtd); + } + self->intSubset = dtd; + } + +SV * +removeInternalSubset( self ) + xmlDocPtr self + PREINIT: + xmlDtdPtr dtd = NULL; + CODE: + dtd = xmlGetIntSubset(self); + if ( !dtd ) { + XSRETURN_UNDEF; + } + xmlUnlinkNode( (xmlNodePtr)dtd ); + self->intSubset = NULL; + RETVAL = PmmNodeToSv( (xmlNodePtr)dtd, PmmPROXYNODE(self) ); + OUTPUT: + RETVAL + +SV * +removeExternalSubset( self ) + xmlDocPtr self + PREINIT: + xmlDtdPtr dtd = NULL; + CODE: + dtd = self->extSubset; + if ( !dtd ) { + XSRETURN_UNDEF; + } + self->extSubset = NULL; + RETVAL = PmmNodeToSv( (xmlNodePtr)dtd, PmmPROXYNODE(self) ); + OUTPUT: + RETVAL + +SV * +importNode( self, node, dummy=0 ) + xmlDocPtr self + xmlNodePtr node + int dummy + PREINIT: + xmlNodePtr ret = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + if ( node->type == XML_DOCUMENT_NODE + || node->type == XML_HTML_DOCUMENT_NODE ) { + croak( "Can't import Documents!" ); + XSRETURN_UNDEF; + } + if (node->type == XML_DTD_NODE) { + croak("Can't import DTD nodes"); + } + + ret = domImportNode( self, node, 0, 1 ); + if ( ret ) { + docfrag = PmmNewFragment( self ); + xmlAddChild( PmmNODE(docfrag), ret ); + RETVAL = PmmNodeToSv( ret, docfrag); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +adoptNode( self, node ) + xmlDocPtr self + xmlNodePtr node + PREINIT: + xmlNodePtr ret = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + if ( node->type == XML_DOCUMENT_NODE + || node->type == XML_HTML_DOCUMENT_NODE ) { + croak( "Can't adopt Documents!" ); + XSRETURN_UNDEF; + } + if (node->type == XML_DTD_NODE) { + croak("Can't adopt DTD nodes"); + } + + ret = domImportNode( self, node, 1, 1 ); + + if ( ret ) { + docfrag = PmmNewFragment( self ); + RETVAL = PmmNodeToSv(node, docfrag); + xmlAddChild( PmmNODE(docfrag), ret ); + PmmFixOwner(SvPROXYNODE(RETVAL), docfrag); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +char* +encoding( self ) + xmlDocPtr self + ALIAS: + XML::LibXML::Document::getEncoding = 1 + XML::LibXML::Document::xmlEncoding = 2 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = (char *) self->encoding; + OUTPUT: + RETVAL + +void +setEncoding( self, encoding = NULL ) + xmlDocPtr self + char *encoding + PREINIT: + int charset = XML_CHAR_ENCODING_ERROR; + CODE: + if ( self->encoding != NULL ) { + xmlFree( (xmlChar*) self->encoding ); + } + if (encoding!=NULL && strlen(encoding)) { + self->encoding = xmlStrdup( (const xmlChar *)encoding ); + charset = (int)xmlParseCharEncoding( (const char*)self->encoding ); + if ( charset <= 0 ) { + charset = XML_CHAR_ENCODING_ERROR; + } + } else { + self->encoding=NULL; + charset = XML_CHAR_ENCODING_UTF8; + } + SetPmmNodeEncoding(self, charset); + + +int +standalone( self ) + xmlDocPtr self + ALIAS: + XML::LibXML::Document::xmlStandalone = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = self->standalone; + OUTPUT: + RETVAL + +void +setStandalone( self, value = 0 ) + xmlDocPtr self + int value + CODE: + if ( value > 0 ) { + self->standalone = 1; + } + else if ( value < 0 ) { + self->standalone = -1; + } + else { + self->standalone = 0; + } + +char* +version( self ) + xmlDocPtr self + ALIAS: + XML::LibXML::Document::getVersion = 1 + XML::LibXML::Document::xmlVersion = 2 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = (char *) self->version; + OUTPUT: + RETVAL + +void +setVersion( self, version ) + xmlDocPtr self + char *version + CODE: + if ( self->version != NULL ) { + xmlFree( (xmlChar*) self->version ); + } + self->version = xmlStrdup( (const xmlChar*)version ); + +int +compression( self ) + xmlDocPtr self + CODE: + RETVAL = xmlGetDocCompressMode(self); + OUTPUT: + RETVAL + +void +setCompression( self, zLevel ) + xmlDocPtr self + int zLevel + CODE: + xmlSetDocCompressMode(self, zLevel); + + +int +is_valid(self, ...) + xmlDocPtr self + PREINIT: + xmlValidCtxt cvp; + xmlDtdPtr dtd = NULL; + SV * dtd_sv; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + cvp.userData = saved_error; + cvp.error = (xmlValidityErrorFunc)LibXML_validity_error_ctx; + cvp.warning = (xmlValidityWarningFunc)LibXML_validity_warning_ctx; + + /* we need to initialize the node stack, because perl might + * already have messed it up. + */ + cvp.nodeNr = 0; + cvp.nodeTab = NULL; + cvp.vstateNr = 0; + cvp.vstateTab = NULL; + + PmmClearPSVI(self); + PmmInvalidatePSVI(self); + if (items > 1) { + dtd_sv = ST(1); + if ( sv_isobject(dtd_sv) && (SvTYPE(SvRV(dtd_sv)) == SVt_PVMG) ) { + dtd = (xmlDtdPtr)PmmSvNode(dtd_sv); + } + RETVAL = xmlValidateDtd(&cvp, self, dtd); + } + else { + RETVAL = xmlValidateDocument(&cvp, self); + } + CLEANUP_ERROR_HANDLER; + /* REPORT_ERROR(1); */ + OUTPUT: + RETVAL + +int +validate(self, ...) + xmlDocPtr self + PREINIT: + xmlValidCtxt cvp; + xmlDtdPtr dtd; + SV * dtd_sv; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + cvp.userData = saved_error; + cvp.error = (xmlValidityErrorFunc)LibXML_validity_error_ctx; + cvp.warning = (xmlValidityWarningFunc)LibXML_validity_warning_ctx; + /* we need to initialize the node stack, because perl might + * already have messed it up. + */ + cvp.nodeNr = 0; + cvp.nodeTab = NULL; + cvp.vstateNr = 0; + cvp.vstateTab = NULL; + + PmmClearPSVI(self); + PmmInvalidatePSVI(self); + + if (items > 1) { + dtd_sv = ST(1); + if ( sv_isobject(dtd_sv) && (SvTYPE(SvRV(dtd_sv)) == SVt_PVMG) ) { + dtd = (xmlDtdPtr)PmmSvNode(dtd_sv); + } + else { + CLEANUP_ERROR_HANDLER; + croak("is_valid: argument must be a DTD object"); + } + RETVAL = xmlValidateDtd(&cvp, self , dtd); + } + else { + RETVAL = xmlValidateDocument(&cvp, self); + } + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(RETVAL ? 1 : 0); + OUTPUT: + RETVAL + +SV* +cloneNode( self, deep=0 ) + xmlDocPtr self + int deep + PREINIT: + xmlDocPtr ret = NULL; + CODE: + ret = xmlCopyDoc( self, deep ); + if ( ret == NULL ) { + XSRETURN_UNDEF; + } + RETVAL = PmmNodeToSv((xmlNodePtr)ret, NULL); + OUTPUT: + RETVAL + +SV* +getElementById( self, id ) + xmlDocPtr self + const char * id + ALIAS: + XML::LibXML::Document::getElementsById = 1 + PREINIT: + xmlNodePtr elem; + xmlAttrPtr attr; + CODE: + PERL_UNUSED_VAR(ix); + if ( id != NULL ) { + attr = xmlGetID(self, (xmlChar *) id); + if (attr == NULL) + elem = NULL; + else if (attr->type == XML_ATTRIBUTE_NODE) + elem = attr->parent; + else if (attr->type == XML_ELEMENT_NODE) + elem = (xmlNodePtr) attr; + else + elem = NULL; + if (elem != NULL) { + RETVAL = PmmNodeToSv(elem, PmmPROXYNODE(self)); + } + else { + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +int +indexElements ( self ) + xmlDocPtr self + CODE: +#if LIBXML_VERSION >= 20508 + RETVAL = xmlXPathOrderDocElems( self ); +#else + RETVAL = -2; +#endif + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Node + +void +DESTROY( node ) + SV * node + PREINIT: + int count; + SV *is_shared; + CODE: +#ifdef XML_LIBXML_THREADS + if ( (is_shared = get_sv("XML::LibXML::__threads_shared", 0)) == NULL ) { + is_shared = &PL_sv_undef; + } + if ( SvTRUE(is_shared) ) { + dSP; + ENTER; + SAVETMPS; + PUSHMARK(SP); + XPUSHs(node); + PUTBACK; + count = call_pv("threads::shared::is_shared", G_SCALAR); + SPAGAIN; + if (count != 1) + croak("Couldn't checks if the variable is shared or not\n"); + is_shared = POPs; + PUTBACK; + FREETMPS; + LEAVE; + if (is_shared != &PL_sv_undef) { + XSRETURN_UNDEF; + } + } + if( PmmUSEREGISTRY ) { + SvLOCK(PROXY_NODE_REGISTRY_MUTEX); + PmmRegistryREFCNT_dec(SvPROXYNODE(node)); + } +#endif + PmmREFCNT_dec(SvPROXYNODE(node)); +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + SvUNLOCK(PROXY_NODE_REGISTRY_MUTEX); +#endif + +SV* +nodeName( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getName = 1 + XML::LibXML::Element::tagName = 2 + PREINIT: + xmlChar * name = NULL; + CODE: + PERL_UNUSED_VAR(ix); + name = (xmlChar*)domName( self ); + if ( name != NULL ) { + RETVAL = C2Sv(name,NULL); + xmlFree( name ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +localname( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getLocalName = 1 + XML::LibXML::Attr::name = 2 + XML::LibXML::Node::localName = 3 + CODE: + PERL_UNUSED_VAR(ix); + if ( self->type == XML_ELEMENT_NODE + || self->type == XML_ATTRIBUTE_NODE + || self->type == XML_ELEMENT_DECL + || self->type == XML_ATTRIBUTE_DECL ) { + RETVAL = C2Sv(self->name,NULL); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +prefix( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getPrefix = 1 + CODE: + PERL_UNUSED_VAR(ix); + if( ( self->type == XML_ELEMENT_NODE + || self->type == XML_ATTRIBUTE_NODE + || self->type == XML_PI_NODE ) + && self->ns != NULL + && self->ns->prefix != NULL ) { + RETVAL = C2Sv(self->ns->prefix, NULL); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +namespaceURI( self ) + xmlNodePtr self + ALIAS: + getNamespaceURI = 1 + PREINIT: + xmlChar * nsURI; + CODE: + PERL_UNUSED_VAR(ix); + if ( ( self->type == XML_ELEMENT_NODE + || self->type == XML_ATTRIBUTE_NODE + || self->type == XML_PI_NODE ) + && self->ns != NULL + && self->ns->href != NULL ) { + nsURI = xmlStrdup(self->ns->href); + RETVAL = C2Sv( nsURI, NULL ); + xmlFree( nsURI ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + + +SV* +lookupNamespaceURI( self, svprefix=&PL_sv_undef ) + xmlNodePtr self + SV * svprefix + PREINIT: + xmlChar * nsURI; + xmlChar * prefix = NULL; + xmlNsPtr ns; + CODE: + prefix = nodeSv2C( svprefix , self ); + if ( prefix != NULL && xmlStrlen(prefix) == 0) { + xmlFree( prefix ); + prefix = NULL; + } + ns = xmlSearchNs( self->doc, self, prefix ); + if ( prefix != NULL) { + xmlFree( prefix ); + } + if ( ns != NULL ) { + nsURI = xmlStrdup(ns->href); + RETVAL = C2Sv( nsURI, NULL ); + xmlFree( nsURI ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +lookupNamespacePrefix( self, svuri ) + xmlNodePtr self + SV * svuri + PREINIT: + xmlChar * nsprefix; + xmlChar * href = NULL; + CODE: + href = nodeSv2C( svuri , self ); + if ( href != NULL && xmlStrlen(href) > 0) { + xmlNsPtr ns = xmlSearchNsByHref( self->doc, self, href ); + xmlFree( href ); + if ( ns != NULL ) { + if ( ns->prefix != NULL ) { + nsprefix = xmlStrdup( ns->prefix ); + RETVAL = C2Sv( nsprefix, NULL ); + xmlFree(nsprefix); + } else { + RETVAL = newSVpv("",0); + } + } + else { + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + + +void +setNodeName( self , value ) + xmlNodePtr self + SV* value + ALIAS: + setName = 1 + PREINIT: + xmlChar* string; + xmlChar* localname; + xmlChar* prefix; + CODE: + PERL_UNUSED_VAR(ix); + string = nodeSv2C( value , self ); + if ( !LibXML_test_node_name( string ) ) { + xmlFree(string); + croak( "bad name" ); + } + if( ( self->type == XML_ELEMENT_NODE + || self->type == XML_ATTRIBUTE_NODE + || self->type == XML_PI_NODE) + && self->ns ){ + localname = xmlSplitQName2(string, &prefix); + if ( localname == NULL ) { + localname = xmlStrdup( string ); + } + xmlNodeSetName(self, localname ); + xmlFree(localname); + xmlFree(prefix); + } + else { + xs_warn("node name normal\n"); + xmlNodeSetName(self, string ); + } + xmlFree(string); + +void +setRawName( self, value ) + xmlNodePtr self + SV * value + PREINIT: + xmlChar* string; + xmlChar* localname; + xmlChar* prefix; + CODE: + string = nodeSv2C( value , self ); + if ( !string || xmlStrlen( string) <= 0 ) { + xmlFree(string); + XSRETURN_UNDEF; + } + if( ( self->type == XML_ELEMENT_NODE + || self->type == XML_ATTRIBUTE_NODE + || self->type == XML_PI_NODE) + && self->ns ){ + localname = xmlSplitQName2(string, &prefix); + xmlNodeSetName(self, localname ); + xmlFree(localname); + xmlFree(prefix); + } + else { + xmlNodeSetName(self, string ); + } + xmlFree(string); + + +SV* +nodeValue( self, useDomEncoding = &PL_sv_undef ) + xmlNodePtr self + SV * useDomEncoding + ALIAS: + XML::LibXML::Attr::value = 1 + XML::LibXML::Attr::getValue = 2 + XML::LibXML::Text::data = 3 + XML::LibXML::Node::getValue = 4 + XML::LibXML::Node::getData = 5 + PREINIT: + xmlChar * content = NULL; + CODE: + PERL_UNUSED_VAR(ix); + content = domGetNodeValue( self ); + + if ( content != NULL ) { + if ( SvTRUE(useDomEncoding) ) { + RETVAL = nodeC2Sv(content, self); + } + else { + RETVAL = C2Sv(content, NULL); + } + xmlFree(content); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +int +nodeType( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getType = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = self->type; + OUTPUT: + RETVAL + +SV* +parentNode( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Attr::ownerElement = 1 + XML::LibXML::Node::getParentNode = 2 + XML::LibXML::Attr::getOwnerElement = 3 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = PmmNodeToSv( self->parent, + PmmOWNERPO( PmmPROXYNODE(self) ) ); + OUTPUT: + RETVAL + +SV* +nextSibling( self ) + xmlNodePtr self + ALIAS: + getNextSibling = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = PmmNodeToSv( self->next, + PmmOWNERPO(PmmPROXYNODE(self)) ); + OUTPUT: + RETVAL + +SV* +nextNonBlankSibling( self ) + xmlNodePtr self + PREINIT: + xmlNodePtr next; + CODE: + next = self->next; + while (next != NULL && xmlIsBlankNode(next)) + next = next->next; + RETVAL = PmmNodeToSv( next, + PmmOWNERPO(PmmPROXYNODE(self)) ); + OUTPUT: + RETVAL + + +SV* +previousSibling( self ) + xmlNodePtr self + ALIAS: + getPreviousSibling = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = PmmNodeToSv( self->prev, + PmmOWNERPO( PmmPROXYNODE(self) ) ); + OUTPUT: + RETVAL + +SV* +previousNonBlankSibling( self ) + xmlNodePtr self + PREINIT: + xmlNodePtr prev; + CODE: + prev = self->prev; + while (prev != NULL && xmlIsBlankNode(prev)) + prev = prev->prev; + RETVAL = PmmNodeToSv( prev, + PmmOWNERPO(PmmPROXYNODE(self)) ); + OUTPUT: + RETVAL + + +void +_childNodes( self, only_nonblank = 0 ) + xmlNodePtr self + int only_nonblank + ALIAS: + XML::LibXML::Node::getChildnodes = 1 + PREINIT: + xmlNodePtr cld; + SV * element; + int len = 0; + int wantarray = GIMME_V; + PPCODE: + PERL_UNUSED_VAR(ix); + if ( self->type != XML_ATTRIBUTE_NODE ) { + cld = self->children; + xs_warn("childnodes start"); + while ( cld ) { + if ( !(only_nonblank && xmlIsBlankNode(cld)) ) { + if( wantarray != G_SCALAR ) { + element = PmmNodeToSv(cld, PmmOWNERPO(PmmPROXYNODE(self)) ); + XPUSHs(sv_2mortal(element)); + } + len++; + } + cld = cld->next; + } + } + if ( wantarray == G_SCALAR ) { + XPUSHs(sv_2mortal(newSViv(len)) ); + } + +void +_getChildrenByTagNameNS( self, namespaceURI, node_name ) + xmlNodePtr self + SV * namespaceURI + SV * node_name + PREINIT: + xmlChar * name; + xmlChar * nsURI; + xmlNodePtr cld; + SV * element; + int len = 0; + int name_wildcard = 0; + int ns_wildcard = 0; + int wantarray = GIMME_V; + PPCODE: + name = nodeSv2C(node_name, self ); + nsURI = nodeSv2C(namespaceURI, self ); + + if ( nsURI != NULL ) { + if (xmlStrlen(nsURI) == 0 ) { + xmlFree(nsURI); + nsURI = NULL; + } else if (xmlStrcmp( nsURI, (xmlChar *)"*" )==0) { + ns_wildcard = 1; + } + } + if ( name !=NULL && xmlStrcmp( name, (xmlChar *)"*" ) == 0) { + name_wildcard = 1; + } + if ( self->type != XML_ATTRIBUTE_NODE ) { + cld = self->children; + xs_warn("childnodes start"); + while ( cld ) { + if (((name_wildcard && (cld->type == XML_ELEMENT_NODE)) || + xmlStrcmp( name, cld->name ) == 0) + && (ns_wildcard || + (cld->ns != NULL && + xmlStrcmp(nsURI,cld->ns->href) == 0 ) || + (cld->ns == NULL && nsURI == NULL))) { + if( wantarray != G_SCALAR ) { + element = PmmNodeToSv(cld, PmmOWNERPO(PmmPROXYNODE(self)) ); + XPUSHs(sv_2mortal(element)); + } + len++; + } + cld = cld->next; + } + } + if ( wantarray == G_SCALAR ) { + XPUSHs(sv_2mortal(newSViv(len)) ); + } + xmlFree(name); + if (nsURI) xmlFree(nsURI); + +SV* +firstChild( self ) + xmlNodePtr self + ALIAS: + getFirstChild = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = PmmNodeToSv( self->children, + PmmOWNERPO( PmmPROXYNODE(self) ) ); + OUTPUT: + RETVAL + +SV* +firstNonBlankChild( self ) + xmlNodePtr self + PREINIT: + xmlNodePtr child; + CODE: + child = self->children; + while (child !=NULL && xmlIsBlankNode(child)) + child = child->next; + RETVAL = PmmNodeToSv( child, + PmmOWNERPO( PmmPROXYNODE(self) ) ); + OUTPUT: + RETVAL + +SV* +lastChild( self ) + xmlNodePtr self + ALIAS: + getLastChild = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = PmmNodeToSv( self->last, + PmmOWNERPO( PmmPROXYNODE(self) ) ); + OUTPUT: + RETVAL + +void +_attributes( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getAttributes = 1 + PREINIT: + xmlAttrPtr attr = NULL; + xmlNsPtr ns = NULL; + SV * element; + int len=0; + int wantarray = GIMME_V; + PPCODE: + PERL_UNUSED_VAR(ix); + if ( self->type != XML_ATTRIBUTE_NODE + && self->type != XML_DTD_NODE ) { + attr = self->properties; + while ( attr != NULL ) { + if ( wantarray != G_SCALAR ) { + element = PmmNodeToSv((xmlNodePtr)attr, + PmmOWNERPO(PmmPROXYNODE(self)) ); + XPUSHs(sv_2mortal(element)); + } + attr = attr->next; + len++; + } + if (self->type == XML_ELEMENT_NODE) { + ns = self->nsDef; + while ( ns != NULL ) { + const char * CLASS = "XML::LibXML::Namespace"; + if ( wantarray != G_SCALAR ) { + /* namespace handling is kinda odd: + * as soon we have a namespace isolated from its + * owner, we loose the context. therefore it is + * forbidden to access the NS information directly. + * instead the use will receive a copy of the real + * namespace, that can be destroied and is not + * bound to a document. + * + * this avoids segfaults in the end. + */ + if ((ns->prefix != NULL || ns->href != NULL)) { + xmlNsPtr tns = xmlCopyNamespace(ns); + if ( tns != NULL ) { + element = sv_newmortal(); + XPUSHs(sv_setref_pv( element, + (char *)CLASS, + (void*)tns)); + } + } + } + ns = ns->next; + len++; + } + } + } + if( wantarray == G_SCALAR ) { + XPUSHs( sv_2mortal(newSViv(len)) ); + } + +int +hasChildNodes( self ) + xmlNodePtr self + CODE: + if ( self->type == XML_ATTRIBUTE_NODE ) { + RETVAL = 0; + } + else { + RETVAL = self->children ? 1 : 0 ; + } + OUTPUT: + RETVAL + +int +hasAttributes( self ) + xmlNodePtr self + CODE: + if ( self->type == XML_ATTRIBUTE_NODE + || self->type == XML_DTD_NODE ) { + RETVAL = 0; + } + else { + RETVAL = self->properties ? 1 : 0 ; + } + OUTPUT: + RETVAL + +SV* +ownerDocument( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getOwnerDocument = 1 + CODE: + PERL_UNUSED_VAR(ix); + xs_warn( "GET OWNERDOC\n" ); + if( self != NULL + && self->doc != NULL ){ + RETVAL = PmmNodeToSv((xmlNodePtr)(self->doc), NULL); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +ownerNode( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::getOwner = 1 + XML::LibXML::Node::getOwnerElement = 2 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = PmmNodeToSv(PmmNODE(PmmOWNERPO(PmmPROXYNODE(self))), NULL); + OUTPUT: + RETVAL + + +void +normalize( self ) + xmlNodePtr self + CODE: + domNodeNormalize( self ); + + +SV* +insertBefore( self, nNode, refNode ) + xmlNodePtr self + xmlNodePtr nNode + SV * refNode + PREINIT: + xmlNodePtr oNode=NULL, rNode; + INIT: + oNode = PmmSvNode(refNode); + CODE: + rNode = domInsertBefore( self, nNode, oNode ); + if ( rNode != NULL ) { + RETVAL = PmmNodeToSv( rNode, + PmmOWNERPO(PmmPROXYNODE(self)) ); + if (rNode->type == XML_DTD_NODE) { + LibXML_set_int_subset(self->doc, rNode); + } + PmmFixOwner(PmmPROXYNODE(rNode), PmmOWNERPO(PmmPROXYNODE(self))); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +insertAfter( self, nNode, refNode ) + xmlNodePtr self + xmlNodePtr nNode + SV* refNode + PREINIT: + xmlNodePtr oNode = NULL, rNode; + INIT: + oNode = PmmSvNode(refNode); + CODE: + rNode = domInsertAfter( self, nNode, oNode ); + if ( rNode != NULL ) { + RETVAL = PmmNodeToSv( rNode, + PmmOWNERPO(PmmPROXYNODE(self)) ); + if (rNode->type == XML_DTD_NODE) { + LibXML_set_int_subset(self->doc, rNode); + } + PmmFixOwner(PmmPROXYNODE(rNode), PmmOWNERPO(PmmPROXYNODE(self))); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +replaceChild( self, nNode, oNode ) + xmlNodePtr self + xmlNodePtr nNode + xmlNodePtr oNode + PREINIT: + xmlNodePtr ret = NULL; + CODE: + // if newNode == oldNode or self == newNode then do nothing, just return nNode. + if (nNode == oNode || self == nNode ) { + ret = nNode; + RETVAL = PmmNodeToSv(ret, PmmOWNERPO(PmmPROXYNODE(ret))); + } + else{ + if ( self->type == XML_DOCUMENT_NODE ) { + switch ( nNode->type ) { + case XML_ELEMENT_NODE: + warn("replaceChild with an element on a document node not supported yet!"); + XSRETURN_UNDEF; + break; + case XML_DOCUMENT_FRAG_NODE: + warn("replaceChild with a document fragment node on a document node not supported yet!"); + XSRETURN_UNDEF; + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + warn("replaceChild with a text node not supported on a document node!"); + XSRETURN_UNDEF; + break; + default: + break; + } + } + ret = domReplaceChild( self, nNode, oNode ); + if (ret == NULL) { + XSRETURN_UNDEF; + } + else { + LibXML_reparent_removed_node(ret); + RETVAL = PmmNodeToSv(ret, PmmOWNERPO(PmmPROXYNODE(ret))); + if (nNode->type == XML_DTD_NODE) { + LibXML_set_int_subset(nNode->doc, nNode); + } + if ( nNode->_private != NULL ) { + PmmFixOwner( PmmPROXYNODE(nNode), + PmmOWNERPO(PmmPROXYNODE(self)) ); + } + } + } + OUTPUT: + RETVAL + +SV* +replaceNode( self,nNode ) + xmlNodePtr self + xmlNodePtr nNode + PREINIT: + xmlNodePtr ret = NULL; + ProxyNodePtr owner = NULL; + CODE: + if ( domIsParent( self, nNode ) == 1 ) { + XSRETURN_UNDEF; + } + owner = PmmOWNERPO(PmmPROXYNODE(self)); + + if ( self->type != XML_ATTRIBUTE_NODE ) { + ret = domReplaceChild( self->parent, nNode, self); + } + else { + ret = xmlReplaceNode( self, nNode ); + } + if ( ret ) { + LibXML_reparent_removed_node(ret); + RETVAL = PmmNodeToSv(ret, PmmOWNERPO(PmmPROXYNODE(ret))); + if (nNode->type == XML_DTD_NODE) { + LibXML_set_int_subset(nNode->doc, nNode); + } + if ( nNode->_private != NULL ) { + PmmFixOwner(PmmPROXYNODE(nNode), owner); + } + } + else { + croak( "replacement failed" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +removeChild( self, node ) + xmlNodePtr self + xmlNodePtr node + PREINIT: + xmlNodePtr ret; + CODE: + ret = domRemoveChild( self, node ); + if (ret == NULL) { + XSRETURN_UNDEF; + } + else { + LibXML_reparent_removed_node(ret); + RETVAL = PmmNodeToSv(ret, NULL); + } + OUTPUT: + RETVAL + +void +removeChildNodes( self ) + xmlNodePtr self + PREINIT: + xmlNodePtr elem, fragment; + ProxyNodePtr docfrag; + CODE: + docfrag = PmmNewFragment( self->doc ); + fragment = PmmNODE( docfrag ); + elem = self->children; + while ( elem ) { + xmlNodePtr next = elem->next; + xmlUnlinkNode( elem ); + if (elem->type == XML_ATTRIBUTE_NODE + || elem->type == XML_DTD_NODE) { + if (PmmPROXYNODE(elem) == NULL) { + xmlFreeNode(elem); + } + } + else { + /* this following piece is the function of domAppendChild() + * but in this special case we can avoid most of the logic of + * that function. + */ + if ( fragment->children != NULL ) { + xs_warn("unlink node!\n"); + domAddNodeToList( elem, fragment->last, NULL ); + } + else { + fragment->children = elem; + fragment->last = elem; + elem->parent= fragment; + } + PmmFixOwnerNode( elem, docfrag ); + } + elem = next; + } + + self->children = self->last = NULL; + if ( PmmREFCNT(docfrag) <= 0 ) { + xs_warn( "have not references left" ); + PmmREFCNT_inc( docfrag ); + PmmREFCNT_dec( docfrag ); + } + +void +unbindNode( self ) + xmlNodePtr self + ALIAS: + XML::LibXML::Node::unlink = 1 + XML::LibXML::Node::unlinkNode = 2 + PREINIT: + ProxyNodePtr docfrag = NULL; + CODE: + PERL_UNUSED_VAR(ix); + if ( self->type != XML_DOCUMENT_NODE + && self->type != XML_DOCUMENT_FRAG_NODE ) { + xmlUnlinkNode( self ); + LibXML_reparent_removed_node(self); + } + +SV* +appendChild( self, nNode ) + xmlNodePtr self + xmlNodePtr nNode + PREINIT: + xmlNodePtr rNode; + CODE: + if (self->type == XML_DOCUMENT_NODE ) { + /* NOT_SUPPORTED_ERR + */ + switch ( nNode->type ) { + case XML_ELEMENT_NODE: + warn("Appending an element to a document node not supported yet!"); + XSRETURN_UNDEF; + break; + case XML_DOCUMENT_FRAG_NODE: + warn("Appending a document fragment node to a document node not supported yet!"); + XSRETURN_UNDEF; + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + warn("Appending text node not supported on a document node yet!"); + XSRETURN_UNDEF; + break; + default: + break; + } + } + + rNode = domAppendChild( self, nNode ); + + if ( rNode == NULL ) { + XSRETURN_UNDEF; + } + + RETVAL = PmmNodeToSv( nNode, + PmmOWNERPO(PmmPROXYNODE(self)) ); + if (nNode->type == XML_DTD_NODE) { + LibXML_set_int_subset(self->doc, nNode); + } + PmmFixOwner( SvPROXYNODE(RETVAL), PmmPROXYNODE(self) ); + OUTPUT: + RETVAL + +SV* +addChild( self, nNode ) + xmlNodePtr self + xmlNodePtr nNode + PREINIT: + xmlNodePtr retval = NULL; + ProxyNodePtr proxy; + CODE: + switch ( nNode->type ) { + case XML_DOCUMENT_FRAG_NODE: + croak("Adding document fragments with addChild not supported!"); + XSRETURN_UNDEF; + case XML_DOCUMENT_NODE : + case XML_HTML_DOCUMENT_NODE : + case XML_DOCB_DOCUMENT_NODE : + croak("addChild: HIERARCHY_REQUEST_ERR\n"); + XSRETURN_UNDEF; + case XML_NOTATION_NODE : + case XML_NAMESPACE_DECL : + case XML_DTD_NODE : + case XML_DOCUMENT_TYPE_NODE : + case XML_ENTITY_DECL : + case XML_ELEMENT_DECL : + case XML_ATTRIBUTE_DECL : + croak("addChild: unsupported node type!"); + XSRETURN_UNDEF; + default: + break; + } + + xmlUnlinkNode(nNode); + proxy = PmmPROXYNODE(nNode); + retval = xmlAddChild( self, nNode ); + + if ( retval == NULL ) { + croak( "Error: addChild failed (check node types)!\n" ); + } + + if ( retval != nNode ) { + xs_warn( "node was lost during operation\n" ); + PmmNODE(proxy) = NULL; + } + + RETVAL = PmmNodeToSv( retval, + PmmOWNERPO(PmmPROXYNODE(self)) ); + if ( retval != self ) { + PmmFixOwner( SvPROXYNODE(RETVAL), PmmPROXYNODE(self) ); + } + OUTPUT: + RETVAL + + +SV* +addSibling( self, nNode ) + xmlNodePtr self + xmlNodePtr nNode + PREINIT: + xmlNodePtr ret = NULL; + ProxyNodePtr owner = NULL; + CODE: + if ( nNode->type == XML_DOCUMENT_FRAG_NODE ) { + croak("Adding document fragments with addSibling not yet supported!"); + XSRETURN_UNDEF; + } + owner = PmmOWNERPO(PmmPROXYNODE(self)); + + if (self->type == XML_TEXT_NODE && nNode->type == XML_TEXT_NODE + && self->name == nNode->name) { + /* As a result of text merging, the added node may be freed. */ + xmlNodePtr copy = xmlCopyNode(nNode, 0); + ret = xmlAddSibling(self, copy); + + if (ret) { + RETVAL = PmmNodeToSv(ret, owner); + /* Unlink original node. */ + xmlUnlinkNode(nNode); + LibXML_reparent_removed_node(nNode); + } + else { + xmlFreeNode(copy); + XSRETURN_UNDEF; + } + } + else { + ret = xmlAddSibling( self, nNode ); + + if ( ret ) { + RETVAL = PmmNodeToSv(ret, owner); + if (nNode->type == XML_DTD_NODE) { + LibXML_set_int_subset(self->doc, nNode); + } + PmmFixOwner(SvPROXYNODE(RETVAL), owner); + } + else { + XSRETURN_UNDEF; + } + } + OUTPUT: + RETVAL + +SV* +cloneNode( self, deep=0 ) + xmlNodePtr self + int deep + PREINIT: + xmlNodePtr ret; + xmlDocPtr doc = NULL; + ProxyNodePtr docfrag = NULL; + CODE: + ret = PmmCloneNode( self, deep ); + if ( ret == NULL ) { + XSRETURN_UNDEF; + } + + if ( ret->type == XML_DTD_NODE ) { + RETVAL = PmmNodeToSv(ret, NULL); + } + else { + doc = self->doc; + + if ( doc != NULL ) { + xmlSetTreeDoc(ret, doc); /* setting to self, no need to clear psvi */ + } + + docfrag = PmmNewFragment( doc ); + xmlAddChild( PmmNODE(docfrag), ret ); + RETVAL = PmmNodeToSv(ret, docfrag); + } + OUTPUT: + RETVAL + +int +isSameNode( self, oNode ) + xmlNodePtr self + xmlNodePtr oNode + ALIAS: + XML::LibXML::Node::isEqual = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = ( self == oNode ) ? 1 : 0; + OUTPUT: + RETVAL + +IV +unique_key( self ) + xmlNodePtr self + CODE: + /* Cast pointer to IV */ + RETVAL = PTR2IV(self); + OUTPUT: + RETVAL + +SV * +baseURI( self ) + xmlNodePtr self + PREINIT: + xmlChar * uri; + CODE: + uri = xmlNodeGetBase( self->doc, self ); + RETVAL = C2Sv( uri, NULL ); + xmlFree( uri ); + OUTPUT: + RETVAL + +void +setBaseURI( self, URI ) + xmlNodePtr self + SV * URI + PREINIT: + xmlChar * uri; + CODE: + uri = nodeSv2C( URI, self ); + if ( uri != NULL ) { + xmlNodeSetBase( self, uri ); + } + +SV* +toString( self, format=0, useDomEncoding = &PL_sv_undef ) + xmlNodePtr self + SV * useDomEncoding + int format + ALIAS: + XML::LibXML::Node::serialize = 1 + PREINIT: + xmlBufferPtr buffer; + const xmlChar *ret = NULL; + SV* internalFlag = NULL; + int oldTagFlag = xmlSaveNoEmptyTags; + CODE: + PERL_UNUSED_VAR(ix); + internalFlag = get_sv("XML::LibXML::setTagCompression", 0); + + if ( internalFlag ) { + xmlSaveNoEmptyTags = SvTRUE(internalFlag); + } + buffer = xmlBufferCreate(); + + if ( format <= 0 ) { + xmlNodeDump( buffer, + self->doc, + self, 0, format); + } + else { + int t_indent_var = xmlIndentTreeOutput; + xmlIndentTreeOutput = 1; + xmlNodeDump( buffer, + self->doc, + self, 0, format); + xmlIndentTreeOutput = t_indent_var; + } + + ret = xmlBufferContent( buffer ); + + xmlSaveNoEmptyTags = oldTagFlag; + + if ( ret != NULL ) { + if ( useDomEncoding != &PL_sv_undef && SvTRUE(useDomEncoding) ) { + RETVAL = nodeC2Sv((xmlChar*)ret, PmmNODE(PmmPROXYNODE(self))) ; + SvUTF8_off(RETVAL); + } + else { + RETVAL = C2Sv((xmlChar*)ret, NULL) ; + } + xmlBufferFree( buffer ); + } + else { + xmlBufferFree( buffer ); + xs_warn("Failed to convert node to string"); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + + +SV * +_toStringC14N(self, comments=0, xpath=&PL_sv_undef, exclusive=0, inc_prefix_list=NULL, xpath_context) + xmlNodePtr self + int comments + SV * xpath + int exclusive + char** inc_prefix_list + SV * xpath_context + + PREINIT: + xmlChar *result = NULL; + xmlChar *nodepath = NULL; + xmlXPathContextPtr child_ctxt = NULL; + xmlXPathObjectPtr xpath_res = NULL; + xmlNodeSetPtr nodelist = NULL; + xmlNodePtr refNode = NULL; + PREINIT_SAVED_ERROR + INIT: + /* due to how c14n is implemented, the nodeset it receives must + include child nodes; ie, child nodes aren't assumed to be rendered. + so we use an xpath expression to find all of the child nodes. */ + + if ( self->doc == NULL ) { + croak("Node passed to toStringC14N must be part of a document"); + } + + refNode = self; + CODE: + if ( xpath != NULL && xpath != &PL_sv_undef ) { + nodepath = Sv2C( xpath, NULL ); + } + + if ( nodepath != NULL && xmlStrlen( nodepath ) == 0 ) { + xmlFree( nodepath ); + nodepath = NULL; + } + + if ( nodepath == NULL + && self->type != XML_DOCUMENT_NODE + && self->type != XML_HTML_DOCUMENT_NODE + && self->type != XML_DOCB_DOCUMENT_NODE + ) { + if (comments) + nodepath = xmlStrdup( (const xmlChar *) "(. | .//node() | .//@* | .//namespace::*)" ); + else + nodepath = xmlStrdup( (const xmlChar *) "(. | .//node() | .//@* | .//namespace::*)[not(self::comment())]" ); + } + + if ( nodepath != NULL ) { + if ( self->type == XML_DOCUMENT_NODE + || self->type == XML_HTML_DOCUMENT_NODE + || self->type == XML_DOCB_DOCUMENT_NODE ) { + refNode = xmlDocGetRootElement( self->doc ); + } + if (SvOK(xpath_context)) { + child_ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(xpath_context))); + if ( child_ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + } else { + xpath_context = NULL; + child_ctxt = xmlXPathNewContext(self->doc); + } + if (!child_ctxt) { + if ( nodepath != NULL ) { + xmlFree( nodepath ); + } + croak("Failed to create xpath context"); + } + + child_ctxt->node = self; + LibXML_configure_namespaces(child_ctxt); + + xpath_res = xmlXPathEval(nodepath, child_ctxt); + if (child_ctxt->namespaces != NULL) { + xmlFree( child_ctxt->namespaces ); + child_ctxt->namespaces = NULL; + } + if (!xpath_context) xmlXPathFreeContext(child_ctxt); + if ( nodepath != NULL ) { + xmlFree( nodepath ); + } + + if (xpath_res == NULL) { + croak("2 Failed to compile xpath expression"); + } + + nodelist = xpath_res->nodesetval; + if ( nodelist == NULL ) { + xmlXPathFreeObject(xpath_res); + croak( "cannot canonize empty nodeset!" ); + } + } + + INIT_ERROR_HANDLER; + + xmlC14NDocDumpMemory( self->doc, + nodelist, + exclusive, (xmlChar **) inc_prefix_list, + comments, + &result ); + + if ( xpath_res ) xmlXPathFreeObject(xpath_res); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + + if (result == NULL) { + croak("Failed to convert doc to string in doc->toStringC14N"); + } else { + RETVAL = C2Sv( result, NULL ); + xmlFree(result); + } + OUTPUT: + RETVAL + +SV* +string_value ( self, useDomEncoding = &PL_sv_undef ) + xmlNodePtr self + SV * useDomEncoding + ALIAS: + to_literal = 1 + textContent = 2 + PREINIT: + xmlChar * string = NULL; + CODE: + PERL_UNUSED_VAR(ix); + /* we can't just return a string, because of UTF8! */ + string = xmlXPathCastNodeToString(self); + if ( SvTRUE(useDomEncoding) ) { + RETVAL = nodeC2Sv(string, + self); + } + else { + RETVAL = C2Sv(string, + NULL); + } + xmlFree(string); + OUTPUT: + RETVAL + +double +to_number ( self ) + xmlNodePtr self + CODE: + RETVAL = xmlXPathCastNodeToNumber(self); + OUTPUT: + RETVAL + + +void +_find( pnode, pxpath, to_bool ) + SV* pnode + SV * pxpath + int to_bool + PREINIT: + xmlNodePtr node = PmmSvNode(pnode); + ProxyNodePtr owner = NULL; + xmlXPathObjectPtr found = NULL; + xmlNodeSetPtr nodelist = NULL; + xmlChar * xpath = NULL; + xmlXPathCompExprPtr comp = NULL; + PREINIT_SAVED_ERROR + INIT: + if ( node == NULL ) { + croak( "lost node" ); + } + if (sv_isobject(pxpath) && sv_isa(pxpath,"XML::LibXML::XPathExpression")) { + comp = INT2PTR(xmlXPathCompExprPtr,SvIV((SV*)SvRV( pxpath ))); + if (!comp) XSRETURN_UNDEF; + } else { + xpath = nodeSv2C(pxpath, node); + if ( !(xpath && xmlStrlen(xpath)) ) { + xs_warn( "bad xpath\n" ); + if ( xpath ) + xmlFree(xpath); + croak( "empty XPath found" ); + XSRETURN_UNDEF; + } + } + PPCODE: + INIT_ERROR_HANDLER; + if (comp) { + found = domXPathCompFind( node, comp, to_bool ); + } else { + found = domXPathFind( node, xpath, to_bool ); + xmlFree( xpath ); + } + CLEANUP_ERROR_HANDLER; + if (found) { + REPORT_ERROR(1); + switch (found->type) { + case XPATH_NODESET: + /* return as a NodeList */ + /* access ->nodesetval */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::NodeList", 0))); + nodelist = found->nodesetval; + if ( nodelist ) { + if ( nodelist->nodeNr > 0 ) { + int i; + const char * cls = "XML::LibXML::Node"; + xmlNodePtr tnode; + SV * element; + int l = nodelist->nodeNr; + + owner = PmmOWNERPO(SvPROXYNODE(pnode)); + for( i=0 ; i < l; i++){ + /* we have to create a new instance of an + * objectptr. and then + * place the current node into the new + * object. afterwards we can + * push the object to the array! + */ + tnode = nodelist->nodeTab[i]; + + /* let's be paranoid */ + if (tnode->type == XML_NAMESPACE_DECL) { + xmlNsPtr newns = xmlCopyNamespace((xmlNsPtr)tnode); + if ( newns != NULL ) { + element = NEWSV(0,0); + cls = PmmNodeTypeName( tnode ); + element = sv_setref_pv( element, + (const char *)cls, + (void*)newns + ); + } + else { + continue; + } + } + else { + element = PmmNodeToSv(tnode, owner); + } + + XPUSHs( sv_2mortal(element) ); + } + } + xmlXPathFreeNodeSet( found->nodesetval ); + found->nodesetval = NULL; + } + break; + case XPATH_BOOLEAN: + /* return as a Boolean */ + /* access ->boolval */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Boolean", 0))); + XPUSHs(sv_2mortal(newSViv(found->boolval))); + break; + case XPATH_NUMBER: + /* return as a Number */ + /* access ->floatval */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Number", 0))); + XPUSHs(sv_2mortal(newSVnv(found->floatval))); + break; + case XPATH_STRING: + /* access ->stringval */ + /* return as a Literal */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Literal", 0))); + XPUSHs(sv_2mortal(C2Sv(found->stringval, NULL))); + break; + default: + croak("Unknown XPath return type"); + } + xmlXPathFreeObject(found); + } else { + REPORT_ERROR(0); + } + +void +_findnodes( pnode, perl_xpath ) + SV* pnode + SV * perl_xpath + PREINIT: + xmlNodePtr node = PmmSvNode(pnode); + ProxyNodePtr owner = NULL; + xmlNodeSetPtr nodelist = NULL; + SV * element = NULL ; + xmlChar * xpath = NULL ; + xmlXPathCompExprPtr comp = NULL; + PREINIT_SAVED_ERROR + INIT: + if ( node == NULL ) { + if ( xpath ) + xmlFree(xpath); + croak( "lost node" ); + } + if (sv_isobject(perl_xpath) && sv_isa(perl_xpath,"XML::LibXML::XPathExpression")) { + comp = INT2PTR(xmlXPathCompExprPtr,SvIV((SV*)SvRV( perl_xpath ))); + if (!comp) XSRETURN_UNDEF; + } else { + xpath = nodeSv2C(perl_xpath, node); + if ( !(xpath && xmlStrlen(xpath)) ) { + xs_warn( "bad xpath\n" ); + if ( xpath ) + xmlFree(xpath); + croak( "empty XPath found" ); + XSRETURN_UNDEF; + } + } + PPCODE: + INIT_ERROR_HANDLER; + if (comp) { + nodelist = domXPathCompSelect( node, comp ); + } else { + nodelist = domXPathSelect( node, xpath ); + xmlFree(xpath); + } + CLEANUP_ERROR_HANDLER; + + if ( nodelist ) { + REPORT_ERROR(1); + if ( nodelist->nodeNr > 0 ) { + int i; + int len = nodelist->nodeNr; + const char * cls = "XML::LibXML::Node"; + xmlNodePtr tnode; + owner = PmmOWNERPO(SvPROXYNODE(pnode)); + + for(i=0 ; i < len; i++){ + /* we have to create a new instance of an objectptr. + * and then place the current node into the new object. + * afterwards we can push the object to the array! + */ + element = NULL; + tnode = nodelist->nodeTab[i]; + if (tnode->type == XML_NAMESPACE_DECL) { + xmlNsPtr newns = xmlCopyNamespace((xmlNsPtr)tnode); + if ( newns != NULL ) { + element = NEWSV(0,0); + cls = PmmNodeTypeName( tnode ); + element = sv_setref_pv( element, + (const char *)cls, + newns + ); + } + else { + continue; + } + } + else { + element = PmmNodeToSv(tnode, owner); + } + + XPUSHs( sv_2mortal(element) ); + } + } + xmlXPathFreeNodeSet( nodelist ); + } else { + REPORT_ERROR(0); + } + +void +getNamespaces( pnode ) + SV * pnode + ALIAS: + namespaces = 1 + PREINIT: + xmlNodePtr node; + xmlNsPtr ns = NULL; + xmlNsPtr newns = NULL; + SV* element = &PL_sv_undef; + const char * class = "XML::LibXML::Namespace"; + INIT: + PERL_UNUSED_VAR(ix); + node = PmmSvNode(pnode); + if ( node == NULL ) { + croak( "lost node" ); + } + PPCODE: + if (node->type == XML_ELEMENT_NODE) { + ns = node->nsDef; + while ( ns != NULL ) { + if (ns->prefix != NULL || ns->href != NULL) { + newns = xmlCopyNamespace(ns); + if ( newns != NULL ) { + element = NEWSV(0,0); + element = sv_setref_pv( element, + (const char *)class, + (void*)newns + ); + XPUSHs( sv_2mortal(element) ); + } + } + ns = ns->next; + } + } + +SV * +getNamespace( node ) + xmlNodePtr node + ALIAS: + localNamespace = 1 + localNS = 2 + PREINIT: + xmlNsPtr ns = NULL; + xmlNsPtr newns = NULL; + const char * class = "XML::LibXML::Namespace"; + CODE: + PERL_UNUSED_VAR(ix); + if ( node->type == XML_ELEMENT_NODE + || node->type == XML_ATTRIBUTE_NODE + || node->type == XML_PI_NODE ) { + ns = node->ns; + if ( ns != NULL ) { + newns = xmlCopyNamespace(ns); + if ( newns != NULL ) { + RETVAL = NEWSV(0,0); + RETVAL = sv_setref_pv( RETVAL, + (const char *)class, + (void*)newns + ); + } else { + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + } else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + + +SV * +nodePath( self ) + xmlNodePtr self + PREINIT: + xmlChar * path = NULL; + CODE: + path = xmlGetNodePath( self ); + if ( path == NULL ) { + croak( "cannot calculate path for the given node" ); + } + RETVAL = C2Sv( path, NULL ); + xmlFree(path); + OUTPUT: + RETVAL + +int +line_number( self ) + xmlNodePtr self + CODE: + RETVAL = xmlGetLineNo( self ); + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Element + +SV* +new(CLASS, name ) + char * name + PREINIT: + xmlNodePtr newNode; + ProxyNodePtr docfrag = NULL; + CODE: + docfrag = PmmNewFragment(NULL); + newNode = xmlNewNode( NULL, (const xmlChar*)name ); + newNode->doc = NULL; + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode, docfrag ); + OUTPUT: + RETVAL + +int +_setNamespace(self, namespaceURI, namespacePrefix = &PL_sv_undef, flag = 1 ) + SV * self + SV * namespaceURI + SV * namespacePrefix + int flag + PREINIT: + xmlNodePtr node = PmmSvNode(self); + xmlChar * nsURI = nodeSv2C(namespaceURI,node); + xmlChar * nsPrefix = NULL; + xmlNsPtr ns = NULL; + INIT: + if ( node == NULL ) { + croak( "lost node" ); + } + CODE: + /* if ( !nsURI ){ + XSRETURN_UNDEF; + } */ + + nsPrefix = nodeSv2C(namespacePrefix, node); + if ( xmlStrlen( nsPrefix ) == 0 ) { + xmlFree(nsPrefix); + nsPrefix = NULL; + } + if ( xmlStrlen( nsURI ) == 0 ) { + xmlFree(nsURI); + nsURI = NULL; + } + if ( nsPrefix == NULL && nsURI == NULL ) { + /* special case: empty namespace */ + if ( (ns = xmlSearchNs(node->doc, node, NULL)) && + ( ns->href && xmlStrlen( ns->href ) != 0 ) ) { + /* won't take it */ + RETVAL = 0; + } else if ( flag ) { + /* no namespace */ + xmlSetNs(node, NULL); + RETVAL = 1; + } else { + RETVAL = 0; + } + } + else if ( flag && (ns = xmlSearchNs(node->doc, node, nsPrefix)) ) { + /* user just wants to set the namespace for the node */ + /* try to reuse an existing declaration for the prefix */ + if ( xmlStrEqual( ns->href, nsURI ) ) { + RETVAL = 1; + } + else if ( (ns = xmlNewNs( node, nsURI, nsPrefix )) ) { + RETVAL = 1; + } + else { + RETVAL = 0; + } + } + else if ( (ns = xmlNewNs( node, nsURI, nsPrefix )) ) + RETVAL = 1; + else + RETVAL = 0; + + if ( flag && ns ) { + xmlSetNs(node, ns); + } + if ( nsPrefix ) xmlFree(nsPrefix); + if ( nsURI ) xmlFree(nsURI); + OUTPUT: + RETVAL + +int +setNamespaceDeclURI( self, svprefix, newURI ) + xmlNodePtr self + SV * svprefix + SV * newURI + PREINIT: + xmlChar * prefix = NULL; + xmlChar * nsURI = NULL; + xmlNsPtr ns; + CODE: + RETVAL = 0; + prefix = nodeSv2C( svprefix , self ); + nsURI = nodeSv2C( newURI , self ); + /* null empty values */ + if ( prefix && xmlStrlen(prefix) == 0) { + xmlFree( prefix ); + prefix = NULL; + } + if ( nsURI && xmlStrlen(nsURI) == 0) { + xmlFree( nsURI ); + nsURI = NULL; + } + ns = self->nsDef; + while ( ns ) { + if ((ns->prefix || ns->href ) && + ( xmlStrcmp( ns->prefix, prefix ) == 0 )) { + if (ns->href) xmlFree((char*)ns->href); + ns->href = nsURI; + if ( nsURI == NULL ) { + domRemoveNsRefs( self, ns ); + } else + nsURI = NULL; /* do not free it */ + RETVAL = 1; + break; + } else { + ns = ns->next; + } + } + if ( prefix ) xmlFree( prefix ); + if ( nsURI ) xmlFree( nsURI ); + OUTPUT: + RETVAL + +int +setNamespaceDeclPrefix( self, svprefix, newPrefix ) + xmlNodePtr self + SV * svprefix + SV * newPrefix + PREINIT: + xmlChar * prefix = NULL; + xmlChar * nsPrefix = NULL; + xmlNsPtr ns; + CODE: + RETVAL = 0; + prefix = nodeSv2C( svprefix , self ); + nsPrefix = nodeSv2C( newPrefix , self ); + /* null empty values */ + if ( prefix != NULL && xmlStrlen(prefix) == 0) { + xmlFree( prefix ); + prefix = NULL; + } + if ( nsPrefix != NULL && xmlStrlen(nsPrefix) == 0) { + xmlFree( nsPrefix ); + nsPrefix = NULL; + } + if ( xmlStrcmp( prefix, nsPrefix ) == 0 ) { + RETVAL = 1; + } else { + /* check that new prefix is not in scope */ + ns = xmlSearchNs( self->doc, self, nsPrefix ); + if ( ns != NULL ) { + if (nsPrefix != NULL) xmlFree( nsPrefix ); + if (prefix != NULL) xmlFree( prefix ); + croak("setNamespaceDeclPrefix: prefix '%s' is in use", ns->prefix); + } + /* lookup the declaration */ + ns = self->nsDef; + while ( ns != NULL ) { + if ((ns->prefix != NULL || ns->href != NULL) && + xmlStrcmp( ns->prefix, prefix ) == 0 ) { + if ( ns->href == NULL && nsPrefix != NULL ) { + /* xmlns:foo="" - no go */ + if ( prefix != NULL) xmlFree(prefix); + croak("setNamespaceDeclPrefix: cannot set non-empty prefix for empty namespace"); + } + if ( ns->prefix != NULL ) + xmlFree( (xmlChar*)ns->prefix ); + ns->prefix = nsPrefix; + nsPrefix = NULL; /* do not free it */ + RETVAL = 1; + break; + } else { + ns = ns->next; + } + } + } + if ( nsPrefix != NULL ) xmlFree(nsPrefix); + if ( prefix != NULL) xmlFree(prefix); + OUTPUT: + RETVAL + + +SV* +_getNamespaceDeclURI( self, ns_prefix ) + xmlNodePtr self + SV * ns_prefix + PREINIT: + xmlChar * prefix; + xmlNsPtr ns; + CODE: + prefix = nodeSv2C(ns_prefix, self ); + if ( prefix != NULL && xmlStrlen(prefix) == 0) { + xmlFree( prefix ); + prefix = NULL; + } + RETVAL = &PL_sv_undef; + ns = self->nsDef; + while ( ns != NULL ) { + if ( (ns->prefix != NULL || ns->href != NULL) && + xmlStrcmp( ns->prefix, prefix ) == 0 ) { + RETVAL = C2Sv(ns->href, NULL); + break; + } else { + ns = ns->next; + } + } + if ( prefix != NULL ) { + xmlFree( prefix ); + } + + OUTPUT: + RETVAL + +int +hasAttribute( self, attr_name ) + xmlNodePtr self + SV * attr_name + PREINIT: + xmlChar * name; + CODE: + name = nodeSv2C(attr_name, self ); + if ( ! name ) { + XSRETURN_UNDEF; + } + if ( domGetAttrNode( self, name ) ) { + RETVAL = 1; + } + else { + RETVAL = 0; + } + xmlFree(name); + OUTPUT: + RETVAL + +int +hasAttributeNS( self, namespaceURI, attr_name ) + xmlNodePtr self + SV * namespaceURI + SV * attr_name + PREINIT: + xmlChar * name; + xmlChar * nsURI; + xmlNodePtr attr; + CODE: + name = nodeSv2C(attr_name, self ); + nsURI = nodeSv2C(namespaceURI, self ); + + if ( name == NULL ) { + if ( nsURI != NULL ) { + xmlFree(nsURI); + } + XSRETURN_UNDEF; + } + if ( nsURI != NULL && xmlStrlen(nsURI) == 0 ){ + xmlFree(nsURI); + nsURI = NULL; + } + attr = (xmlNodePtr) xmlHasNsProp( self, name, nsURI ); + if ( attr && attr->type == XML_ATTRIBUTE_NODE ) { + RETVAL = 1; + } + else { + RETVAL = 0; + } + + xmlFree(name); + if ( nsURI != NULL ){ + xmlFree(nsURI); + } + OUTPUT: + RETVAL + +SV* +_getAttribute( self, attr_name, useDomEncoding = 0 ) + xmlNodePtr self + SV * attr_name + int useDomEncoding + PREINIT: + xmlChar * name; + xmlChar * prefix = NULL; + xmlChar * localname = NULL; + xmlChar * ret = NULL; + xmlNsPtr ns = NULL; + CODE: + name = nodeSv2C(attr_name, self ); + if( !name ) { + XSRETURN_UNDEF; + } + + ret = xmlGetNoNsProp(self, name); + if ( ret == NULL ) { + localname = xmlSplitQName2(name, &prefix); + if ( localname != NULL ) { + ns = xmlSearchNs( self->doc, self, prefix ); + if ( ns != NULL ) { + ret = xmlGetNsProp(self, localname, ns->href); + } + if ( prefix != NULL) { + xmlFree( prefix ); + } + xmlFree( localname ); + } + } + xmlFree(name); + if ( ret ) { + if ( useDomEncoding ) { + RETVAL = nodeC2Sv(ret, self); + } + else { + RETVAL = C2Sv(ret, NULL); + } + xmlFree( ret ); + } + else { + XSRETURN_UNDEF; + } + + OUTPUT: + RETVAL + +void +_setAttribute( self, attr_name, attr_value ) + xmlNodePtr self + SV * attr_name + SV * attr_value + PREINIT: + xmlChar * name = NULL; + xmlChar * value = NULL; +#if LIBXML_VERSION < 20621 + xmlChar * prefix = NULL; + xmlChar * localname = NULL; +#endif + CODE: + name = nodeSv2C(attr_name, self ); + + if ( !LibXML_test_node_name(name) ) { + xmlFree(name); + croak( "bad name" ); + } + value = nodeSv2C(attr_value, self ); +#if LIBXML_VERSION >= 20621 + /* + * For libxml2-2.6.21 and later we can use just xmlSetProp + */ + xmlSetProp(self,name,value); +#else + /* + * but xmlSetProp does not work correctly for older libxml2 versions + * The following is copied from libxml2 source + * with xmlSplitQName3 replaced by xmlSplitQName2 for compatibility + * with older libxml2 versions + */ + localname = xmlSplitQName2(name, &prefix); + if (localname != NULL) { + xmlNsPtr ns; + ns = xmlSearchNs(self->doc, self, prefix); + if (prefix != NULL) + xmlFree(prefix); + if (ns != NULL) + xmlSetNsProp(self, ns, localname, value); + else + xmlSetNsProp(self, NULL, name, value); + xmlFree(localname); + } else { + xmlSetNsProp(self, NULL, name, value); + } +#endif + xmlFree(name); + xmlFree(value); + + +void +removeAttribute( self, attr_name ) + xmlNodePtr self + SV * attr_name + PREINIT: + xmlChar * name; + xmlAttrPtr xattr = NULL; + CODE: + name = nodeSv2C(attr_name, self ); + if ( name ) { + xattr = domGetAttrNode( self, name ); + + if ( xattr ) { + xmlUnlinkNode((xmlNodePtr)xattr); + if ( xattr->_private ) { + PmmFixOwner((ProxyNodePtr)xattr->_private, NULL); + } + else { + xmlFreeProp(xattr); + } + } + xmlFree(name); + } + +SV* +getAttributeNode( self, attr_name ) + xmlNodePtr self + SV * attr_name + PREINIT: + xmlChar * name; + xmlAttrPtr ret = NULL; + CODE: + name = nodeSv2C(attr_name, self ); + if ( !name ) { + XSRETURN_UNDEF; + } + + ret = domGetAttrNode( self, name ); + xmlFree(name); + if ( ret ) { + RETVAL = PmmNodeToSv( (xmlNodePtr)ret, + PmmOWNERPO(PmmPROXYNODE(self)) ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +setAttributeNode( self, attr_node ) + xmlNodePtr self + SV * attr_node + PREINIT: + xmlAttrPtr attr = (xmlAttrPtr)PmmSvNode( attr_node ); + xmlAttrPtr ret = NULL; + INIT: + if ( attr == NULL ) { + croak( "lost attribute" ); + } + CODE: + if ( attr != NULL && attr->type != XML_ATTRIBUTE_NODE ) { + XSRETURN_UNDEF; + } + if ( attr->doc != self->doc ) { + domImportNode( self->doc, (xmlNodePtr)attr, 1, 1); + } + ret = domGetAttrNode( self, attr->name ); + if ( ret != NULL ) { + if ( ret != attr ) { + xmlReplaceNode( (xmlNodePtr)ret, (xmlNodePtr)attr ); + } + else { + XSRETURN_UNDEF; + } + } + else { + xmlAddChild( self, (xmlNodePtr)attr ); + } + + if ( attr->_private != NULL ) { + PmmFixOwner( SvPROXYNODE(attr_node), PmmPROXYNODE(self) ); + } + + if ( ret == NULL ) { + XSRETURN_UNDEF; + } + + RETVAL = PmmNodeToSv( (xmlNodePtr)ret, NULL ); + PmmFixOwner( SvPROXYNODE(RETVAL), NULL ); + OUTPUT: + RETVAL + +SV * +_getAttributeNS( self, namespaceURI, attr_name, useDomEncoding = 0 ) + xmlNodePtr self + SV * namespaceURI + SV * attr_name + int useDomEncoding + PREINIT: + xmlChar * name; + xmlChar * nsURI; + xmlChar * ret = NULL; + CODE: + name = nodeSv2C( attr_name, self ); + nsURI = nodeSv2C( namespaceURI, self ); + if ( !name ) { + xmlFree(nsURI); + XSRETURN_UNDEF; + } + if ( nsURI && xmlStrlen(nsURI) ) { + ret = xmlGetNsProp( self, name, nsURI ); + } + else { + ret = xmlGetProp( self, name ); + } + + xmlFree( name ); + if ( nsURI ) { + xmlFree( nsURI ); + } + if ( ret ) { + if (useDomEncoding) { + RETVAL = nodeC2Sv( ret, self ); + } else { + RETVAL = C2Sv( ret, NULL ); + } + xmlFree( ret ); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +void +_setAttributeNS( self, namespaceURI, attr_name, attr_value ) + xmlNodePtr self + SV * namespaceURI + SV * attr_name + SV * attr_value + PREINIT: + xmlChar * nsURI; + xmlChar * name = NULL; + xmlChar * value = NULL; + xmlNsPtr ns = NULL; + xmlChar * localname = NULL; + xmlChar * prefix = NULL; + xmlNsPtr * all_ns = NULL; + int i; + INIT: + name = nodeSv2C( attr_name, self ); + + if ( !LibXML_test_node_name(name) ) { + xmlFree(name); + croak( "bad name" ); + } + + nsURI = nodeSv2C( namespaceURI, self ); + localname = xmlSplitQName2(name, &prefix); + if ( localname ) { + xmlFree( name ); + name = localname; + } + CODE: + value = nodeSv2C( attr_value, self ); + + if ( nsURI && xmlStrlen(nsURI) ) { + xs_warn( "found uri" ); + + ns = xmlSearchNsByHref( self->doc, self, nsURI ); + + /* + * check for any prefixed namespaces occluded by a default namespace + * because xmlSearchNsByHref will return default namespaces unless + * you are searching on an attribute node, which may not exist yet + */ + if ( ns && !ns->prefix ) + { + all_ns = xmlGetNsList(self->doc, self); + if ( all_ns ) + { + i = 0; + ns = all_ns[i]; + while ( ns ) + { + if ( ns->prefix && xmlStrEqual(ns->href, nsURI) ) + { + break; + } + ns = all_ns[i++]; + } + xmlFree(all_ns); + } + } + + if ( !ns ) { + /* create new ns */ + if ( prefix && xmlStrlen( prefix ) ) { + ns = xmlNewNs(self, nsURI , prefix); + } + else { + ns = NULL; + } + } + } + + if ( nsURI && xmlStrlen(nsURI) && !ns ) { + if ( prefix ) xmlFree( prefix ); + if ( nsURI ) xmlFree( nsURI ); + xmlFree( name ); + xmlFree( value ); + croak( "bad ns attribute!" ); + } + else { + /* warn( "set attribute %s->%s", name, value ); */ + xmlSetNsProp( self, ns, name, value ); + } + + if ( prefix ) { + xmlFree( prefix ); + } + if ( nsURI ) { + xmlFree( nsURI ); + } + xmlFree( name ); + xmlFree( value ); + +void +removeAttributeNS( self, namespaceURI, attr_name ) + xmlNodePtr self + SV * namespaceURI + SV * attr_name + PREINIT: + xmlChar * nsURI; + xmlChar * name = NULL; + xmlAttrPtr xattr = NULL; + CODE: + nsURI = nodeSv2C( namespaceURI, self ); + name = nodeSv2C( attr_name, self ); + if ( ! name ) { + xmlFree(nsURI); + XSRETURN_UNDEF; + } + + if ( nsURI && xmlStrlen(nsURI) ) { + xattr = xmlHasNsProp( self, name, nsURI ); + } + else { + xattr = xmlHasNsProp( self, name, NULL ); + } + if ( xattr && xattr->type == XML_ATTRIBUTE_NODE ) { + xmlUnlinkNode((xmlNodePtr)xattr); + if ( xattr->_private ) { + PmmFixOwner((ProxyNodePtr)xattr->_private, NULL); + } + else { + xmlFreeProp(xattr); + } + } + xmlFree(nsURI); + xmlFree( name ); + + +SV* +getAttributeNodeNS( self,namespaceURI, attr_name ) + xmlNodePtr self + SV * namespaceURI + SV * attr_name + PREINIT: + xmlChar * nsURI; + xmlChar * name; + xmlAttrPtr ret = NULL; + CODE: + nsURI = nodeSv2C(namespaceURI, self ); + name = nodeSv2C(attr_name, self ); + if ( !name ) { + xmlFree(nsURI); + XSRETURN_UNDEF; + } + if ( nsURI && xmlStrlen(nsURI) ) { + ret = xmlHasNsProp( self, name, nsURI ); + } + else { + ret = xmlHasNsProp( self, name, NULL ); + } + xmlFree(name); + if ( nsURI ) { + xmlFree(nsURI); + } + if ( ret && + ret->type == XML_ATTRIBUTE_NODE /* we don't want fixed attribute decls */ + ) { + RETVAL = PmmNodeToSv( (xmlNodePtr)ret, + PmmOWNERPO(PmmPROXYNODE(self)) ); + } + else { + /* warn("no prop\n"); */ + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +setAttributeNodeNS( self, attr_node ) + xmlNodePtr self + SV * attr_node + PREINIT: + xmlAttrPtr attr = (xmlAttrPtr)PmmSvNode( attr_node ); + xmlNsPtr ns = NULL; + xmlAttrPtr ret = NULL; + INIT: + if ( attr == NULL ) { + croak( "lost attribute node" ); + } + CODE: + if ( attr->type != XML_ATTRIBUTE_NODE ) { + XSRETURN_UNDEF; + } + + if ( attr->doc != self->doc ) { + domImportNode( self->doc, (xmlNodePtr)attr, 1,1); + } + + + ns = attr->ns; + if ( ns != NULL ) { + ret = xmlHasNsProp( self, ns->href, attr->name ); + } + else { + ret = xmlHasNsProp( self, NULL, attr->name ); + } + + if ( ret && ret->type == XML_ATTRIBUTE_NODE ) { + if ( ret != attr ) { + xmlReplaceNode( (xmlNodePtr)ret, (xmlNodePtr)attr ); + } + else { + XSRETURN_UNDEF; + } + } + else { + xmlAddChild( self, (xmlNodePtr)attr ); + xmlReconciliateNs(self->doc, self); + } + if ( attr->_private != NULL ) { + PmmFixOwner( SvPROXYNODE(attr_node), PmmPROXYNODE(self) ); + } + if ( ret != NULL && ret->type == XML_ATTRIBUTE_NODE ) { + RETVAL = PmmNodeToSv( (xmlNodePtr)ret, NULL ); + PmmFixOwner( SvPROXYNODE(RETVAL), NULL ); + } else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +removeAttributeNode( self, attr_node ) + xmlNodePtr self + SV * attr_node + PREINIT: + xmlAttrPtr attr = (xmlAttrPtr)PmmSvNode( attr_node ); + xmlAttrPtr ret; + INIT: + if ( attr == NULL ) { + croak( "lost attribute node" ); + } + CODE: + if ( attr->type != XML_ATTRIBUTE_NODE ) { + XSRETURN_UNDEF; + } + if ( attr->parent != self ) { + XSRETURN_UNDEF; + } + ret = attr; + xmlUnlinkNode( (xmlNodePtr)attr ); + RETVAL = PmmNodeToSv( (xmlNodePtr)ret, NULL ); + PmmFixOwner( SvPROXYNODE(RETVAL), NULL ); + OUTPUT: + RETVAL + +void +appendText( self, string ) + xmlNodePtr self + SV * string + ALIAS: + appendTextNode = 1 + XML::LibXML::DocumentFragment::appendText = 2 + XML::LibXML::DocumentFragment::appendTextNode = 3 + PREINIT: + xmlChar * content = NULL; + INIT: + PERL_UNUSED_VAR(ix); + content = nodeSv2C( string, self ); + if ( content == NULL ) { + XSRETURN_UNDEF; + } + if ( xmlStrlen(content) == 0 ) { + xmlFree( content ); + XSRETURN_UNDEF; + } + CODE: + xmlNodeAddContent( self, content ); + xmlFree(content); + + +void +appendTextChild( self, strname, strcontent=&PL_sv_undef, nsURI=&PL_sv_undef ) + xmlNodePtr self + SV * strname + SV * strcontent + SV * nsURI + PREINIT: + xmlChar * name; + xmlChar * content = NULL; + xmlChar * encstr = NULL; + INIT: + name = nodeSv2C( strname, self ); + if ( xmlStrlen(name) == 0 ) { + xmlFree(name); + XSRETURN_UNDEF; + } + CODE: + content = nodeSv2C(strcontent, self); + if ( content && xmlStrlen( content ) == 0 ) { + xmlFree(content); + content=NULL; + } + else if ( content ) { + encstr = xmlEncodeEntitiesReentrant( self->doc, content ); + xmlFree(content); + } + + xmlNewChild( self, NULL, name, encstr ); + + if ( encstr ) + xmlFree(encstr); + xmlFree(name); + +SV * +addNewChild( self, namespaceURI, nodename ) + xmlNodePtr self + SV * namespaceURI + SV * nodename + ALIAS: + XML::LibXML::DocumentFragment::addNewChild = 1 + PREINIT: + xmlChar * nsURI = NULL; + xmlChar * name = NULL; + xmlChar * localname = NULL; + xmlChar * prefix = NULL; + xmlNodePtr newNode = NULL; + xmlNodePtr prev = NULL; + xmlNsPtr ns = NULL; + CODE: + PERL_UNUSED_VAR(ix); + name = nodeSv2C(nodename, self); + if ( name && xmlStrlen( name ) == 0 ) { + xmlFree(name); + XSRETURN_UNDEF; + } + + nsURI = nodeSv2C(namespaceURI, self); + if ( nsURI && xmlStrlen( nsURI ) == 0 ) { + xmlFree(nsURI); + nsURI=NULL; + } + + if ( nsURI != NULL ) { + localname = xmlSplitQName2(name, &prefix); + ns = xmlSearchNsByHref(self->doc, self, nsURI); + + newNode = xmlNewDocNode(self->doc, + ns, + localname?localname:name, + NULL); + if ( ns == NULL ) { + xmlSetNs(newNode,xmlNewNs(newNode, nsURI, prefix)); + } + + xmlFree(localname); + xmlFree(prefix); + xmlFree(nsURI); + } + else { + newNode = xmlNewDocNode(self->doc, + NULL, + name, + NULL); + } + xmlFree(name); + /* add the node to the parent node */ + newNode->type = XML_ELEMENT_NODE; + newNode->parent = self; + newNode->doc = self->doc; + + if (self->children == NULL) { + self->children = newNode; + self->last = newNode; + } else { + prev = self->last; + prev->next = newNode; + newNode->prev = prev; + self->last = newNode; + } + RETVAL = PmmNodeToSv(newNode, PmmOWNERPO(PmmPROXYNODE(self)) ); + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Text + +SV * +new( CLASS, content ) + SV * content + PREINIT: + xmlChar * data; + xmlNodePtr newNode; + ProxyNodePtr docfrag = NULL; + CODE: + data = Sv2C(content, NULL); + newNode = xmlNewText( data ); + xmlFree(data); + if( newNode != NULL ) { + docfrag = PmmNewFragment( NULL ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV * +substringData( self, offset, length ) + xmlNodePtr self + int offset + int length + PREINIT: + xmlChar * data = NULL; + xmlChar * substr = NULL; + CODE: + if ( offset >= 0 && length >= 0 ) { + data = domGetNodeValue( self ); + if ( data != NULL ) { + substr = xmlUTF8Strsub( data, offset, length ); + RETVAL = C2Sv( (const xmlChar*)substr, NULL ); + xmlFree( substr ); + } + else { + XSRETURN_UNDEF; + } + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +void +setData( self, value ) + xmlNodePtr self + SV * value + ALIAS: + XML::LibXML::Attr::setValue = 1 + XML::LibXML::PI::_setData = 2 + PREINIT: + xmlChar * encstr = NULL; + CODE: + PERL_UNUSED_VAR(ix); + encstr = nodeSv2C(value,self); + domSetNodeValue( self, encstr ); + xmlFree(encstr); + +void +appendData( self, value ) + xmlNodePtr self + SV * value + PREINIT: + xmlChar * encstring = NULL; + int strlen = 0; + CODE: + encstring = Sv2C( value, + self->doc!=NULL ? self->doc->encoding : NULL ); + + if ( encstring != NULL ) { + strlen = xmlStrlen( encstring ); + xmlTextConcat( self, encstring, strlen ); + xmlFree( encstring ); + } + +void +insertData( self, offset, value ) + xmlNodePtr self + int offset + SV * value + PREINIT: + xmlChar * after= NULL; + xmlChar * data = NULL; + xmlChar * new = NULL; + xmlChar * encstring = NULL; + int dl = 0; + CODE: + if ( offset >= 0 ) { + encstring = Sv2C( value, + self->doc!=NULL ? self->doc->encoding : NULL ); + if ( encstring != NULL && xmlStrlen( encstring ) > 0 ) { + data = domGetNodeValue(self); + if ( data != NULL && xmlStrlen( data ) > 0 ) { + if ( xmlUTF8Strlen( data ) < offset ) { + data = xmlStrcat( data, encstring ); + domSetNodeValue( self, data ); + } + else { + dl = xmlUTF8Strlen( data ) - offset; + + if ( offset > 0 ) + new = xmlUTF8Strsub(data, 0, offset ); + + after = xmlUTF8Strsub(data, offset, dl ); + + if ( new != NULL ) { + new = xmlStrcat(new, encstring ); + } + else { + new = xmlStrdup( encstring ); + } + + if ( after != NULL ) + new = xmlStrcat(new, after ); + + domSetNodeValue( self, new ); + + xmlFree( new ); + xmlFree( after ); + } + xmlFree( data ); + } + else { + domSetNodeValue( self, encstring ); + } + xmlFree(encstring); + } + } + +void +deleteData( self, offset, length ) + xmlNodePtr self + int offset + int length + PREINIT: + xmlChar * data = NULL; + xmlChar * after = NULL; + xmlChar * new = NULL; + int len = 0; + int dl1 = 0; + int dl2 = 0; + CODE: + if ( length > 0 && offset >= 0 ) { + data = domGetNodeValue(self); + len = xmlUTF8Strlen( data ); + if ( data != NULL + && len > 0 + && len > offset ) { + dl1 = offset + length; + if ( offset > 0 ) + new = xmlUTF8Strsub( data, 0, offset ); + + if ( len > dl1 ) { + dl2 = len - dl1; + after = xmlUTF8Strsub( data, dl1, dl2 ); + if ( new != NULL ) { + new = xmlStrcat( new, after ); + xmlFree(after); + } + else { + new = after; + } + } + + domSetNodeValue( self, new ); + xmlFree(new); + } + } + +void +replaceData( self, offset,length, value ) + xmlNodePtr self + int offset + int length + SV * value + PREINIT: + xmlChar * after= NULL; + xmlChar * data = NULL; + xmlChar * new = NULL; + xmlChar * encstring = NULL; + int len = 0; + int dl1 = 0; + int dl2 = 0; + CODE: + if ( offset >= 0 ) { + encstring = Sv2C( value, + self->doc!=NULL ? self->doc->encoding : NULL ); + + if ( encstring != NULL && xmlStrlen( encstring ) > 0 ) { + data = domGetNodeValue(self); + len = xmlUTF8Strlen( data ); + + if ( data != NULL + && len > 0 + && len > offset ) { + + dl1 = offset + length; + if ( dl1 < len ) { + dl2 = xmlUTF8Strlen( data ) - dl1; + if ( offset > 0 ) { + new = xmlUTF8Strsub(data, 0, offset ); + new = xmlStrcat(new, encstring ); + } + else { + new = xmlStrdup( encstring ); + } + + after = xmlUTF8Strsub(data, dl1, dl2 ); + new = xmlStrcat(new, after ); + + domSetNodeValue( self, new ); + + xmlFree( new ); + xmlFree( after ); + } + else { + /* replace until end! */ + if ( offset > 0 ) { + new = xmlUTF8Strsub(data, 0, offset ); + new = xmlStrcat(new, encstring ); + } + else { + new = xmlStrdup( encstring ); + } + domSetNodeValue( self, new ); + xmlFree( new ); + } + xmlFree( data ); + } + + xmlFree(encstring); + } + } + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Comment + +SV * +new( CLASS, content ) + SV * content + PREINIT: + xmlChar * encstring; + xmlNodePtr newNode; + ProxyNodePtr docfrag = NULL; + CODE: + encstring = Sv2C(content, NULL); + newNode = xmlNewComment( encstring ); + xmlFree(encstring); + if( newNode != NULL ) { + docfrag = PmmNewFragment( NULL ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::CDATASection + +SV * +new( CLASS , content ) + SV * content + PREINIT: + xmlChar * encstring; + xmlNodePtr newNode; + ProxyNodePtr docfrag = NULL; + CODE: + encstring = Sv2C(content, NULL); + newNode = xmlNewCDataBlock( NULL , encstring, xmlStrlen( encstring ) ); + xmlFree(encstring); + if ( newNode != NULL ){ + docfrag = PmmNewFragment( NULL ); + xmlAddChild(PmmNODE(docfrag), newNode); + RETVAL = PmmNodeToSv(newNode,docfrag); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::DocumentFragment + +SV* +new( CLASS ) + PREINIT: + xmlNodePtr real_doc=NULL; + CODE: + real_doc = xmlNewDocFragment( NULL ); + RETVAL = PmmNodeToSv( real_doc, NULL ); + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Attr + +SV* +new( CLASS, pname, pvalue ) + SV * pname + SV * pvalue + PREINIT: + xmlNodePtr attr = NULL; + xmlChar * name; + xmlChar * value; + CODE: + name = Sv2C(pname,NULL); + value = Sv2C(pvalue,NULL); + if ( name == NULL ) { + XSRETURN_UNDEF; + } + attr = (xmlNodePtr)xmlNewProp( NULL, name, value ); + attr->doc = NULL; + RETVAL = PmmNodeToSv(attr,NULL); + OUTPUT: + RETVAL + + +SV* +parentElement( self ) + ALIAS: + XML::LibXML::Attr::getParentNode = 1 + XML::LibXML::Attr::getNextSibling = 2 + XML::LibXML::Attr::getPreviousSibling = 3 + XML::LibXML::Attr::nextSibling = 4 + XML::LibXML::Attr::previousSibling = 5 + CODE: + /* override the original parentElement(), since this an attribute is + * not part of the main tree + */ + + PERL_UNUSED_VAR(ix); + XSRETURN_UNDEF; + OUTPUT: + RETVAL + +SV* +serializeContent( self, useDomEncoding = &PL_sv_undef ) + SV * self + SV * useDomEncoding + PREINIT: + xmlBufferPtr buffer; + const xmlChar *ret = NULL; + xmlAttrPtr node = (xmlAttrPtr)PmmSvNode(self); + CODE: + buffer = xmlBufferCreate(); + domAttrSerializeContent(buffer, node); + if ( xmlBufferLength(buffer) > 0 ) { + ret = xmlBufferContent( buffer ); + } + if ( ret != NULL ) { + if ( useDomEncoding != &PL_sv_undef && SvTRUE(useDomEncoding) ) { + RETVAL = nodeC2Sv((xmlChar*)ret, PmmNODE(PmmPROXYNODE(node))) ; + } + else { + RETVAL = C2Sv((xmlChar*)ret, NULL) ; + } + xmlBufferFree( buffer ); + } + else { + xmlBufferFree( buffer ); + xs_warn("Failed to convert attribute to string"); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +toString(self , format=0, useDomEncoding = &PL_sv_undef ) + SV * self + SV * useDomEncoding + int format + ALIAS: + XML::LibXML::Attr::serialize = 1 + PREINIT: + xmlAttrPtr node = (xmlAttrPtr)PmmSvNode(self); + xmlBufferPtr buffer; + const xmlChar *ret = NULL; + CODE: + /* we add an extra method for serializing attributes since + XML::LibXML::Node::toString causes segmentation fault inside + libxml2 + */ + PERL_UNUSED_VAR(ix); + buffer = xmlBufferCreate(); + xmlBufferAdd(buffer, BAD_CAST " ", 1); + if ((node->ns != NULL) && (node->ns->prefix != NULL)) { + xmlBufferAdd(buffer, node->ns->prefix, xmlStrlen(node->ns->prefix)); + xmlBufferAdd(buffer, BAD_CAST ":", 1); + } + xmlBufferAdd(buffer, node->name, xmlStrlen(node->name)); + xmlBufferAdd(buffer, BAD_CAST "=\"", 2); + domAttrSerializeContent(buffer, node); + xmlBufferAdd(buffer, BAD_CAST "\"", 1); + + if ( xmlBufferLength(buffer) > 0 ) { + ret = xmlBufferContent( buffer ); + } + if ( ret != NULL ) { + if ( useDomEncoding != &PL_sv_undef && SvTRUE(useDomEncoding) ) { + RETVAL = nodeC2Sv((xmlChar*)ret, PmmNODE(PmmPROXYNODE(node))) ; + } + else { + RETVAL = C2Sv((xmlChar*)ret, NULL) ; + } + xmlBufferFree( buffer ); + } + else { + xmlBufferFree( buffer ); + xs_warn("Failed to convert attribute to string"); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + + +int +_setNamespace(self, namespaceURI, namespacePrefix = &PL_sv_undef ) + SV * self + SV * namespaceURI + SV * namespacePrefix + PREINIT: + xmlAttrPtr node = (xmlAttrPtr)PmmSvNode(self); + xmlChar * nsURI = nodeSv2C(namespaceURI,(xmlNodePtr)node); + xmlChar * nsPrefix = NULL; + xmlNsPtr ns = NULL; + INIT: + if ( node == NULL ) { + croak( "lost node" ); + } + CODE: + if ( !nsURI || xmlStrlen(nsURI)==0 ){ + xmlSetNs((xmlNodePtr)node, NULL); + RETVAL = 1; + } + if ( !node->parent ) { + XSRETURN_UNDEF; + } + nsPrefix = nodeSv2C(namespacePrefix, (xmlNodePtr)node); + if ( (ns = xmlSearchNs(node->doc, node->parent, nsPrefix)) && + xmlStrEqual( ns->href, nsURI) ) { + /* same uri and prefix */ + RETVAL = 1; + } + else if ( (ns = xmlSearchNsByHref(node->doc, node->parent, nsURI)) ) { + /* set uri, but with a different prefix */ + RETVAL = 1; + } + else if (! RETVAL) + RETVAL = 0; + + if ( ns ) { + if ( ns->prefix ) { + xmlSetNs((xmlNodePtr)node, ns); + } else { + RETVAL = 0; + } + } + xmlFree(nsPrefix); + xmlFree(nsURI); + OUTPUT: + RETVAL + +int +isId( self ) + SV * self + PREINIT: + xmlAttrPtr attr = (xmlAttrPtr)PmmSvNode(self); + xmlNodePtr elem; + CODE: + if ( attr == NULL ) { + XSRETURN_UNDEF; + } + elem = attr->parent; + if ( elem == NULL || elem->doc == NULL ) { + XSRETURN_UNDEF; + } + RETVAL = xmlIsID( elem->doc, elem, attr ); + OUTPUT: + RETVAL + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Namespace + +SV* +new(CLASS, namespaceURI, namespacePrefix=&PL_sv_undef) + const char * CLASS + SV * namespaceURI + SV * namespacePrefix + PREINIT: + xmlNsPtr ns = NULL; + xmlChar* nsURI; + xmlChar* nsPrefix; + CODE: + RETVAL = &PL_sv_undef; + + nsURI = Sv2C(namespaceURI,NULL); + if ( !nsURI ) { + XSRETURN_UNDEF; + } + nsPrefix = Sv2C(namespacePrefix, NULL); + ns = xmlNewNs(NULL, nsURI, nsPrefix); + if ( ns ) { + RETVAL = NEWSV(0,0); + RETVAL = sv_setref_pv( RETVAL, + CLASS, + (void*)ns); + } + xmlFree(nsURI); + if ( nsPrefix ) + xmlFree(nsPrefix); + OUTPUT: + RETVAL + +void +DESTROY(self) + SV * self + PREINIT: + xmlNsPtr ns = INT2PTR(xmlNsPtr,SvIV(SvRV(self))); + CODE: + xs_warn( "DESTROY NS" ); + if (ns) { + xmlFreeNs(ns); + } + +int +nodeType(self) + SV * self + ALIAS: + getType = 1 + PREINIT: + xmlNsPtr ns = INT2PTR(xmlNsPtr,SvIV(SvRV(self))); + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = ns->type; + OUTPUT: + RETVAL + +SV* +declaredURI(self) + SV * self + ALIAS: + value = 1 + nodeValue = 2 + getData = 3 + getValue = 4 + value2 = 5 + href = 6 + PREINIT: + xmlNsPtr ns = INT2PTR(xmlNsPtr,SvIV(SvRV(self))); + xmlChar * href; + CODE: + PERL_UNUSED_VAR(ix); + href = xmlStrdup(ns->href); + RETVAL = C2Sv(href, NULL); + xmlFree(href); + OUTPUT: + RETVAL + +SV* +declaredPrefix(self) + SV * self + ALIAS: + localname = 1 + getLocalName = 2 + PREINIT: + xmlNsPtr ns = INT2PTR(xmlNsPtr,SvIV(SvRV(self))); + xmlChar * prefix; + CODE: + PERL_UNUSED_VAR(ix); + prefix = xmlStrdup(ns->prefix); + RETVAL = C2Sv(prefix, NULL); + xmlFree(prefix); + OUTPUT: + RETVAL + +SV* +unique_key( self ) + SV * self + PREINIT: + xmlNsPtr ns = INT2PTR(xmlNsPtr,SvIV(SvRV(self))); + xmlChar* key; + CODE: + /* Concatenate prefix and URI with vertical bar dividing*/ + key = xmlStrdup(ns->prefix); + key = xmlStrcat(key, (const xmlChar*)"|"); + key = xmlStrcat(key, ns->href); + RETVAL = C2Sv(key, NULL); + OUTPUT: + RETVAL + +int +_isEqual(self, ref_node) + SV * self + SV * ref_node + PREINIT: + xmlNsPtr ns = INT2PTR(xmlNsPtr,SvIV(SvRV(self))); + xmlNsPtr ons = INT2PTR(xmlNsPtr,SvIV(SvRV(ref_node))); + CODE: + RETVAL = 0; + if ( ns == ons ) { + RETVAL = 1; + } + else if ( xmlStrEqual(ns->href, ons->href) + && xmlStrEqual(ns->prefix, ons->prefix) ) { + RETVAL = 1; + } + OUTPUT: + RETVAL + + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Dtd + +SV * +new(CLASS, external, system) + char * external + char * system + ALIAS: + parse_uri = 1 + PREINIT: + xmlDtdPtr dtd = NULL; + PREINIT_SAVED_ERROR + CODE: + PERL_UNUSED_VAR(ix); + INIT_ERROR_HANDLER; + dtd = xmlParseDTD((const xmlChar*)external, (const xmlChar*)system); + if ( dtd == NULL ) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + XSRETURN_UNDEF; + } else { + xmlSetTreeDoc((xmlNodePtr)dtd, NULL); + RETVAL = PmmNodeToSv( (xmlNodePtr) dtd, NULL ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + } + OUTPUT: + RETVAL + +SV* +systemId( self ) + xmlDtdPtr self + ALIAS: + getSystemId = 1 + CODE: + PERL_UNUSED_VAR(ix); + if ( self->SystemID == NULL ) { + XSRETURN_UNDEF; + } else { + RETVAL = C2Sv(self->SystemID,NULL); + } + OUTPUT: + RETVAL + +SV* +publicId( self ) + xmlDtdPtr self + ALIAS: + getPublicId = 1 + CODE: + PERL_UNUSED_VAR(ix); + if ( self->ExternalID == NULL ) { + XSRETURN_UNDEF; + } else { + RETVAL = C2Sv(self->ExternalID,NULL); + } + OUTPUT: + RETVAL + +SV * +parse_string(CLASS, str, ...) + char * str + PREINIT: + xmlDtdPtr res; + SV * encoding_sv; + xmlParserInputBufferPtr buffer; + xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; + xmlChar * new_string; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + if (items > 2) { + encoding_sv = ST(2); + if (items > 3) { + CLEANUP_ERROR_HANDLER; + croak("parse_string: too many parameters"); + } + /* warn("getting encoding...\n"); */ + enc = xmlParseCharEncoding(SvPV_nolen(encoding_sv)); + if (enc == XML_CHAR_ENCODING_ERROR) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(1); + croak("Parse of encoding %s failed", SvPV_nolen(encoding_sv)); + } + } + buffer = xmlAllocParserInputBuffer(enc); + /* buffer = xmlParserInputBufferCreateMem(str, xmlStrlen(str), enc); */ + if ( !buffer) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(1); + croak("cannot create buffer!\n" ); + } + new_string = xmlStrdup((const xmlChar*)str); + xmlParserInputBufferPush(buffer, xmlStrlen(new_string), (const char*)new_string); + + res = xmlIOParseDTD(NULL, buffer, enc); + + /* NOTE: xmlIOParseDTD is documented to free its InputBuffer */ + xmlFree(new_string); + if ( res && LibXML_will_die_ctx(saved_error, 0) ) + xmlFreeDtd( res ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if (res == NULL) { + croak("no DTD parsed!"); + } + RETVAL = PmmNodeToSv((xmlNodePtr)res, NULL); + OUTPUT: + RETVAL + + +#ifdef HAVE_SCHEMAS + +MODULE = XML::LibXML PACKAGE = XML::LibXML::RelaxNG + +void +DESTROY( self ) + xmlRelaxNGPtr self + CODE: + xmlRelaxNGFree( self ); + + +xmlRelaxNGPtr +parse_location( self, url, parser_options = 0, recover = FALSE ) + char * url + int parser_options + bool recover + PREINIT: + const char * CLASS = "XML::LibXML::RelaxNG"; + xmlRelaxNGParserCtxtPtr rngctxt = NULL; + xmlExternalEntityLoader old_ext_ent_loader = NULL; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + rngctxt = xmlRelaxNGNewParserCtxt( url ); + if ( rngctxt == NULL ) { + croak( "failed to initialize RelaxNG parser" ); + } +#ifndef WITH_SERRORS + /* Register Error callbacks */ + xmlRelaxNGSetParserErrors( rngctxt, + (xmlRelaxNGValidityErrorFunc)LibXML_error_handler_ctx, + (xmlRelaxNGValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); +#endif + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) { + old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + + RETVAL = xmlRelaxNGParse( rngctxt ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)old_ext_ent_loader ); + + xmlRelaxNGFreeParserCtxt( rngctxt ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR((RETVAL == NULL) ? 0 : recover); + OUTPUT: + RETVAL + + +xmlRelaxNGPtr +parse_buffer( self, perlstring, parser_options = 0, recover = FALSE ) + SV * perlstring + int parser_options + bool recover + PREINIT: + const char * CLASS = "XML::LibXML::RelaxNG"; + xmlRelaxNGParserCtxtPtr rngctxt = NULL; + xmlExternalEntityLoader old_ext_ent_loader = NULL; + char * string = NULL; + STRLEN len = 0; + PREINIT_SAVED_ERROR + INIT: + string = SvPV( perlstring, len ); + if ( string == NULL ) { + croak( "cannot parse empty string" ); + } + CODE: + INIT_ERROR_HANDLER; + + rngctxt = xmlRelaxNGNewMemParserCtxt( string,len ); + if ( rngctxt == NULL ) { + croak( "failed to initialize RelaxNG parser" ); + } +#ifndef WITH_SERRORS + /* Register Error callbacks */ + xmlRelaxNGSetParserErrors( rngctxt, + (xmlRelaxNGValidityErrorFunc)LibXML_error_handler_ctx, + (xmlRelaxNGValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); +#endif + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) { + old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + + RETVAL = xmlRelaxNGParse( rngctxt ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)old_ext_ent_loader ); + + xmlRelaxNGFreeParserCtxt( rngctxt ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR((RETVAL == NULL) ? 0 : recover); + OUTPUT: + RETVAL + + +xmlRelaxNGPtr +parse_document( self, doc, parser_options = 0, recover = FALSE ) + xmlDocPtr doc + int parser_options + bool recover + PREINIT: + const char * CLASS = "XML::LibXML::RelaxNG"; + xmlRelaxNGParserCtxtPtr rngctxt = NULL; + xmlExternalEntityLoader old_ext_ent_loader = NULL; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + rngctxt = xmlRelaxNGNewDocParserCtxt( doc ); + if ( rngctxt == NULL ) { + croak( "failed to initialize RelaxNG parser" ); + } +#ifndef WITH_SERRORS + /* Register Error callbacks */ + xmlRelaxNGSetParserErrors( rngctxt, + (xmlRelaxNGValidityErrorFunc) LibXML_error_handler_ctx, + (xmlRelaxNGValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); +#endif + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) { + old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + + RETVAL = xmlRelaxNGParse( rngctxt ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)old_ext_ent_loader ); + + xmlRelaxNGFreeParserCtxt( rngctxt ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR((RETVAL == NULL) ? 0 : recover); + OUTPUT: + RETVAL + +int +validate( self, doc ) + xmlRelaxNGPtr self + xmlDocPtr doc + PREINIT: + xmlRelaxNGValidCtxtPtr vctxt = NULL; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + if (doc) { + PmmClearPSVI(doc); + PmmInvalidatePSVI(doc); + } + vctxt = xmlRelaxNGNewValidCtxt( self ); + if ( vctxt == NULL ) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + croak( "cannot initialize the validation context" ); + } +#ifndef WITH_SERRORS + /* Register Error callbacks */ + xmlRelaxNGSetValidErrors( vctxt, + (xmlRelaxNGValidityErrorFunc)LibXML_error_handler_ctx, + (xmlRelaxNGValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); +#endif /* WITH_SERRORS */ + /* ** test only ** + xmlRelaxNGSetValidErrors( vctxt, + (xmlRelaxNGValidityErrorFunc)fprintf, + (xmlRelaxNGValidityWarningFunc)fprintf, + stderr ); + */ + RETVAL = xmlRelaxNGValidateDoc( vctxt, doc ); + xmlRelaxNGFreeValidCtxt( vctxt ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if ( RETVAL == 1 ) { + XSRETURN_UNDEF; + } + if ( RETVAL == -1 ) { + croak( "API Error" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Schema + +void +DESTROY( self ) + xmlSchemaPtr self + CODE: + xmlSchemaFree( self ); + + +xmlSchemaPtr +parse_location( self, url, parser_options = 0, recover = FALSE ) + char * url + int parser_options + bool recover + PREINIT: + const char * CLASS = "XML::LibXML::Schema"; + xmlSchemaParserCtxtPtr rngctxt = NULL; + xmlExternalEntityLoader old_ext_ent_loader = NULL; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + rngctxt = xmlSchemaNewParserCtxt( url ); + if ( rngctxt == NULL ) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + croak( "failed to initialize Schema parser" ); + } + + /* Register Error callbacks */ + xmlSchemaSetParserErrors( rngctxt, + (xmlSchemaValidityErrorFunc)LibXML_error_handler_ctx, + (xmlSchemaValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) { + old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + + RETVAL = xmlSchemaParse( rngctxt ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)old_ext_ent_loader ); + + xmlSchemaFreeParserCtxt( rngctxt ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR((RETVAL == NULL) ? 0 : recover); + OUTPUT: + RETVAL + + +xmlSchemaPtr +parse_buffer( self, perlstring, parser_options = 0, recover = FALSE ) + SV * perlstring + int parser_options + bool recover + PREINIT: + const char * CLASS = "XML::LibXML::Schema"; + xmlSchemaParserCtxtPtr rngctxt = NULL; + xmlExternalEntityLoader old_ext_ent_loader = NULL; + char * string = NULL; + STRLEN len = 0; + PREINIT_SAVED_ERROR + INIT: + string = SvPV( perlstring, len ); + if ( string == NULL ) { + croak( "cannot parse empty string" ); + } + CODE: + INIT_ERROR_HANDLER; + + rngctxt = xmlSchemaNewMemParserCtxt( string,len ); + if ( rngctxt == NULL ) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + croak( "failed to initialize Schema parser" ); + } + + /* Register Error callbacks */ + xmlSchemaSetParserErrors( rngctxt, + (xmlSchemaValidityErrorFunc)LibXML_error_handler_ctx, + (xmlSchemaValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) { + old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + + RETVAL = xmlSchemaParse( rngctxt ); + + if ( EXTERNAL_ENTITY_LOADER_FUNC == NULL && (parser_options & XML_PARSE_NONET) ) + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)old_ext_ent_loader ); + + xmlSchemaFreeParserCtxt( rngctxt ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR((RETVAL == NULL) ? 0 : recover); + OUTPUT: + RETVAL + + +int +validate( self, node ) + xmlSchemaPtr self + xmlNodePtr node + PREINIT: + xmlSchemaValidCtxtPtr vctxt = NULL; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + + if (node->type == XML_DOCUMENT_NODE) { + PmmClearPSVI((xmlDocPtr)node); + PmmInvalidatePSVI((xmlDocPtr)node); + } + vctxt = xmlSchemaNewValidCtxt( self ); + if ( vctxt == NULL ) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + croak( "cannot initialize the validation context" ); + } + + /* Register Error callbacks */ + xmlSchemaSetValidErrors( vctxt, + (xmlSchemaValidityErrorFunc)LibXML_error_handler_ctx, + (xmlSchemaValidityWarningFunc)LibXML_error_handler_ctx, + saved_error ); + + if (node->type == XML_DOCUMENT_NODE) { + RETVAL = xmlSchemaValidateDoc(vctxt, (xmlDocPtr)node); + } + else { + RETVAL = xmlSchemaValidateOneElement(vctxt, node); + } + + xmlSchemaFreeValidCtxt( vctxt ); + + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if ( RETVAL > 0 ) { + XSRETURN_UNDEF; + } + if ( RETVAL == -1 ) { + croak( "API Error" ); + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +#endif /* HAVE_SCHEMAS */ + +MODULE = XML::LibXML::XPathContext PACKAGE = XML::LibXML::XPathContext + +# PROTOTYPES: DISABLE + +SV* +new( CLASS, ... ) + const char * CLASS + PREINIT: + SV * pnode = &PL_sv_undef; + INIT: + xmlXPathContextPtr ctxt; + CODE: + if( items > 1 ) + pnode = ST(1); + + ctxt = xmlXPathNewContext( NULL ); + ctxt->namespaces = NULL; + + New(0, ctxt->user, sizeof(XPathContextData), XPathContextData); + if (ctxt->user == NULL) { + croak("XPathContext: failed to allocate proxy object\n"); + } + + if (SvOK(pnode)) { + XPathContextDATA(ctxt)->node = newSVsv(pnode); + } else { + XPathContextDATA(ctxt)->node = &PL_sv_undef; + } + + XPathContextDATA(ctxt)->pool = NULL; + XPathContextDATA(ctxt)->varLookup = NULL; + XPathContextDATA(ctxt)->varData = NULL; + + xmlXPathRegisterFunc(ctxt, + (const xmlChar *) "document", + perlDocumentFunction); + + RETVAL = NEWSV(0,0), + RETVAL = sv_setref_pv( RETVAL, + CLASS, + (void*)ctxt ); + OUTPUT: + RETVAL + +void +DESTROY( self ) + SV * self + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + CODE: + xs_warn( "DESTROY XPATH CONTEXT" ); + if (ctxt) { + if (XPathContextDATA(ctxt) != NULL) { + if (XPathContextDATA(ctxt)->node != NULL && + SvOK(XPathContextDATA(ctxt)->node)) { + SvREFCNT_dec(XPathContextDATA(ctxt)->node); + } + if (XPathContextDATA(ctxt)->varLookup != NULL && + SvOK(XPathContextDATA(ctxt)->varLookup)) { + SvREFCNT_dec(XPathContextDATA(ctxt)->varLookup); + } + if (XPathContextDATA(ctxt)->varData != NULL && + SvOK(XPathContextDATA(ctxt)->varData)) { + SvREFCNT_dec(XPathContextDATA(ctxt)->varData); + } + if (XPathContextDATA(ctxt)->pool != NULL && + SvOK(XPathContextDATA(ctxt)->pool)) { + SvREFCNT_dec((SV *)XPathContextDATA(ctxt)->pool); + } + Safefree(XPathContextDATA(ctxt)); + } + + if (ctxt->namespaces != NULL) { + xmlFree( ctxt->namespaces ); + } + if (ctxt->funcLookupData != NULL && SvROK((SV*)ctxt->funcLookupData) + && SvTYPE(SvRV((SV *)ctxt->funcLookupData)) == SVt_PVHV) { + SvREFCNT_dec((SV *)ctxt->funcLookupData); + } + + xmlXPathFreeContext(ctxt); + } + +SV* +getContextNode( self ) + SV * self + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + CODE: + if(XPathContextDATA(ctxt)->node != NULL) { + RETVAL = newSVsv(XPathContextDATA(ctxt)->node); + } else { + RETVAL = &PL_sv_undef; + } + OUTPUT: + RETVAL + +int +getContextPosition( self ) + SV * self + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + CODE: + RETVAL = ctxt->proximityPosition; + OUTPUT: + RETVAL + +int +getContextSize( self ) + SV * self + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + CODE: + RETVAL = ctxt->contextSize; + OUTPUT: + RETVAL + +void +setContextNode( self , pnode ) + SV * self + SV * pnode + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + PPCODE: + if (XPathContextDATA(ctxt)->node != NULL) { + SvREFCNT_dec(XPathContextDATA(ctxt)->node); + } + if (SvOK(pnode)) { + XPathContextDATA(ctxt)->node = newSVsv(pnode); + } else { + XPathContextDATA(ctxt)->node = NULL; + } + +void +setContextPosition( self , position ) + SV * self + int position + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) + croak("XPathContext: missing xpath context\n"); + if ( position < -1 || position > ctxt->contextSize ) + croak("XPathContext: invalid position\n"); + PPCODE: + ctxt->proximityPosition = position; + +void +setContextSize( self , size ) + SV * self + int size + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) + croak("XPathContext: missing xpath context\n"); + if ( size < -1 ) + croak("XPathContext: invalid size\n"); + PPCODE: + ctxt->contextSize = size; + if ( size == 0 ) + ctxt->proximityPosition = 0; + else if ( size > 0 ) + ctxt->proximityPosition = 1; + else + ctxt->proximityPosition = -1; + +void +registerNs( pxpath_context, prefix, ns_uri ) + SV * pxpath_context + SV * prefix + SV * ns_uri + PREINIT: + xmlXPathContextPtr ctxt = NULL; + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + LibXML_configure_xpathcontext(ctxt); + PPCODE: + if(SvOK(ns_uri)) { + if(xmlXPathRegisterNs(ctxt, (xmlChar *) SvPV_nolen(prefix), + (xmlChar *) SvPV_nolen(ns_uri)) == -1) { + croak("XPathContext: cannot register namespace\n"); + } + } else { + if(xmlXPathRegisterNs(ctxt, (xmlChar *) SvPV_nolen(prefix), NULL) == -1) { + croak("XPathContext: cannot unregister namespace\n"); + } + } + +SV* +lookupNs( pxpath_context, prefix ) + SV * pxpath_context + SV * prefix + PREINIT: + xmlXPathContextPtr ctxt = NULL; + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + LibXML_configure_xpathcontext(ctxt); + CODE: + RETVAL = C2Sv(xmlXPathNsLookup(ctxt, (xmlChar *) SvPV_nolen(prefix)), NULL); + OUTPUT: + RETVAL + +SV* +getVarLookupData( self ) + SV * self + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + CODE: + if(XPathContextDATA(ctxt)->varData != NULL) { + RETVAL = newSVsv(XPathContextDATA(ctxt)->varData); + } else { + RETVAL = &PL_sv_undef; + } + OUTPUT: + RETVAL + +SV* +getVarLookupFunc( self ) + SV * self + INIT: + xmlXPathContextPtr ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(self))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + CODE: + if(XPathContextDATA(ctxt)->varData != NULL) { + RETVAL = newSVsv(XPathContextDATA(ctxt)->varLookup); + } else { + RETVAL = &PL_sv_undef; + } + OUTPUT: + RETVAL + +void +registerVarLookupFunc( pxpath_context, lookup_func, lookup_data ) + SV * pxpath_context + SV * lookup_func + SV * lookup_data + PREINIT: + xmlXPathContextPtr ctxt = NULL; + XPathContextDataPtr data = NULL; + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) + croak("XPathContext: missing xpath context\n"); + data = XPathContextDATA(ctxt); + if ( data == NULL ) + croak("XPathContext: missing xpath context private data\n"); + LibXML_configure_xpathcontext(ctxt); + /* free previous lookup function and data */ + if (data->varLookup && SvOK(data->varLookup)) + SvREFCNT_dec(data->varLookup); + if (data->varData && SvOK(data->varData)) + SvREFCNT_dec(data->varData); + data->varLookup=NULL; + data->varData=NULL; + PPCODE: + if (SvOK(lookup_func)) { + if ( SvROK(lookup_func) && SvTYPE(SvRV(lookup_func)) == SVt_PVCV ) { + data->varLookup = newSVsv(lookup_func); + if (SvOK(lookup_data)) + data->varData = newSVsv(lookup_data); + xmlXPathRegisterVariableLookup(ctxt, + LibXML_generic_variable_lookup, ctxt); + if (ctxt->varLookupData==NULL || ctxt->varLookupData != ctxt) { + croak( "XPathContext: registration failure\n" ); + } + } else { + croak("XPathContext: 1st argument is not a CODE reference\n"); + } + } else { + /* unregister */ + xmlXPathRegisterVariableLookup(ctxt, NULL, NULL); + } + +void +registerFunctionNS( pxpath_context, name, uri, func) + SV * pxpath_context + char * name + SV * uri + SV * func + PREINIT: + xmlXPathContextPtr ctxt = NULL; + SV * pfdr; + SV * key; + STRLEN len; + char *strkey; + + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + LibXML_configure_xpathcontext(ctxt); + if ( !SvOK(func) || + (SvOK(func) && ((SvROK(func) && SvTYPE(SvRV(func)) == SVt_PVCV ) + || SvPOK(func)))) { + if (ctxt->funcLookupData == NULL) { + if (SvOK(func)) { + pfdr = newRV_noinc((SV*) newHV()); + ctxt->funcLookupData = pfdr; + } else { + /* looks like no perl function was never registered, */ + /* nothing to unregister */ + warn("XPathContext: nothing to unregister\n"); + return; + } + } else { + if (SvTYPE(SvRV((SV *)ctxt->funcLookupData)) == SVt_PVHV) { + /* good, it's a HV */ + pfdr = (SV *)ctxt->funcLookupData; + } else { + croak ("XPathContext: cannot register: funcLookupData structure occupied\n"); + } + } + key = newSVpvn("",0); + if (SvOK(uri)) { + sv_catpv(key, "{"); + sv_catsv(key, uri); + sv_catpv(key, "}"); + } + sv_catpv(key, (const char*)name); + strkey = SvPV(key, len); + /* warn("Trying to store function '%s' in %d\n", strkey, pfdr); */ + if (SvOK(func)) { + (void) hv_store((HV *)SvRV(pfdr),strkey, len, newSVsv(func), 0); + } else { + /* unregister */ + (void) hv_delete((HV *)SvRV(pfdr),strkey, len, G_DISCARD); + } + SvREFCNT_dec(key); + } else { + croak("XPathContext: 3rd argument is not a CODE reference or function name\n"); + } + PPCODE: + if (SvOK(uri)) { + xmlXPathRegisterFuncNS(ctxt, (xmlChar *) name, + (xmlChar *) SvPV(uri, len), + (SvOK(func) ? + LibXML_generic_extension_function : NULL)); + } else { + xmlXPathRegisterFunc(ctxt, (xmlChar *) name, + (SvOK(func) ? + LibXML_generic_extension_function : NULL)); + } + +void +_free_node_pool( pxpath_context ) + SV * pxpath_context + PREINIT: + xmlXPathContextPtr ctxt = NULL; + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + PPCODE: + if (XPathContextDATA(ctxt)->pool != NULL) { + SvREFCNT_dec((SV *)XPathContextDATA(ctxt)->pool); + XPathContextDATA(ctxt)->pool = NULL; + } + +void +_findnodes( pxpath_context, perl_xpath ) + SV * pxpath_context + SV * perl_xpath + PREINIT: + xmlXPathContextPtr ctxt = NULL; + ProxyNodePtr owner = NULL; + xmlXPathObjectPtr found = NULL; + xmlNodeSetPtr nodelist = NULL; + SV * element = NULL ; + xmlChar * xpath = NULL; + xmlXPathCompExprPtr comp = NULL; + PREINIT_SAVED_ERROR + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + LibXML_configure_xpathcontext(ctxt); + if ( ctxt->node == NULL ) { + croak("XPathContext: lost current node\n"); + } + if (sv_isobject(perl_xpath) && sv_isa(perl_xpath,"XML::LibXML::XPathExpression")) { + comp = INT2PTR(xmlXPathCompExprPtr,SvIV((SV*)SvRV( perl_xpath ))); + if (!comp) XSRETURN_UNDEF; + } else { + xpath = nodeSv2C(perl_xpath, ctxt->node); + if ( !(xpath && xmlStrlen(xpath)) ) { + if ( xpath ) + xmlFree(xpath); + croak("XPathContext: empty XPath found\n"); + XSRETURN_UNDEF; + } + } + PPCODE: + INIT_ERROR_HANDLER; + + PUTBACK ; + if (comp) { + found = domXPathCompFindCtxt( ctxt, comp, 0 ); + } else { + found = domXPathFindCtxt( ctxt, xpath, 0 ); + xmlFree(xpath); + } + SPAGAIN ; + + if (found != NULL) { + nodelist = found->nodesetval; + } else { + nodelist = NULL; + } + CLEANUP_ERROR_HANDLER; + if ( nodelist ) { + REPORT_ERROR(1); + if ( nodelist->nodeNr > 0 ) { + int i; + const char * cls = "XML::LibXML::Node"; + xmlNodePtr tnode; + int l = nodelist->nodeNr; + for( i = 0 ; i < l; i++){ + /* we have to create a new instance of an objectptr. + * and then place the current node into the new object. + * afterwards we can push the object to the array! + */ + element = NULL; + tnode = nodelist->nodeTab[i]; + if (tnode->type == XML_NAMESPACE_DECL) { + xmlNsPtr newns = xmlCopyNamespace((xmlNsPtr)tnode); + if ( newns != NULL ) { + element = NEWSV(0,0); + cls = PmmNodeTypeName( tnode ); + element = sv_setref_pv( element, + (const char *)cls, + newns + ); + } + else { + continue; + } + } + else { + if (tnode->doc) { + owner = PmmOWNERPO(PmmNewNode((xmlNodePtr) tnode->doc)); + } else { + /* we try to find a known node on the ancestor axis */ + xmlNodePtr n = tnode; + while (n && n->_private == NULL) n = n->parent; + if (n) owner = PmmOWNERPO(((ProxyNodePtr)n->_private)); + else owner = NULL; /* self contained node */ + } + element = PmmNodeToSv(tnode, owner); + } + XPUSHs( sv_2mortal(element) ); + } + } + /* prevent libxml2 from freeing the actual nodes */ + if (found->boolval) found->boolval=0; + xmlXPathFreeObject(found); + } + else { + xmlXPathFreeObject(found); + REPORT_ERROR(0); + } + +void +_find( pxpath_context, pxpath, to_bool ) + SV * pxpath_context + SV * pxpath + int to_bool + PREINIT: + xmlXPathContextPtr ctxt = NULL; + ProxyNodePtr owner = NULL; + xmlXPathObjectPtr found = NULL; + xmlNodeSetPtr nodelist = NULL; + xmlChar * xpath = NULL; + xmlXPathCompExprPtr comp = NULL; + PREINIT_SAVED_ERROR + INIT: + ctxt = INT2PTR(xmlXPathContextPtr,SvIV(SvRV(pxpath_context))); + if ( ctxt == NULL ) { + croak("XPathContext: missing xpath context\n"); + } + LibXML_configure_xpathcontext(ctxt); + if ( ctxt->node == NULL ) { + croak("XPathContext: lost current node\n"); + } + if (sv_isobject(pxpath) && sv_isa(pxpath,"XML::LibXML::XPathExpression")) { + comp = INT2PTR(xmlXPathCompExprPtr,SvIV((SV*)SvRV( pxpath ))); + if (!comp) XSRETURN_UNDEF; + } else { + xpath = nodeSv2C(pxpath, ctxt->node); + if ( !(xpath && xmlStrlen(xpath)) ) { + if ( xpath ) + xmlFree(xpath); + croak("XPathContext: empty XPath found\n"); + XSRETURN_UNDEF; + } + } + PPCODE: + INIT_ERROR_HANDLER; + PUTBACK ; + if (comp) { + found = domXPathCompFindCtxt( ctxt, comp, to_bool ); + } else { + found = domXPathFindCtxt( ctxt, xpath, to_bool ); + xmlFree(xpath); + } + SPAGAIN ; + CLEANUP_ERROR_HANDLER; + if (found) { + REPORT_ERROR(1); + switch (found->type) { + case XPATH_NODESET: + /* return as a NodeList */ + /* access ->nodesetval */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::NodeList", 0))); + nodelist = found->nodesetval; + if ( nodelist ) { + if ( nodelist->nodeNr > 0 ) { + int i; + const char * cls = "XML::LibXML::Node"; + xmlNodePtr tnode; + SV * element; + int l = nodelist->nodeNr; + + for( i = 0 ; i < l; i++){ + /* we have to create a new instance of an + * objectptr. and then + * place the current node into the new + * object. afterwards we can + * push the object to the array! + */ + tnode = nodelist->nodeTab[i]; + + /* let's be paranoid */ + if (tnode->type == XML_NAMESPACE_DECL) { + xmlNsPtr newns = xmlCopyNamespace((xmlNsPtr)tnode); + if ( newns != NULL ) { + element = NEWSV(0,0); + cls = PmmNodeTypeName( tnode ); + element = sv_setref_pv( element, + (const char *)cls, + (void*)newns + ); + } + else { + continue; + } + } + else { + if (tnode->doc) { + owner = PmmOWNERPO(PmmNewNode((xmlNodePtr) tnode->doc)); + } else { + /* we try to find a known node on the ancestor axis */ + xmlNodePtr n = tnode; + while (n && n->_private == NULL) n = n->parent; + if (n) owner = PmmOWNERPO(((ProxyNodePtr)n->_private)); + else owner = NULL; /* self contained node */ + } + element = PmmNodeToSv(tnode, owner); + } + XPUSHs( sv_2mortal(element) ); + } + } + } + /* prevent libxml2 from freeing the actual nodes */ + if (found->boolval) found->boolval=0; + break; + case XPATH_BOOLEAN: + /* return as a Boolean */ + /* access ->boolval */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Boolean", 0))); + XPUSHs(sv_2mortal(newSViv(found->boolval))); + break; + case XPATH_NUMBER: + /* return as a Number */ + /* access ->floatval */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Number", 0))); + XPUSHs(sv_2mortal(newSVnv(found->floatval))); + break; + case XPATH_STRING: + /* access ->stringval */ + /* return as a Literal */ + XPUSHs(sv_2mortal(newSVpv("XML::LibXML::Literal", 0))); + XPUSHs(sv_2mortal(C2Sv(found->stringval, NULL))); + break; + default: + croak("Unknown XPath return type"); + } + xmlXPathFreeObject(found); + } + else { + REPORT_ERROR(0); + } + +MODULE = XML::LibXML PACKAGE = XML::LibXML::InputCallback + +void +lib_cleanup_callbacks( self ) + CODE: + xmlCleanupInputCallbacks(); + xmlRegisterDefaultInputCallbacks(); + +void +lib_init_callbacks( self ) + CODE: + xmlRegisterDefaultInputCallbacks(); /* important */ + xmlRegisterInputCallbacks((xmlInputMatchCallback) LibXML_input_match, + (xmlInputOpenCallback) LibXML_input_open, + (xmlInputReadCallback) LibXML_input_read, + (xmlInputCloseCallback) LibXML_input_close); + +#ifdef HAVE_READER_SUPPORT + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Reader + +xmlTextReaderPtr +_newForFile(CLASS, filename, encoding, options) + const char* CLASS + const char* filename + const char * encoding = SvOK($arg) ? SvPV_nolen($arg) : NULL; + int options = SvOK($arg) ? SvIV($arg) : 0; + CODE: + RETVAL = xmlReaderForFile(filename, encoding, options); + INIT_READER_ERROR_HANDLER(RETVAL); + OUTPUT: + RETVAL + +xmlTextReaderPtr +_newForIO(CLASS, fh, url, encoding, options) + const char* CLASS + SV * fh + const char * url = SvOK($arg) ? SvPV_nolen($arg) : NULL; + const char * encoding = SvOK($arg) ? SvPV_nolen($arg) : NULL; + int options = SvOK($arg) ? SvIV($arg) : 0; + CODE: + (void)SvREFCNT_inc(fh); /* _dec'd by LibXML_close_perl */ + RETVAL = xmlReaderForIO((xmlInputReadCallback) LibXML_read_perl, + (xmlInputCloseCallback) LibXML_close_perl, + (void *) fh, url, encoding, options); + INIT_READER_ERROR_HANDLER(RETVAL) + OUTPUT: + RETVAL + +xmlTextReaderPtr +_newForString(CLASS, string, url, encoding, options) + const char* CLASS + SV * string + const char * url = SvOK($arg) ? SvPV_nolen($arg) : NULL; + const char * encoding = SvOK($arg) ? SvPV_nolen($arg) : NULL; + int options = SvOK($arg) ? SvIV($arg) : 0; + CODE: + if (encoding == NULL && SvUTF8( string )) { + encoding = "UTF-8"; + } + RETVAL = xmlReaderForDoc((xmlChar* )SvPV_nolen(string), url, encoding, options); + INIT_READER_ERROR_HANDLER(RETVAL) + OUTPUT: + RETVAL + +xmlTextReaderPtr +_newForFd(CLASS, fd, url, encoding, options) + const char* CLASS + int fd + const char * url = SvOK($arg) ? SvPV_nolen($arg) : NULL; + const char * encoding = SvOK($arg) ? SvPV_nolen($arg) : NULL; + int options = SvOK($arg) ? SvIV($arg) : 0; + CODE: + RETVAL = xmlReaderForFd(fd, url, encoding, options); + INIT_READER_ERROR_HANDLER(RETVAL) + OUTPUT: + RETVAL + +xmlTextReaderPtr +_newForDOM(CLASS, perl_doc) + const char* CLASS + SV * perl_doc + CODE: + PmmREFCNT_inc(SvPROXYNODE(perl_doc)); /* _dec in DESTROY */ + RETVAL = xmlReaderWalker((xmlDocPtr) PmmSvNode(perl_doc)); + OUTPUT: + RETVAL + +int +attributeCount(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderAttributeCount(reader); + OUTPUT: + RETVAL + +SV * +baseURI(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstBaseUri(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +long +byteConsumed(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderByteConsumed(reader); + OUTPUT: + RETVAL + +int +_close(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderClose(reader); + OUTPUT: + RETVAL + +SV * +encoding(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstEncoding(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +SV * +localName(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstLocalName(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +SV * +name(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstName(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +SV * +namespaceURI(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstNamespaceUri(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +SV * +prefix(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstPrefix(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +SV * +value(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstValue(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + +SV * +xmlLang(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstXmlLang(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + + +SV * +xmlVersion(reader) + xmlTextReaderPtr reader + PREINIT: + const xmlChar *result = NULL; + CODE: + result = xmlTextReaderConstXmlVersion(reader); + RETVAL = C2Sv(result, NULL); + OUTPUT: + RETVAL + + +int +depth(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderDepth(reader); + OUTPUT: + RETVAL + + +SV * +getAttribute(reader, name) + xmlTextReaderPtr reader + char * name + PREINIT: + xmlChar *result = NULL; + CODE: + result = xmlTextReaderGetAttribute(reader, (xmlChar*) name); + RETVAL = C2Sv(result, NULL); + xmlFree(result); + OUTPUT: + RETVAL + +SV * +getAttributeNo(reader, no) + xmlTextReaderPtr reader + int no + PREINIT: + xmlChar *result = NULL; + CODE: + result = xmlTextReaderGetAttributeNo(reader, no); + RETVAL = C2Sv(result, NULL); + xmlFree(result); + OUTPUT: + RETVAL + +SV * +getAttributeNs(reader, localName, namespaceURI) + xmlTextReaderPtr reader + char * localName + char * namespaceURI = SvOK($arg) ? SvPV_nolen($arg) : NULL; + PREINIT: + xmlChar *result = NULL; + CODE: + result = xmlTextReaderGetAttributeNs(reader, (xmlChar*) localName, + (xmlChar*) namespaceURI); + RETVAL = C2Sv(result, NULL); + xmlFree(result); + OUTPUT: + RETVAL + +int +columnNumber(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderGetParserColumnNumber(reader); + OUTPUT: + RETVAL + +int +lineNumber(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderGetParserLineNumber(reader); + OUTPUT: + RETVAL + +int +_getParserProp(reader, prop) + xmlTextReaderPtr reader + int prop + CODE: + RETVAL = xmlTextReaderGetParserProp(reader, prop); + OUTPUT: + RETVAL + +int +hasAttributes(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderHasAttributes(reader); + OUTPUT: + RETVAL + +int +hasValue(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderHasValue(reader); + OUTPUT: + RETVAL + +SV* +getAttributeHash(reader) + xmlTextReaderPtr reader + PREINIT: + HV* hv; + SV* sv; + const xmlChar* name; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + hv=newHV(); + if (xmlTextReaderHasAttributes(reader) && xmlTextReaderMoveToFirstAttribute(reader)==1) { + do { + name = xmlTextReaderConstName(reader); + sv=C2Sv((xmlTextReaderConstValue(reader)),NULL); + if (sv && hv_store(hv, (const char*) name, xmlStrlen(name), sv, 0)==NULL) { + SvREFCNT_dec(sv); /* free if not needed by hv_stores */ + } + } while (xmlTextReaderMoveToNextAttribute(reader)==1); + xmlTextReaderMoveToElement(reader); + } + RETVAL=newRV_noinc((SV*)hv); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +isDefault(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderIsDefault(reader); + OUTPUT: + RETVAL + +int +isEmptyElement(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderIsEmptyElement(reader); + OUTPUT: + RETVAL + +int +isNamespaceDecl(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderIsNamespaceDecl(reader); + OUTPUT: + RETVAL + +int +isValid(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderIsValid(reader); + OUTPUT: + RETVAL + +SV * +lookupNamespace(reader, prefix) + xmlTextReaderPtr reader + char * prefix = SvOK($arg) ? SvPV_nolen($arg) : NULL; + PREINIT: + xmlChar *result = NULL; + CODE: + result = xmlTextReaderLookupNamespace(reader, (xmlChar*) prefix); + RETVAL = C2Sv(result, NULL); + xmlFree(result); + OUTPUT: + RETVAL + + +int +moveToAttribute(reader, name) + xmlTextReaderPtr reader + char * name + CODE: + RETVAL = xmlTextReaderMoveToAttribute(reader, (xmlChar*) name); + OUTPUT: + RETVAL + +int +moveToAttributeNo(reader, no) + xmlTextReaderPtr reader + int no + CODE: + RETVAL = xmlTextReaderMoveToAttributeNo(reader, no); + OUTPUT: + RETVAL + +int +moveToAttributeNs(reader, localName, namespaceURI) + xmlTextReaderPtr reader + char * localName + char * namespaceURI = SvOK($arg) ? SvPV_nolen($arg) : NULL; + CODE: + RETVAL = xmlTextReaderMoveToAttributeNs(reader, + (xmlChar*) localName, (xmlChar*) namespaceURI); + OUTPUT: + RETVAL + +int +moveToElement(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderMoveToElement(reader); + OUTPUT: + RETVAL + +int +moveToFirstAttribute(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderMoveToFirstAttribute(reader); + OUTPUT: + RETVAL + +int +moveToNextAttribute(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderMoveToNextAttribute(reader); + OUTPUT: + RETVAL + +int +next(reader) + xmlTextReaderPtr reader + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + RETVAL = xmlTextReaderNext(reader); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +#define LIBXML_READER_NEXT_SIBLING(ret,reader) \ + ret = xmlTextReaderNextSibling(reader); \ + if (ret == -1) \ + { \ + int depth; \ + depth = xmlTextReaderDepth(reader); \ + ret = xmlTextReaderRead(reader); \ + while (ret == 1 && xmlTextReaderDepth(reader) > depth) { \ + ret = xmlTextReaderNext(reader); \ + } \ + if (ret == 1) { \ + if (xmlTextReaderDepth(reader) != depth) { \ + ret = 0; \ + } else if (xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) { \ + ret = xmlTextReaderRead(reader); \ + } \ + } \ + } + +int +nextSibling(reader) + xmlTextReaderPtr reader + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + LIBXML_READER_NEXT_SIBLING(RETVAL,reader) + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +nextSiblingElement(reader, name = NULL, nsURI = NULL) + xmlTextReaderPtr reader + const char * name + const char * nsURI + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + do { + LIBXML_READER_NEXT_SIBLING(RETVAL,reader) + if (LIBXML_READER_TEST_ELEMENT(reader,name,nsURI)) { + break; + } + } while (RETVAL == 1); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +nextElement(reader, name = NULL, nsURI = NULL) + xmlTextReaderPtr reader + const char * name + const char * nsURI + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + do { + RETVAL = xmlTextReaderRead(reader); + if (LIBXML_READER_TEST_ELEMENT(reader,name,nsURI)) { + break; + } + } while (RETVAL == 1); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +nextPatternMatch(reader, compiled) + xmlTextReaderPtr reader + xmlPatternPtr compiled + PREINIT: + PREINIT_SAVED_ERROR + xmlNodePtr node = NULL; + CODE: + if ( compiled == NULL ) + croak("Usage: $reader->nextPatternMatch( a-XML::LibXML::Pattern-object )"); + do { + RETVAL = xmlTextReaderRead(reader); + node = xmlTextReaderCurrentNode(reader); + if (node && xmlPatternMatch(compiled, node)) { + break; + } + } while (RETVAL == 1); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +skipSiblings(reader) + xmlTextReaderPtr reader + PREINIT: + int depth; + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + depth = xmlTextReaderDepth(reader); + RETVAL = -1; + if (depth > 0) { + do { + RETVAL = xmlTextReaderNext(reader); + } while (RETVAL == 1 && xmlTextReaderDepth(reader) >= depth); + if (xmlTextReaderNodeType(reader) != XML_READER_TYPE_END_ELEMENT) { + RETVAL = -1; + } + } + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +nodeType(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderNodeType(reader); + OUTPUT: + RETVAL + +SV* +quoteChar(reader) + xmlTextReaderPtr reader + PREINIT: + int ret; + CODE: + ret = xmlTextReaderQuoteChar(reader); + if (ret == -1) XSRETURN_UNDEF; + RETVAL = newSVpvf("%c",ret); + OUTPUT: + RETVAL + +int +read(reader) + xmlTextReaderPtr reader + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + RETVAL = xmlTextReaderRead(reader); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +int +readAttributeValue(reader) + xmlTextReaderPtr reader + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + RETVAL = xmlTextReaderReadAttributeValue(reader); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + + +SV * +readInnerXml(reader) + xmlTextReaderPtr reader + PREINIT: + xmlChar *result = NULL; + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + result = xmlTextReaderReadInnerXml(reader); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if (!result) XSRETURN_UNDEF; + RETVAL = C2Sv(result, NULL); + xmlFree(result); + OUTPUT: + RETVAL + +SV * +readOuterXml(reader) + xmlTextReaderPtr reader + PREINIT: + xmlChar *result = NULL; + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + result = xmlTextReaderReadOuterXml(reader); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if (result) { + RETVAL = C2Sv(result, NULL); + xmlFree(result); + } else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +int +readState(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderReadState(reader); + OUTPUT: + RETVAL + +int +_setParserProp(reader, prop, value) + xmlTextReaderPtr reader + int prop + int value + CODE: + RETVAL = xmlTextReaderSetParserProp(reader, prop, value); + OUTPUT: + RETVAL + +int +standalone(reader) + xmlTextReaderPtr reader + CODE: + RETVAL = xmlTextReaderStandalone(reader); + OUTPUT: + RETVAL + +SV * +_nodePath(reader) + xmlTextReaderPtr reader + PREINIT: + xmlNodePtr node = NULL; + xmlChar * path = NULL; + CODE: + node = xmlTextReaderCurrentNode(reader); + if ( node ==NULL ) { + XSRETURN_UNDEF; + } + path = xmlGetNodePath( node ); + if ( path == NULL ) { + XSRETURN_UNDEF; + } + RETVAL = C2Sv(path,NULL); + xmlFree(path); + OUTPUT: + RETVAL + +#ifdef LIBXML_PATTERN_ENABLED + +int +matchesPattern(reader, compiled) + xmlTextReaderPtr reader + xmlPatternPtr compiled + PREINIT: + xmlNodePtr node = NULL; + CODE: + if ( compiled == NULL ) + XSRETURN_UNDEF; + node = xmlTextReaderCurrentNode(reader); + if ( node ==NULL ) { + XSRETURN_UNDEF; + } + RETVAL = xmlPatternMatch(compiled, node); + OUTPUT: + RETVAL + +#endif /* LIBXML_PATTERN_ENABLED */ + +SV * +copyCurrentNode(reader,expand = 0) + xmlTextReaderPtr reader + int expand + PREINIT: + xmlNodePtr node = NULL; + xmlNodePtr copy; + xmlDocPtr doc = NULL; + ProxyNodePtr proxy; + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + if (expand) { + node = xmlTextReaderExpand(reader); + } + else { + node = xmlTextReaderCurrentNode(reader); + } + if (node) { + doc = xmlTextReaderCurrentDoc(reader); + } + if (!doc) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + XSRETURN_UNDEF; + } + if (xmlTextReaderGetParserProp(reader,XML_PARSER_VALIDATE)) + PmmInvalidatePSVI(doc); /* the document may have psvi info */ + + copy = PmmCloneNode( node, expand ); + if ( copy == NULL ) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + XSRETURN_UNDEF; + } + if ( copy->type == XML_DTD_NODE ) { + RETVAL = PmmNodeToSv(copy, NULL); + } + else { + ProxyNodePtr docfrag = NULL; + + if ( doc != NULL ) { + xmlSetTreeDoc(copy, doc); + } + proxy = PmmNewNode((xmlNodePtr)doc); + if (PmmREFCNT(proxy) == 0) { + PmmREFCNT_inc(proxy); + } + LibXML_set_reader_preserve_flag(reader); + + docfrag = PmmNewFragment( doc ); + xmlAddChild( PmmNODE(docfrag), copy ); + RETVAL = PmmNodeToSv(copy, docfrag); + } + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + OUTPUT: + RETVAL + +SV * +document(reader) + xmlTextReaderPtr reader + PREINIT: + xmlDocPtr doc = NULL; + CODE: + doc = xmlTextReaderCurrentDoc(reader); + if (!doc) XSRETURN_UNDEF; + RETVAL = PmmNodeToSv((xmlNodePtr)doc, NULL); + /* FIXME: taint the document with PmmInvalidatePSVI if the reader did validation */ + if ( PmmREFCNT(SvPROXYNODE(RETVAL))==1 ) { + /* will be decremented in Reader destructor */ + PmmREFCNT_inc(SvPROXYNODE(RETVAL)); + } + if (xmlTextReaderGetParserProp(reader,XML_PARSER_VALIDATE)) + PmmInvalidatePSVI(doc); /* the document may have psvi info */ + + LibXML_set_reader_preserve_flag(reader); + + OUTPUT: + RETVAL + +int +_preservePattern(reader,pattern,ns_map=NULL) + xmlTextReaderPtr reader + char * pattern + AV * ns_map + PREINIT: + xmlChar** namespaces = NULL; + SV** aux; + int last,i; + CODE: + if (ns_map) { + last = av_len(ns_map); + New(0,namespaces, last+2, xmlChar*); + for( i = 0; i <= last ; i++ ) { + aux = av_fetch(ns_map,i,0); + namespaces[i]=(xmlChar*) SvPV_nolen(*aux); + } + namespaces[i]=0; + } + RETVAL = xmlTextReaderPreservePattern(reader,(const xmlChar*) pattern, + (const xmlChar**)namespaces); + Safefree(namespaces); + OUTPUT: + RETVAL + +SV * +preserveNode(reader) + xmlTextReaderPtr reader + PREINIT: + xmlNodePtr node; + xmlDocPtr doc; + ProxyNodePtr proxy; + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + doc = xmlTextReaderCurrentDoc(reader); + if (!doc) { + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + XSRETURN_UNDEF; + } + proxy = PmmNewNode((xmlNodePtr)doc); + if ( PmmREFCNT(proxy) == 0 ) { + /* new proxy node */ + PmmREFCNT_inc(proxy); + } + LibXML_set_reader_preserve_flag(reader); + + node = xmlTextReaderPreserve(reader); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if (node) { + RETVAL = PmmNodeToSv(node, proxy); + } else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +int +finish(reader) + xmlTextReaderPtr reader + PREINIT: + PREINIT_SAVED_ERROR + CODE: + INIT_ERROR_HANDLER; + while (1) { + RETVAL = xmlTextReaderRead(reader); + if (RETVAL!=1) break; + } + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + RETVAL++; /* we want 0 - fail, 1- success */ + OUTPUT: + RETVAL + +#ifdef HAVE_SCHEMAS + +int +_setRelaxNGFile(reader,rng) + xmlTextReaderPtr reader + char* rng + CODE: + RETVAL = xmlTextReaderRelaxNGValidate(reader,rng); + OUTPUT: + RETVAL + +int +_setRelaxNG(reader,rng_doc) + xmlTextReaderPtr reader + xmlRelaxNGPtr rng_doc + CODE: + RETVAL = xmlTextReaderRelaxNGSetSchema(reader,rng_doc); + OUTPUT: + RETVAL + +int +_setXSDFile(reader,xsd) + xmlTextReaderPtr reader + char* xsd + CODE: + RETVAL = xmlTextReaderSchemaValidate(reader,xsd); + OUTPUT: + RETVAL + +int +_setXSD(reader,xsd_doc) + xmlTextReaderPtr reader + xmlSchemaPtr xsd_doc + CODE: + RETVAL = xmlTextReaderSetSchema(reader,xsd_doc); + OUTPUT: + RETVAL + +#endif /* HAVE_SCHEMAS */ + +void +_DESTROY(reader) + xmlTextReaderPtr reader + PREINIT: + xmlDocPtr doc; + ProxyNodePtr proxy; + /* SV * error_sv = NULL; + xmlTextReaderErrorFunc f = NULL; */ + CODE: + + if ( LibXML_get_reader_preserve_flag(reader) ) { + doc = xmlTextReaderCurrentDoc(reader); + if (doc) { + proxy = PmmNewNode((xmlNodePtr)doc); + if ( PmmREFCNT(proxy) == 0 ) { + PmmREFCNT_inc(proxy); + } + PmmREFCNT_dec(proxy); + } + } + if (xmlTextReaderReadState(reader) != XML_TEXTREADER_MODE_CLOSED) { + xmlTextReaderClose(reader); + } + /* xmlTextReaderGetErrorHandler(reader, &f, (void **) &error_sv); + if (error_sv) { + sv_2mortal(error_sv); + } */ + xmlFreeTextReader(reader); + +#endif /* HAVE_READER_SUPPORT */ + +#ifdef WITH_SERRORS + +MODULE = XML::LibXML PACKAGE = XML::LibXML::LibError + +int +domain( self ) + xmlErrorPtr self + CODE: + RETVAL = self->domain; + OUTPUT: + RETVAL + +int +code( self ) + xmlErrorPtr self + CODE: + RETVAL = self->code; + OUTPUT: + RETVAL + +int +line( self ) + xmlErrorPtr self + CODE: + RETVAL = self->line; + OUTPUT: + RETVAL + +int +num1( self ) + xmlErrorPtr self + ALIAS: + int1 = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = self->int1; + OUTPUT: + RETVAL + +int +num2( self ) + xmlErrorPtr self + ALIAS: + int2 = 1 + CODE: + PERL_UNUSED_VAR(ix); + RETVAL = self->int2; + OUTPUT: + RETVAL + +int +level( self ) + xmlErrorPtr self + CODE: + RETVAL = (int)self->level; + OUTPUT: + RETVAL + +char * +message( self ) + xmlErrorPtr self + CODE: + RETVAL = self->message; + OUTPUT: + RETVAL + +char * +file( self ) + xmlErrorPtr self + CODE: + RETVAL = (char*)self->file; + OUTPUT: + RETVAL + +char * +str1( self ) + xmlErrorPtr self + CODE: + RETVAL = (char*)self->str1; + OUTPUT: + RETVAL + +char * +str2( self ) + xmlErrorPtr self + CODE: + RETVAL = (char*)self->str2; + OUTPUT: + RETVAL + +char * +str3( self ) + xmlErrorPtr self + CODE: + RETVAL = (char*)self->str3; + OUTPUT: + RETVAL + +void +context_and_column( self ) + xmlErrorPtr self + PREINIT: + xmlParserInputPtr input; + const xmlChar *cur, *base, *col_cur; + unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ + xmlChar content[81]; /* space for 80 chars + line terminator */ + xmlChar *ctnt; + int domain; + xmlParserCtxtPtr ctxt = NULL; + PPCODE: + domain = self->domain; + if ((domain == XML_FROM_PARSER) || (domain == XML_FROM_HTML) || + (domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) || + (domain == XML_FROM_IO) || (domain == XML_FROM_VALID)) { + ctxt = (xmlParserCtxtPtr) self->ctxt; + } + if (ctxt == NULL) XSRETURN_EMPTY; + input = ctxt->input; + if ((input != NULL) && (input->filename == NULL) && + (ctxt->inputNr > 1)) { + input = ctxt->inputTab[ctxt->inputNr - 2]; + } + if (input == NULL) XSRETURN_EMPTY; + cur = input->cur; + base = input->base; + /* skip backwards over any end-of-lines */ + while ((cur > base) && ((*(cur) == '\n') || (*(cur) == '\r'))) { + cur--; + } + n = 0; + /* search backwards for beginning-of-line (to max buff size) */ + while ((n++ < (sizeof(content)-1)) && (cur > base) && + (*(cur) != '\n') && (*(cur) != '\r')) + cur--; + /* search backwards for beginning-of-line for calculating the + * column. */ + col_cur = cur; + while ((col_cur > base) && (*(col_cur) != '\n') && (*(col_cur) != '\r')) + col_cur--; + if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + if ((*(col_cur) == '\n') || (*(col_cur) == '\r')) col_cur++; + /* calculate the error position in terms of the current position */ + col = input->cur - col_cur; + /* search forward for end-of-line (to max buff size) */ + n = 0; + ctnt = content; + /* copy selected text to our buffer */ + while ((*cur != 0) && (*(cur) != '\n') && + (*(cur) != '\r') && (n < sizeof(content)-1)) { + *ctnt++ = *cur++; + n++; + } + *ctnt = 0; + EXTEND(SP,2); + PUSHs(sv_2mortal(C2Sv(content, NULL))); + PUSHs(sv_2mortal(newSViv(col))); + +#endif /* WITH_SERRORS */ + + +#ifdef LIBXML_PATTERN_ENABLED + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Pattern + +xmlPatternPtr +_compilePattern(CLASS, ppattern, pattern_type, ns_map=NULL) + SV * ppattern + AV * ns_map + int pattern_type + PREINIT: + xmlChar * pattern = Sv2C(ppattern, NULL); + xmlChar** namespaces = NULL; + SV** aux; + int last,i; + PREINIT_SAVED_ERROR + CODE: + if ( pattern == NULL ) + XSRETURN_UNDEF; + if (ns_map) { + last = av_len(ns_map); + New(0,namespaces, last+2, xmlChar*); + for( i = 0; i <= last ; i++ ) { + aux = av_fetch(ns_map,i,0); + namespaces[i]=(xmlChar*) SvPV_nolen(*aux); + } + namespaces[i]=0; + } + INIT_ERROR_HANDLER; + RETVAL = xmlPatterncompile(pattern, NULL, pattern_type, (const xmlChar **) namespaces); + Safefree(namespaces); + xmlFree( pattern ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if ( RETVAL == NULL ) { + croak("Compilation of pattern failed"); + } + OUTPUT: + RETVAL + +int +matchesNode(self, node) + xmlPatternPtr self + xmlNodePtr node + CODE: + if ( node ==NULL ) { + XSRETURN_UNDEF; + } + RETVAL = xmlPatternMatch(self, node); + OUTPUT: + RETVAL + +void +DESTROY( self ) + xmlPatternPtr self + CODE: + xs_warn( "DESTROY PATTERN OBJECT" ); + xmlFreePattern(self); + +#endif /* LIBXML_PATTERN_ENABLED */ + +#ifdef LIBXML_REGEXP_ENABLED + +MODULE = XML::LibXML PACKAGE = XML::LibXML::RegExp + +xmlRegexpPtr +_compile(CLASS, pregexp) + SV * pregexp + PREINIT: + xmlChar * regexp = Sv2C(pregexp, NULL); + PREINIT_SAVED_ERROR + CODE: + if ( regexp == NULL ) + XSRETURN_UNDEF; + INIT_ERROR_HANDLER; + RETVAL = xmlRegexpCompile(regexp); + xmlFree( regexp ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if ( RETVAL == NULL ) { + croak("Compilation of regexp failed"); + } + OUTPUT: + RETVAL + +int +matches(self, pvalue) + xmlRegexpPtr self + SV* pvalue + PREINIT: + xmlChar * value = Sv2C(pvalue, NULL); + CODE: + if ( value == NULL ) + XSRETURN_UNDEF; + RETVAL = xmlRegexpExec(self,value); + xmlFree( value ); + OUTPUT: + RETVAL + +int +isDeterministic(self) + xmlRegexpPtr self + CODE: + RETVAL = xmlRegexpIsDeterminist(self); + OUTPUT: + RETVAL + +void +DESTROY( self ) + xmlRegexpPtr self + CODE: + xs_warn( "DESTROY REGEXP OBJECT" ); + xmlRegFreeRegexp(self); + +#endif /* LIBXML_REGEXP_ENABLED */ + + +MODULE = XML::LibXML PACKAGE = XML::LibXML::XPathExpression + +xmlXPathCompExprPtr +new(CLASS, pxpath) + SV * pxpath + PREINIT: + xmlChar * xpath = Sv2C(pxpath, NULL); + PREINIT_SAVED_ERROR + CODE: + if ( pxpath == NULL ) + XSRETURN_UNDEF; + INIT_ERROR_HANDLER; + RETVAL = xmlXPathCompile( xpath ); + xmlFree( xpath ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if ( RETVAL == NULL ) { + croak("Compilation of XPath expression failed!"); + } + OUTPUT: + RETVAL + +void +DESTROY( self ) + xmlXPathCompExprPtr self + CODE: + xs_warn( "DESTROY COMPILED XPATH OBJECT" ); + xmlXPathFreeCompExpr(self); + +MODULE = XML::LibXML PACKAGE = XML::LibXML::Common + +PROTOTYPES: DISABLE + +SV* +encodeToUTF8( encoding, string ) + const char * encoding + SV * string + PREINIT: + xmlChar * realstring = NULL; + xmlChar * tstr = NULL; + xmlCharEncoding enc = 0; + STRLEN len = 0; + xmlBufferPtr in = NULL, out = NULL; + xmlCharEncodingHandlerPtr coder = NULL; + PREINIT_SAVED_ERROR + CODE: + if (!SvOK(string)) { + XSRETURN_UNDEF; + } else if (!SvCUR(string)) { + XSRETURN_PV(""); + } + realstring = (xmlChar*) SvPV(string, len); + if ( realstring != NULL ) { + /* warn("encode %s", realstring ); */ +#ifdef HAVE_UTF8 + if ( !DO_UTF8(string) && encoding != NULL ) { +#else + if ( encoding != NULL ) { +#endif + enc = xmlParseCharEncoding( encoding ); + + if ( enc == 0 ) { + /* this happens if the encoding is "" or NULL */ + enc = XML_CHAR_ENCODING_UTF8; + } + + if ( enc == XML_CHAR_ENCODING_UTF8 ) { + /* copy the string */ + /* warn( "simply copy the string" ); */ + tstr = xmlStrndup( realstring, len ); + } + else { + INIT_ERROR_HANDLER; + if ( enc > 1 ) { + coder= xmlGetCharEncodingHandler( enc ); + } + else if ( enc == XML_CHAR_ENCODING_ERROR ){ + coder =xmlFindCharEncodingHandler( encoding ); + } + else { + croak("no encoder found\n"); + } + if ( coder == NULL ) { + croak( "cannot encode string" ); + } + in = xmlBufferCreateStatic((void*)realstring, len ); + out = xmlBufferCreate(); + if ( xmlCharEncInFunc( coder, out, in ) >= 0 ) { + tstr = xmlStrdup( out->content ); + } + + xmlBufferFree( in ); + xmlBufferFree( out ); + xmlCharEncCloseFunc( coder ); + + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + } + } + else { + tstr = xmlStrndup( realstring, len ); + } + + if ( !tstr ) { + croak( "return value missing!" ); + } + + len = xmlStrlen( tstr ); + RETVAL = newSVpvn( (const char *)tstr, len ); +#ifdef HAVE_UTF8 + SvUTF8_on(RETVAL); +#endif + xmlFree(tstr); + } + else { + XSRETURN_UNDEF; + } + OUTPUT: + RETVAL + +SV* +decodeFromUTF8( encoding, string ) + const char * encoding + SV* string + PREINIT: + xmlChar * tstr = NULL; + xmlChar * realstring = NULL; + xmlCharEncoding enc = 0; + STRLEN len = 0; + xmlBufferPtr in = NULL, out = NULL; + xmlCharEncodingHandlerPtr coder = NULL; + PREINIT_SAVED_ERROR + CODE: +#ifdef HAVE_UTF8 + if ( !SvOK(string) ) { + XSRETURN_UNDEF; + } else if (!SvCUR(string)) { + XSRETURN_PV(""); + } else if ( !SvUTF8(string) ) { + croak("string is not utf8!!"); + } else { +#endif + realstring = (xmlChar*) SvPV(string, len); + if ( realstring != NULL ) { + /* warn("decode %s", realstring ); */ + enc = xmlParseCharEncoding( encoding ); + if ( enc == 0 ) { + /* this happens if the encoding is "" or NULL */ + enc = XML_CHAR_ENCODING_UTF8; + } + + if ( enc == XML_CHAR_ENCODING_UTF8 ) { + /* copy the string */ + /* warn( "simply copy the string" ); */ + tstr = xmlStrdup( realstring ); + len = xmlStrlen( tstr ); + } + else { + INIT_ERROR_HANDLER; + if ( enc > 1 ) { + coder= xmlGetCharEncodingHandler( enc ); + } + else if ( enc == XML_CHAR_ENCODING_ERROR ){ + coder = xmlFindCharEncodingHandler( encoding ); + } + else { + croak("no encoder found\n"); + } + + if ( coder == NULL ) { + croak( "cannot encode string" ); + } + + in = xmlBufferCreate(); + out = xmlBufferCreate(); + xmlBufferCCat( in, (char*) realstring ); + if ( xmlCharEncOutFunc( coder, out, in ) >= 0 ) { + len = xmlBufferLength( out ); + tstr = xmlCharStrndup( (char*) xmlBufferContent( out ), len ); + } + + xmlBufferFree( in ); + xmlBufferFree( out ); + xmlCharEncCloseFunc( coder ); + CLEANUP_ERROR_HANDLER; + REPORT_ERROR(0); + if ( !tstr ) { + croak( "return value missing!" ); + } + } + + RETVAL = newSVpvn( (const char *)tstr, len ); + xmlFree( tstr ); +#ifdef HAVE_UTF8 + if ( enc == XML_CHAR_ENCODING_UTF8 ) { + SvUTF8_on(RETVAL); + } +#endif + } + else { + XSRETURN_UNDEF; + } +#ifdef HAVE_UTF8 + } +#endif + OUTPUT: + RETVAL diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..4297696 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,223 @@ +Av_CharPtrPtr.c +Av_CharPtrPtr.h +Changes +Devel.xs +HACKING.txt +LICENSE +LibXML.pm +LibXML.pod +LibXML.xs +MANIFEST +Makefile.PL +README +TODO +debian/changelog +debian/compat +debian/control +debian/copyright +debian/libxml-libxml-perl.docs +debian/libxml-libxml-perl.examples +debian/libxml-libxml-perl.install +debian/libxml-libxml-perl.postinst +debian/libxml-libxml-perl.prerm +debian/rules +docs/libxml.dbk +dom.c +dom.h +example/JBR-ALLENtrees.htm +example/article.xml +example/article_bad.xml +example/article_external_bad.xml +example/article_internal.xml +example/article_internal_bad.xml +example/bad.dtd +example/bad.xml +example/catalog.xml +example/cb_example.pl +example/complex/complex.dtd +example/complex/complex.xml +example/complex/complex2.xml +example/complex/dtd/f.dtd +example/complex/dtd/g.dtd +example/create-sample-html-document.pl +example/dromeds.xml +example/dtd.xml +example/enc2_latin2.html +example/enc_latin2.html +example/ext_ent.dtd +example/ns.xml +example/test.dtd +example/test.html +example/test.xhtml +example/test.xml +example/test2.xml +example/test3.xml +example/test4.xml +example/thedieline.rss +example/utf-16-1.html +example/utf-16-2.html +example/utf-16-2.xml +example/xmllibxmldocs.pl +example/xmlns/badguy.xml +example/xmlns/goodguy.xml +example/xpath.pl +example/yahoo-finance-html-with-errors.html +lib/XML/LibXML/Attr.pod +lib/XML/LibXML/AttributeHash.pm +lib/XML/LibXML/Boolean.pm +lib/XML/LibXML/CDATASection.pod +lib/XML/LibXML/Comment.pod +lib/XML/LibXML/Common.pm +lib/XML/LibXML/Common.pod +lib/XML/LibXML/DOM.pod +lib/XML/LibXML/Devel.pm +lib/XML/LibXML/Document.pod +lib/XML/LibXML/DocumentFragment.pod +lib/XML/LibXML/Dtd.pod +lib/XML/LibXML/Element.pod +lib/XML/LibXML/ErrNo.pm +lib/XML/LibXML/ErrNo.pod +lib/XML/LibXML/Error.pm +lib/XML/LibXML/Error.pod +lib/XML/LibXML/InputCallback.pod +lib/XML/LibXML/Literal.pm +lib/XML/LibXML/Namespace.pod +lib/XML/LibXML/Node.pod +lib/XML/LibXML/NodeList.pm +lib/XML/LibXML/Number.pm +lib/XML/LibXML/PI.pod +lib/XML/LibXML/Parser.pod +lib/XML/LibXML/Pattern.pod +lib/XML/LibXML/Reader.pm +lib/XML/LibXML/Reader.pod +lib/XML/LibXML/RegExp.pod +lib/XML/LibXML/RelaxNG.pod +lib/XML/LibXML/SAX.pm +lib/XML/LibXML/SAX.pod +lib/XML/LibXML/SAX/Builder.pm +lib/XML/LibXML/SAX/Builder.pod +lib/XML/LibXML/SAX/Generator.pm +lib/XML/LibXML/SAX/Parser.pm +lib/XML/LibXML/Schema.pod +lib/XML/LibXML/Text.pod +lib/XML/LibXML/XPathContext.pm +lib/XML/LibXML/XPathContext.pod +lib/XML/LibXML/XPathExpression.pod +perl-libxml-mm.c +perl-libxml-mm.h +perl-libxml-sax.c +perl-libxml-sax.h +ppport.h +scripts/Test.pm-to-Test-More.pl +scripts/bump-version-number.pl +scripts/fast-eumm.pl +scripts/prints-to-comments.pl +scripts/tag-release.pl +scripts/total-build-and-test.bash +scripts/update-HACKING-file.bash +t/00-report-prereqs.t +t/01basic.t +t/02parse.t +t/03doc.t +t/04node.t +t/05text.t +t/06elements.t +t/07dtd.t +t/08findnodes.t +t/09xpath.t +t/10ns.t +t/11memory.t +t/12html.t +t/13dtd.t +t/14sax.t +t/15nodelist.t +t/16docnodes.t +t/17callbacks.t +t/18docfree.t +t/19die_on_invalid_utf8_rt_58848.t +t/19encoding.t +t/20extras.t +t/21catalog.t +t/23rawfunctions.t +t/24c14n.t +t/25relaxng.t +t/26schema.t +t/27new_callbacks_simple.t +t/28new_callbacks_multiple.t +t/29id.t +t/30keep_blanks.t +t/30xpathcontext.t +t/31xpc_functions.t +t/32xpc_variables.t +t/35huge_mode.t +t/40reader.t +t/40reader_mem_error.t +t/41xinclude.t +t/42common.t +t/43options.t +t/44extent.t +t/45regex.t +t/46err_column.t +t/47load_xml_callbacks.t +t/48_RH5_double_free_rt83779.t +t/48_SAX_Builder_rt_91433.t +t/48_gh_pr63_detect_undef_values.t +t/48_memleak_rt_83744.t +t/48_reader_undef_warning_on_empty_str_rt106830.t +t/48_removeChild_crashes_rt_80395.t +t/48_replaceNode_DTD_nodes_rT_80521.t +t/48_rt123379_setNamespace.t +t/48_rt55000.t +t/48_rt93429_recover_2_in_html_parsing.t +t/48importing_nodes_IDs_rt_69520.t +t/49_load_html.t +t/49callbacks_returning_undef.t +t/49global_extent.t +t/50devel.t +t/51_parse_html_string_rt87089.t +t/60error_prev_chain.t +t/60struct_error.t +t/61error.t +t/62overload.t +t/71overloads.t +t/72destruction.t +t/80registryleak.t +t/90shared_clone_failed_rt_91800.t +t/90stack.t +t/90threads.t +t/91unique_key.t +t/cpan-changes.t +t/data/callbacks_returning_undef.xml +t/data/chinese.xml +t/lib/Collector.pm +t/lib/Counter.pm +t/lib/Stacker.pm +t/lib/TestHelpers.pm +t/pod-files-presence.t +t/pod.t +t/release-kwalitee.t +t/style-trailing-space.t +test/relaxng/badschema.rng +test/relaxng/demo.rng +test/relaxng/demo.xml +test/relaxng/demo2.rng +test/relaxng/demo3.rng +test/relaxng/demo4.rng +test/relaxng/invaliddemo.xml +test/relaxng/net.rng +test/relaxng/schema.rng +test/schema/badschema.xsd +test/schema/demo.xml +test/schema/invaliddemo.xml +test/schema/net.xsd +test/schema/schema.xsd +test/textReader/countries.xml +test/xinclude/entity.txt +test/xinclude/test.xml +test/xinclude/xinclude.xml +typemap +xpath.c +xpath.h +xpathcontext.h +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..2bfa259 --- /dev/null +++ b/META.json @@ -0,0 +1,102 @@ +{ + "abstract" : "Interface to Gnome libxml2 xml parsing and DOM library", + "author" : [ + "Petr Pajas " + ], + "dynamic_config" : 0, + "generated_by" : "ExtUtils::MakeMaker version 7.60, CPAN::Meta::Converter version 2.150010", + "keywords" : [ + "dom", + "html", + "libxml", + "object oriented", + "oop", + "parse", + "parser", + "parsing", + "pullparser", + "sax", + "sgml", + "xml", + "xpath", + "XPath", + "xs" + ], + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : 2 + }, + "name" : "XML-LibXML", + "no_index" : { + "directory" : [ + "t", + "inc", + "xt" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "Alien::Base::Wrapper" : "0", + "Alien::Libxml2" : "0.14", + "Config" : "0", + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Carp" : "0", + "DynaLoader" : "0", + "Encode" : "0", + "Exporter" : "5.57", + "IO::Handle" : "0", + "Scalar::Util" : "0", + "Tie::Hash" : "0", + "XML::NamespaceSupport" : "1.07", + "XML::SAX" : "0.11", + "XML::SAX::Base" : "0", + "XML::SAX::DocumentLocator" : "0", + "XML::SAX::Exception" : "0", + "base" : "0", + "constant" : "0", + "overload" : "0", + "parent" : "0", + "perl" : "5.008001", + "strict" : "0", + "vars" : "0", + "warnings" : "0" + } + }, + "test" : { + "requires" : { + "Config" : "0", + "Errno" : "0", + "IO::File" : "0", + "IO::Handle" : "0", + "POSIX" : "0", + "Scalar::Util" : "0", + "Test::More" : "0", + "locale" : "0", + "utf8" : "0" + } + } + }, + "release_status" : "stable", + "resources" : { + "repository" : { + "type" : "git", + "url" : "https://github.com/shlomif/perl-XML-LibXML.git", + "web" : "https://github.com/shlomif/perl-XML-LibXML" + } + }, + "version" : "2.0207", + "x_serialization_backend" : "JSON::PP version 4.06" +} diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..fee45e0 --- /dev/null +++ b/META.yml @@ -0,0 +1,73 @@ +--- +abstract: 'Interface to Gnome libxml2 xml parsing and DOM library' +author: + - 'Petr Pajas ' +build_requires: + Config: '0' + Errno: '0' + ExtUtils::MakeMaker: '0' + IO::File: '0' + IO::Handle: '0' + POSIX: '0' + Scalar::Util: '0' + Test::More: '0' + locale: '0' + utf8: '0' +configure_requires: + Alien::Base::Wrapper: '0' + Alien::Libxml2: '0.14' + Config: '0' + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'ExtUtils::MakeMaker version 7.60, CPAN::Meta::Converter version 2.150010' +keywords: + - dom + - html + - libxml + - 'object oriented' + - oop + - parse + - parser + - parsing + - pullparser + - sax + - sgml + - xml + - xpath + - XPath + - xs +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: XML-LibXML +no_index: + directory: + - t + - inc + - xt +requires: + Carp: '0' + DynaLoader: '0' + Encode: '0' + Exporter: '5.57' + IO::Handle: '0' + Scalar::Util: '0' + Tie::Hash: '0' + XML::NamespaceSupport: '1.07' + XML::SAX: '0.11' + XML::SAX::Base: '0' + XML::SAX::DocumentLocator: '0' + XML::SAX::Exception: '0' + base: '0' + constant: '0' + overload: '0' + parent: '0' + perl: '5.008001' + strict: '0' + vars: '0' + warnings: '0' +resources: + repository: https://github.com/shlomif/perl-XML-LibXML.git +version: '2.0207' +x_serialization_backend: 'CPAN::Meta::YAML version 0.018' diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..db85a65 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,811 @@ +# -------------------------------------------------------------------------- # +# $Id$ +# -------------------------------------------------------------------------- # +# Makefile.PL for XML::LibXML. +# This file is required to generate a localized Makefile +# -------------------------------------------------------------------------- # +# +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# + +use strict; +use warnings; + +require 5.008; + +use vars qw/$DEVNULL $is_Win32 $extralibdir $skipsaxinstall/; + +use ExtUtils::MakeMaker; +use Config; +use Symbol; +use File::Spec; + +$|=0; +my %config; + +# -------------------------------------------------------------------------- # +# -------------------------------------------------------------------------- # +# common information go to the top, so they are easier to find +# -------------------------------------------------------------------------- # +my %INFOS = ( + 'NAME' => 'XML::LibXML', + 'VERSION_FROM' => 'LibXML.pm', # finds $VERSION + 'AUTHOR' => 'Petr Pajas', + 'ABSTRACT' => 'Interface to Gnome libxml2 xml parsing and DOM library', + 'LICENSE' => 'perl', + (($ExtUtils::MakeMaker::VERSION >= 6.48) + ? (MIN_PERL_VERSION => '5.008',) + : () + ), + 'PREREQ_PM' => { + 'base' => 0, + #'Hash::FieldHash' => '0.09', + 'parent' => 0, + 'strict' => 0, + 'Test::More' => 0, + 'vars' => 0, + 'warnings' => 0, + 'XML::NamespaceSupport' => '1.07', + 'XML::SAX' => '0.11', + 'XML::SAX::Base' => '0', + 'XML::SAX::Exception' => '0', + }, + 'OBJECT' => '$(O_FILES)', # add the DOM extensions to libxml2 + ($ExtUtils::MakeMaker::VERSION >= 6.54) + ? + ( + META_MERGE => + { + resources => + { + repository => 'https://github.com/shlomif/perl-XML-LibXML', + homepage => 'https://github.com/shlomif/perl-XML-LibXML', + }, + keywords => + [ + "dom", + "html", + "libxml", + "object oriented", + "oop", + "parse", + "parser", + "parsing", + "pullparser", + "sax", + "sgml", + "xml", + "xpath", + "XPath", + "xs", + ], + }, + ) + : (), +); +# -------------------------------------------------------------------------- # +# -------------------------------------------------------------------------- # + +use lib qw(inc); +use Devel::CheckLib; + +# Prompt the user here for any paths and other configuration + + +# -------------------------------------------------------------------------- # +# libxml2 valid versions + +# -------------------------------------------------------------------------- # + +# -------------------------------------------------------------------------- # +# read extra configurations from the commandline +my %params; +@params{qw(FORCE DEBUG DEFINE EXTRALIBDIR GDOME INC LIBS SKIP_SAX_INSTALL XMLPREFIX NO_THREADS LDFLAGS)}=(); + +@ARGV = grep { + my ($key, $val) = split(/=/, $_, 2); + if (exists $params{$key}) { + $config{$key} = $val; 0 + } else { 1 } +} @ARGV; + +$extralibdir = $config{EXTRALIBDIR}; +delete $config{EXTRALIBDIR}; + +# -------------------------------------------------------------------------- # + +# -------------------------------------------------------------------------- # +# force unsupported version +my $FORCE = delete $config{FORCE}; + +# switch Debugging messages on +my $DEBUG = delete $config{DEBUG}; + +if ( $config{DEBUG} and $is_Win32 ) { + warn "win32 compile\n"; +} +# -------------------------------------------------------------------------- # + +# -------------------------------------------------------------------------- # +# enable perls UTF8 support if available +if ( $] >= 5.006 ) { + warn "enable native perl UTF8\n"; + $config{DEFINE} .= " -DHAVE_UTF8"; +} +if ( $] < 5.008 or $config{NO_THREADS} ) { + warn "disabling XML::LibXML support for Perl threads\n"; + $config{DEFINE} .= " -DNO_XML_LIBXML_THREADS"; +} +delete $config{NO_THREADS}; +# -------------------------------------------------------------------------- # + +# -------------------------------------------------------------------------- # +# get the libxml2 configuration +# +# For each release we already know which libxml2 versions work with the given +# module. All we need is to keep track of bad versions. +# If a user wants to build XML::LibXML with a newer version, there will be +# a warning, that errors are possible. +# +# We keep track of the valid versions by keeping a blacklist of intervals +# of working and not working versions where Ma.Mi.Pt <= X.Y.Z is of the same +# state. +# +# NOTE: All versions, the tests pass will be marked as working. +# + +$skipsaxinstall = $ENV{SKIP_SAX_INSTALL} || $config{SKIP_SAX_INSTALL}; +delete $config{SKIP_SAX_INSTALL}; + +unless ( $is_Win32 ) { # cannot get config in W32 + my @blacklist = ( + # format X,Y,Z,is_ok, X,Y,Z is version, + # is_ok applies also to *preceding* versions + [2,4,22,0], + [2,4,25,0], # broken XPath + [2,4,28,0], # unsupported, may work fine with earlier XML::LibXML versions + [2,4,29,0], # broken + [2,4,30,0], # broken + [2,5,0,0], # unsupported + [2,5,1,0], # all pre 2.5.4 version have broken attr output + [2,5,5,0], # tests pass, but known as broken + [2,5,11,0], # will partially work + [2,6,0,0], # unsupported + [2,6,4,0], # schema error + [2,6,5,0], # broken xincludes + [2,6,15,0], + # [2,6,16,1], # first version to pass all tests + [2,6,18,1], # up to 2.6.18 all ok + [2,6,19,0], # broken c14n + [2,6,20,0], # broken schemas + [2,6,24,1], # all tests pass + [2,6,25,0], # broken XPath + [2,6,32,1], # tested, works ok + [2,7,1,0], # broken release, broken utf-16 + [2,7,6,1], # tested, ok + [2,7,8,1], # tested, ok + [2,9,3,1], # schema regression + [2,9,4,0], # schema regression + [2,9,9,1], + ); + my $xml2cfg = "xml2-config"; + my $libprefix = $ENV{XMLPREFIX} || $config{XMLPREFIX}; + + delete $config{XMLPREFIX}; # delete if exists, otherwise MakeMaker gets confused + + if ( defined $libprefix ) { + $xml2cfg = $libprefix . '/bin/' . $xml2cfg; + } + + # if a user defined INC and LIBS on the command line we must not + # override them + if ( not defined $config{LIBS} and not defined $config{INC} ) { + print "running xml2-config..."; + eval { + try_libconfig( $xml2cfg, \%config, \@blacklist ); + }; + + if ( $@ ) { + if ( $@ =~ /^VERSION|^FORCED/ ) { + my $libxml2_version; + print STDERR "The installed version of libxml2 $@ is not compatible with XML::LibXML (and probably buggy)!\n\n". + "You may continue at your own risk using 'perl Makefile.PL FORCE=1', but:\n\n". + " - don't expect XML::LibXML to build or work correctly!\n". + " - don't report errors!\n". + " - don't send patches!\n\n". + "Check the README file for more information on versions\n". + "that are tested with XML::LibXML\n\n"; + if ($@ =~ /^VERSION (\S+)/) { + $libxml2_version = $1; + } + # 0 recommended by http://cpantest.grango.org (Notes for CPAN Authors) + exit 1 if !$FORCE and $libxml2_version ne "2.9.4"; + } + if ( $@ =~ /^UNTESTED (\S*)/ ) { + warn "Note: libxml2 $1 was not tested with this XML::LibXML version.\n" +# warn <<"UNTESTED"; +# WARNING! +# The installed version of libxml2 was not tested with this version of XML::LibXML. + +# XML::LibXML may fail building or some tests may not pass. +# Expect strange errors and unstable scripts. + +# Check the README file for more informations +# END OF WARNING +# UNTESTED + } + + if ( not defined $config{LIBS} and not defined $config{INC} ) { + warn "didn't manage to get libxml2 config, guessing\n"; + $config{LIBS} = '-L/usr/local/lib -L/usr/lib -lxml2 -lm'; + $config{INC} = '-I/usr/local/include -I/usr/include'; + print <<"OPT"; +options: + LIBS='$config{LIBS}' + INC='$config{INC}' +If this is wrong, Re-run as: + \$ $^X Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include' + +OPT + } + } + } +} + +# -------------------------------------------------------------------------- # + +# -------------------------------------------------------------------------- # +# GDOME Support +# +# GDOME Support has to get explicitly activated by setting GDOME=1 as a config param. +# +unless ( $is_Win32 ) { # cannot get config in W32 + if ( $config{GDOME} ) { + my $ver; + my $state = undef; # there are three possible states: + # 1 : works + # 0 : works not + # undef : not yet tested + my @blacklist = ( + [0,7,2,0], + [0,7,3,1], + ); + print <<"GDOME"; + +GDOME Support (experimental): + XML::LibXML can parse into XML::GDOME DOMs if libgdome is installed. + This feature is optional and is not required for using XML::LibXML. + +GDOME + + print "running gdome-config..."; + + eval { + test_libconfig( "gdome-config", \%config, @blacklist ); + print "NOTE: You will need to install XML::GDOME to use this feature\n"; + }; + + if ( $@ ) { + if ( $@ =~ /^VERSION/ ) { + warn "The installed libgdome version is not supported\n"; + } + elsif ( $@ =~ /^UNTESTED/ ) { + warn "The installed libgdome version was not yet tested with XML::LibXML.\n"; + print "NOTE: You will need to install XML::GDOME to use this feature\n"; + } + } + } +} +# -------------------------------------------------------------------------- # + + +my $config_LIBS_alternatives; +# -------------------------------------------------------------------------- # +# fix the ld flags +# -------------------------------------------------------------------------- # +if (!defined $config{LIBS} || $config{LIBS} !~ /\-l(?:lib)?xml2\b/) { + # in this case we are not able to run xml2-config. therefore we need to + # expand the libz as well. + if ($is_Win32) { + if( $ENV{ACTIVEPERL_MINGW} ) { + $config{LIBS} .= ' -llibxml2.lib -lzlib.lib'; + } + else { + my $l = $config{LIBS}; + + if (!defined($l)) { + $l = ''; + } + + # Put several options. + $config_LIBS_alternatives = [ + map { "$l $_" } + q/ -llibxml2/, + q/ -lxml2 -lzlib/, + q/ -llibxml2 -lzlib -llibgettextlib.dll/ + ]; + $config{LIBS} = $config_LIBS_alternatives->[-1]; + $config{INC} .= " -I$Config{incpath}"; + } + } + else { + $config{LIBS} .= ' -lxml2 -lz -lm'; + } +} +elsif ($config{LIBS} !~ /\-lz\b/ and !($is_Win32 && $config{LIBS} !~ /\-lzlib\b/)) { + # note if libxml2 has not -lz within its cflags, we should not use + # it! We should trust libxml2 and assume libz is not available on the + # current system (this is ofcourse not true with win32 systems. + # $config{LIBS} .= $is_Win32 ? ' -lzlib' :' -lz'; + if ( $config{DEBUG} ) { + warn "zlib was not configured\n"; + warn "set zlib\n" if $is_Win32; + } + if ($is_Win32) { + if( $ENV{ACTIVEPERL_MINGW} ) { + $config{LIBS} .= ' -lzlib.lib'; + } else { + $config{LIBS} .= ' -lzlib'; + } + } else { + $config{LIBS} .= ' -lz'; + } +} + +# -------------------------------------------------------------------------- # +# MacOS X Compiler switches have to go here +# +# if we run on MacOSX, we should check if 10.2 is running and if so, +# if the Build Target is set correctly. Otherwise we have to set it by +# hand + +my $ldflags = delete $config{LDFLAGS}; +if ($ldflags) { + $config{dynamic_lib} = { OTHERLDFLAGS => " $ldflags " }; +} + +my $incpath = $config{INC} || ""; +$incpath =~ s#(\A|\s)\s*-I#$1#g; + +sub _libxml_check_lib_with_config_LIBs +{ + my ($lib_name, $conf_LIBS) = @_; + + return + check_lib( + debug => $DEBUG, + LIBS => $conf_LIBS, + # fill in what you prompted the user for here + lib => [$lib_name], + incpath => [split(/\s/,$incpath)], + header => + [ + 'libxml/c14n.h', + 'libxml/catalog.h', + 'libxml/entities.h', + 'libxml/globals.h', + 'libxml/HTMLparser.h', + 'libxml/HTMLtree.h', + 'libxml/parser.h', + 'libxml/parserInternals.h', + 'libxml/pattern.h', + 'libxml/relaxng.h', + 'libxml/tree.h', + 'libxml/uri.h', + 'libxml/valid.h', + 'libxml/xinclude.h', + 'libxml/xmlerror.h', + 'libxml/xmlIO.h', + 'libxml/xmlmemory.h', + 'libxml/xmlreader.h', + 'libxml/xmlregexp.h', + 'libxml/xmlschemas.h', + 'libxml/xmlversion.h', + 'libxml/xpath.h', + 'libxml/xpathInternals.h', + ], + ); +} + +sub _libxml_check_lib { + my ($libname) = @_; + + if (defined($config_LIBS_alternatives)) { + foreach my $conf_LIBS (@$config_LIBS_alternatives) { + if (_libxml_check_lib_with_config_LIBs($libname, $conf_LIBS)) { + $config{LIBS} = $conf_LIBS; + return 1; + } + } + } + else { + return _libxml_check_lib_with_config_LIBs($libname, $config{LIBS}); + } +} + +print "Checking for ability to link against xml2..."; +if ( _libxml_check_lib('xml2') ) { + print "yes\n"; +} +else { + print "no\n"; + print "Checking for ability to link against libxml2..."; + if ( _libxml_check_lib('libxml2')) { + print "yes\n"; + } + else { + print STDERR <<"DEATH"; +libxml2, zlib, and/or the Math library (-lm) have not been found. +Try setting LIBS and INC values on the command line +Or get libxml2 from + http://xmlsoft.org/ +If you install via RPMs, make sure you also install the -devel +RPMs, as this is where the headers (.h files) are. + +Also, you may try to run perl Makefile.PL with the DEBUG=1 parameter +to see the exact reason why the detection of libxml2 installation +failed or why Makefile.PL was not able to compile a test program. +DEATH + exit 0; # 0 recommended by http://cpantest.grango.org (Notes for CPAN Authors) + } +} + +# -------------------------------------------------------------------------- # +# _NOW_ write the Makefile + +WriteMakefile( + %INFOS, + %config, +); +# -------------------------------------------------------------------------- # + + +# -------------------------------------------------------------------------- # +# helper functions to build the Makefile +sub MY::manifypods { + package MY; + my $str = shift->SUPER::manifypods(@_); +# warn $str; +# $str =~ s/^manifypods : pure_all (.*)$/manifypods : pure_all docs $1/m; + $str .= <<"EOF"; + +docs-fast : +\t\@$^X -pi~ -e 's{[0-9.]*}{'"\$(VERSION)"'}' docs/libxml.dbk +\t\@$^X -Iblib/arch -Iblib/lib example/xmllibxmldocs.pl docs/libxml.dbk lib/XML/LibXML/ + +docs : pure_all +\t\@$^X -pi~ -e 's{[0-9.]*}{'"\$(VERSION)"'}' docs/libxml.dbk +\t\@$^X -Iblib/arch -Iblib/lib example/xmllibxmldocs.pl docs/libxml.dbk lib/XML/LibXML/ +\t\@$^X -pi.old -e 's/a/a/' Makefile.PL +\t\@echo "==> YOU MUST NOW RE-RUN $^X Makefile.PL <==" +\t\@false + +EOF + return $str; +} + +sub MY::install { + package MY; + my $script = shift->SUPER::install(@_); + unless ( $::skipsaxinstall ) { + $script =~ s/install :: (.*)$/install :: $1 install_sax_driver/m; + $script .= <<"INSTALL"; + +install_sax_driver : +\t-\@\$(PERL) -I\$(INSTALLSITELIB) -I\$(INSTALLSITEARCH) -MXML::SAX -e "XML::SAX->add_parser(q(XML::LibXML::SAX::Parser))->save_parsers()" +\t-\@\$(PERL) -I\$(INSTALLSITELIB) -I\$(INSTALLSITEARCH) -MXML::SAX -e "XML::SAX->add_parser(q(XML::LibXML::SAX))->save_parsers()" + +INSTALL + } else { + warn "Note: 'make install' will skip XML::LibXML::SAX registration with XML::SAX!\n"; + } + return $script; +} + +sub MY::test { + package MY; + my $script = shift->SUPER::test(@_); + if ( $::extralibdir ) { + $script =~ s/(\$\(TEST_VERBOSE\),)/$1 \'$::extralibdir\',/m; + } + return $script; +} + +# echo perl -pi~ -e '$$_=q($(version))."\n" if /#\ VERSION TEMPLATE/ ' $(TO_INST_PM) +sub MY::postamble { + my $mpl_args = join " ", map qq["$_"], @ARGV; + + my $CC = + ( + exists($ENV{CC}) + ? "CC = $ENV{CC}" + : '' + ); + + my $ret = "$CC\n" . <<'MAKE_FRAG'; + +# emacs flymake-mode +check-syntax : + test -n "$(CHK_SOURCES)" && \ + $(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) -o /dev/null -S $(CHK_SOURCES) + +# used to update version numbers in all modules +version: + @version=`grep '\# VERSION TEMPLATE' $(VERSION_FROM)`; \ + echo "New version line: $$version"; \ + perl -pi~ -e '$$_=q('"$$version"')."\n" if /#\ VERSION TEMPLATE/ ' $(TO_INST_PM); + +runtest: pure_all + $(ABSPERLRUN) -MFile::Spec -MTest::Run::CmdLine::Iface -e \ + "local @INC = @INC; unshift @INC, map { File::Spec->rel2abs(\$$_) } ('$(INST_LIB)', '$(INST_ARCHLIB)'); Test::Run::CmdLine::Iface->new({test_files => [glob(q{t/*.t})]})->run();" + +distruntest: distdir + cd $(DISTVNAME) && $(ABSPERLRUN) Makefile.PL {#mpl_args#} + cd $(DISTVNAME) && $(MAKE) $(PASTHRU) + cd $(DISTVNAME) && $(MAKE) runtest $(PASTHRU) + +MAKE_FRAG + + $ret =~ s/{#mpl_args#}/$mpl_args/; + + return $ret; +} + + +# -------------------------------------------------------------------------- # + +# -------------------------------------------------------------------------- # +# Functions +# - these should really be in MakeMaker... But &shrug; +# -------------------------------------------------------------------------- # + +use Config; +use Cwd; +use Symbol; +use File::Spec; + + +BEGIN { + $is_Win32 = ($^O =~ /Win32/); + if ($is_Win32) { + $DEVNULL = 'DEVNULL'; + } + else { + $DEVNULL = eval { File::Spec->devnull }; + if ($@) { $DEVNULL = '/dev/null' } + } +} + +sub rm_f { + my @files = @_; + my @realfiles; + foreach (@files) { + push @realfiles, glob($_); + } + if (@realfiles) { + chmod(0777, @realfiles); + unlink(@realfiles); + } +} + +sub rm_fr { + my @files = @_; + my @realfiles; + foreach (@files) { + push @realfiles, glob($_); + } + foreach my $file (@realfiles) { + if (-d $file) { + # warn("$file is a directory\n"); + rm_fr("$file/*"); + rm_fr("$file/.exists"); + rmdir($file) || die "Couldn't remove $file: $!"; + } + else { + # warn("removing $file\n"); + chmod(0777, $file); + unlink($file); + } + } +} + +sub xsystem { + my $command_aref = shift; + if ($DEBUG) { + print "@$command_aref\n"; + if ((system { $command_aref->[0] } @$command_aref) != 0) { + die "system call to '@$command_aref' failed"; + } + return 1; + } + open(OLDOUT, ">&STDOUT"); + open(OLDERR, ">&STDERR"); + open(STDOUT, ">$DEVNULL"); + open(STDERR, ">$DEVNULL"); + my $retval = (system { $command_aref->[0] } @$command_aref); + open(STDOUT, ">&OLDOUT"); + open(STDERR, ">&OLDERR"); + if ($retval != 0) { + die "system call to '@$command_aref' failed"; + } + return 1; +} + +sub backtick { + my $command = shift; + if ($DEBUG) { + print $command, "\n"; + my $results = `$command`; + chomp $results; + if ($? != 0) { + die "backticks call to '$command' failed"; + } + return $results; + } + open(OLDOUT, ">&STDOUT"); + open(OLDERR, ">&STDERR"); + open(STDOUT, ">$DEVNULL"); + open(STDERR, ">$DEVNULL"); + my $results = `$command`; + my $retval = $?; + open(STDOUT, ">&OLDOUT"); + open(STDERR, ">&OLDERR"); + if ($retval != 0) { + die "backticks call to '$command' failed"; + } + chomp $results; + return $results; +} + +sub try_link0 { + my ($src, $opt) = @_; + # local $config{LIBS}; + # $config{LIBS} .= $opt; + unless (mkdir(".testlink", 0777)) { + rm_fr(".testlink"); + mkdir(".testlink", 0777) || die "Cannot create .testlink dir: $!"; + } + chdir(".testlink"); + { + open(my $cfile, '>', 'Conftest.xs') + or die "Cannot write to file Conftest.xs: $!"; + print {$cfile} <<"EOT"; +#ifdef __cplusplus +extern "C" { +#endif +#include +#include +#include +#ifdef __cplusplus +} +#endif + +EOT + print {$cfile} $src; + print {$cfile} <<"EOT"; + +MODULE = Conftest PACKAGE = Conftest + +PROTOTYPES: DISABLE + +EOT + close($cfile); + } + { + open(my $cfile, '>', 'Conftest.pm') + or die "Cannot write to file Conftest.pm: $!"; + print {$cfile} <<'EOT'; +package Conftest; +$VERSION = 1.0; +require DynaLoader; +@ISA = ('DynaLoader'); +bootstrap Conftest $VERSION; +1; +EOT + close($cfile); + } + { + open (my $cfile, '>', 'Makefile.PL') + or die "Cannot write to file Makefile.PL: $!"; + print {$cfile} <<'EOT'; +use ExtUtils::MakeMaker; +my %config; +while($_ = shift @ARGV) { + my ($k, $v) = split /=/, $_, 2; + warn("$k = $v\n"); + $config{$k} = $v; +} +WriteMakefile(NAME => "Conftest", VERSION_FROM => "Conftest.pm", %config); +EOT + close($cfile); + } + { + open(my $cfile, ">test.pl") + or die "Cannot write to file test.pl: $!"; + print {$cfile} <<"EOT"; +use Test; BEGIN { plan tests => 1; } END { ok(\$loaded) } +use Conftest; \$loaded++; +EOT + close($cfile); + } + my $quote = $is_Win32 ? '"' : "'"; + xsystem([$^X, 'Makefile.PL', + (map { "$_=$config{$_}" } keys %config), + ] + ); + + my $def_opt = defined($opt) ? $opt : ''; + # I am not sure if OTHERLDFLAGS is really required - at least the + # libraries to include do not belong here! + # I would assume a user to set OTHERLDFLAGS in the %config if they are + # really required. if done so, we don't have to pass them here ... + xsystem([$Config{make}, 'test', "OTHERLDFLAGS=${def_opt}"]); +} # end try_link0 + +sub try_link { + my $start_dir = cwd(); + my $result = eval { + try_link0(@_); + }; + warn $@ if $@; + chdir($start_dir); + rm_fr(".testlink"); + return $result; +} + +# -------------------------------------------------------------------------- # +# try_libconfig class a generic config file and requests --version, --libs and +# --cflags +sub try_libconfig { + my $cfgscript = shift; + my $config = shift; + my $bl = shift; + + my $state = undef; # there are three possible states: + # 1 : works + # 0 : works not + # undef : not yet tested + + my $ver = backtick("$cfgscript --version"); + if ( defined $ver ) { + my ( $major, $minor, $point) = $ver =~ /([0-9]+).([0-9]+)\.([0-9]+)/g; + foreach my $i ( @$bl ) { + $state = $i->[3]; + last if $major < $i->[0]; + next if $major > $i->[0]; + last if $minor < $i->[1]; + next if $minor > $i->[1]; + last if $point <= $i->[2]; + $state = undef; + } + + $config->{LIBS} = backtick("$cfgscript --libs"); + $config->{INC} = backtick("$cfgscript --cflags"); + + if ( defined $state and $state == 0 ) { + print "failed\n"; + if ($FORCE) { + die "FORCED $ver\n"; + } + else { + die "VERSION $ver\n"; + } + } + + unless ( defined $state ) { + print "untested\n"; + die "UNTESTED $ver\n"; + } + + print "ok ($ver)\n"; + } + else { + print "failed\n"; + die "FAILED\n"; # strange error + } +} +# -------------------------------------------------------------------------- # diff --git a/README b/README new file mode 100644 index 0000000..827b6ff --- /dev/null +++ b/README @@ -0,0 +1,285 @@ +INTRODUCTION +============ + +This module implements a Perl interface to the Gnome libxml2 library which +provides interfaces for parsing and manipulating XML files. This module allows +Perl programmers to make use of its highly capable validating XML parser and +its high performance DOM implementation. + + +IMPORTANT NOTES +=============== + +XML::LibXML was almost entirely reimplemented between version 1.40 to version +1.49. This may cause problems on some production machines. With version 1.50 a +lot of compatibility fixes were applied, so programs written for XML::LibXML +1.40 or less should run with version 1.50 again. + +In 1.59, a new callback API was introduced. This new API is not compatible with +the previous one. See XML::LibXML::InputCallback manual page for details. + +In 1.61 the XML::LibXML::XPathContext module, previously distributed +separately, was merged in. + +An experimental support for Perl threads introduced in 1.66 has been replaced +in 1.67. + + +DEPENDENCIES +============ + +Prior to installation you MUST have installed the libxml2 library. You can get +the latest libxml2 version from + +http://xmlsoft.org/ + +Without libxml2 installed this module will neither build nor run. + +Also XML::LibXML requires the following packages: + + o XML::SAX - base class for SAX parsers + o XML::NamespaceSupport - namespace support for SAX parsers + +These packages are required. If one is missing some tests will fail. + +Again, libxml2 is required to make XML::LibXML work. The library is not just +required to build XML::LibXML, it has to be accessible during run-time as well. +Because of this you need to make sure libxml2 is installed properly. To test +this, run the xmllint program on your system. xmllint is shipped with libxml2 +and therefore should be available. For building the module you will also need +the header file for libxml2, which in binary (.rpm,.deb) etc. distributions +usually dwell in a package named libxml2-devel or similar. + + +INSTALLATION +============ + +(These instructions are for UNIX and GNU/Linux systems. For MSWin32, See Notes +for Microsoft Windows below.) + +To install XML::LibXML just follow the standard installation routine for Perl +modules: + + 1 perl Makefile.PL + 2 make + 3 make test + 4 make install # as superuser + +Note that XML::LibXML is an XS based Perl extension and you need a C compiler +to build it. + +Note also that you should rebuild XML::LibXML if you upgrade libxml2 in order +to avoid problems with possible binary incompatibilities between releases of +the library. + + +Notes on libxml2 versions +========================= + +XML::LibXML requires at least libxml2 2.6.16 to compile and pass all tests and +at least 2.6.21 is required for XML::LibXML::Reader. For some older OS versions +this means that an update of the pre-built packages is required. + +Although libxml2 claims binary compatibility between its patch levels, it is a +good idea to recompile XML::LibXML and run its tests after an upgrade of +libxml2. + +If your libxml2 installation is not within your $PATH, you can pass the +XMLPREFIX=$YOURLIBXMLPREFIX parameter to Makefile.PL determining the correct +libxml2 version in use. e.g. + +> perl Makefile.PL XMLPREFIX=/usr/brand-new + +will ask '/usr/brand-new/bin/xml2-config' about your real libxml2 +configuration. + +Try to avoid setting INC and LIBS directly on the command-line, for if used, +Makefile.PL does not check the libxml2 version for compatibility with +XML::LibXML. + + +Which version of libxml2 should be used? +======================================== + +XML::LibXML is tested against a couple versions of libxml2 before it is +released. Thus there are versions of libxml2 that are known not to work +properly with XML::LibXML. The Makefile.PL keeps a blacklist of the +incompatible libxml2 versions using Alien::Libxml2. The blacklist itself is +kept inside its "alienfile" file. + +If Makefile.PL detects one of the incompatible versions, it notifies the user. +It may still happen that XML::LibXML builds and pass its tests with such a +version, but that does not mean everything is OK. There will be no support at +all for blacklisted versions! + +As of XML::LibXML 1.61, only versions 2.6.16 and higher are supported. +XML::LibXML will probably not compile with earlier libxml2 versions than 2.5.6. +Versions prior to 2.6.8 are known to be broken for various reasons, versions +prior to 2.1.16 exhibit problems with namespaced attributes and do not +therefore pass XML::LibXML regression tests. + +It may happen that an unsupported version of libxml2 passes all tests under +certain conditions. This is no reason to assume that it shall work without +problems. If Makefile.PL marks a version of libxml2 as incompatible or broken +it is done for a good reason. + +Full linking information for libxml2 can be obtained by invoking "xml2-config +--libs". + + +Notes for Microsoft Windows +=========================== + +Thanks to Randy Kobes there is a pre-compiled PPM package available on + +http://theoryx5.uwinnipeg.ca/ppmpackages/ + +Usually it takes a little time to build the package for the latest release. + +If you want to build XML::LibXML on Windows from source, you can use the +following instructions contributed by Christopher J. Madsen: + +These instructions assume that you already have your system set up to compile +modules that use C components. + +First, get the libxml2 binaries from http://xmlsoft.org/sources/win32/ +(currently also available at http://www.zlatkovic.com/pub/libxml/). + +You need: + +> iconv-VERSION.win32.zip +> libxml2-VERSION.win32.zip +> zlib-VERSION.win32.zip + +Download the latest version of each. (Each package will probably have a +different version.) When you extract them, you'll get directories named +iconv-VERSION.win32, libxml2-VERSION.win32, and zlib-VERSION.win32, each +containing bin, lib, and include directories. + +Combine all the bin, include, and lib directories under c:\Prog\LibXML. (You +can use any directory you prefer; just adjust the instructions accordingly.) + +Get the latest version of XML-LibXML from CPAN. Extract it. + +Issue these commands in the XML-LibXML-VERSION directory: + +> perl Makefile.PL INC=-Ic:\Prog\LibXML\include LIBS=-Lc:\Prog\LibXML\lib +> nmake +> copy c:\Prog\LibXML\bin\*.dll blib\arch\auto\XML\LibXML +> nmake test +> nmake install + +(Note: Some systems use dmake instead of nmake.) + +By copying the libxml2 DLLs to the arch directory, you help avoid conflicts +with other programs you may have installed that use other (possibly +incompatible) versions of those DLLs. + + +Notes for Mac OS X +================== + +Due to a refactoring of the module, XML::LibXML will not run with some earlier +versions of Mac OS X. It appears that this is related to special linker options +for that OS prior to version 10.2.2. Since the developers do not have full +access to this OS, help/ patches from OS X gurus are highly appreciated. + +It is confirmed that XML::LibXML builds and runs without problems since Mac OS +X 10.2.6. + + +Notes for HPUX +============== + +XML::LibXML requires libxml2 2.6.16 or later. There may not exist a usable +binary libxml2 package for HPUX and XML::LibXML. If HPUX cc does not compile +libxml2 correctly, you will be forced to recompile perl with gcc (unless you +have already done that). + +Additionally I received the following Note from Rozi Kovesdi: + +> Here is my report if someone else runs into the same problem: +> +> Finally I am done with installing all the libraries and XML Perl +> modules +> +> The combination that worked best for me was: +> gcc +> GNU make +> +> Most importantly - before trying to install Perl modules that depend on +> libxml2: +> +> must set SHLIB_PATH to include the path to libxml2 shared library +> +> assuming that you used the default: +> +> export SHLIB=/usr/local/lib +> +> also, make sure that the config files have execute permission: +> +> /usr/local/bin/xml2-config +> /usr/local/bin/xslt-config +> +> they did not have +x after they were installed by 'make install' +> and it took me a while to realize that this was my problem +> +> or one can use: +> +> perl Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include' + + +CONTACT +======= + +For bug reports, please use the issue tracker at +https://github.com/shlomif/perl-XML-LibXML/issues . + +For suggestions etc. you may contact the maintainer directly at +https://www.shlomifish.org/me/contact-me/ , but in general, it is recommended +to use the mailing list given below. + +For suggestions etc., and other issues related to XML::LibXML you may use the +perl XML mailing list (perl-xml@listserv.ActiveState.com), where most +XML-related Perl modules are discussed. In case of problems you should check +the archives of that list first. Many problems are already discussed there. You +can find the list's archives and subscription options at +http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml + + +PACKAGE HISTORY +=============== + +Version < 0.98 were maintained by Matt Sergeant + +0.98 > Version > 1.49 were maintained by Matt Sergeant and Christian Glahn + +Versions >= 1.49 are maintained by Christian Glahn + +Versions > 1.56 are co-maintained by Petr Pajas + +Versions >= 1.59 are provisionally maintained by Petr Pajas + + +PATCHES AND DEVELOPER VERSION +============================= + +As XML::LibXML is open source software, help and patches are appreciated. If +you find a bug in the current release, make sure this bug still exists in the +developer version of XML::LibXML. This version can be cloned from its Git +repository. For more information about that, see: + +https://github.com/shlomif/perl-XML-LibXML + +Please consider all regression tests as correct. If any test fails it is most +certainly related to a bug. + +If you find documentation bugs, please fix them in the libxml.dbk file, stored +in the docs directory. + + +KNOWN ISSUES +============ + +The push-parser implementation causes memory leaks. + diff --git a/TODO b/TODO new file mode 100644 index 0000000..4842e48 --- /dev/null +++ b/TODO @@ -0,0 +1,10 @@ +* Fix 'line_nubers' in LibXML.pm (with a test). + +* Fix the 'suppress_warnings' similarly to https://rt.cpan.org/Ticket/Display.html?id=53270 . + +- add a flag to disable touching the I/O callbacks (as requested by + thread users on xml@gnome.org) + +- apply user-data patch (changes the proxy node data structure) + + diff --git a/docs/libxml.dbk b/docs/libxml.dbk new file mode 100644 index 0000000..c7a4208 --- /dev/null +++ b/docs/libxml.dbk @@ -0,0 +1,7668 @@ + + + + XML::LibXML + + + + + Matt + Sergeant + + + + Christian + Glahn + + + + Petr + Pajas + + + + + 2.0207 + + 2001-2007 + AxKit.com Ltd + + + 2002-2006 + Christian Glahn + + + 2006-2009 + Petr Pajas + + + + + Introduction + + README + + This module implements a Perl interface to the Gnome + libxml2 library which provides + interfaces for parsing and manipulating XML files. This + module allows Perl programmers to make use of its highly + capable validating XML parser and its high performance DOM + implementation. + + + Important Notes + + XML::LibXML was almost entirely reimplemented between version 1.40 to version 1.49. This may cause problems on some production machines. With + version 1.50 a lot of compatibility fixes were applied, so programs written for XML::LibXML 1.40 or less should run with version 1.50 again. + In 1.59, a new callback API was introduced. This new API is not compatible with the previous one. + See XML::LibXML::InputCallback manual page for details. + In 1.61 the XML::LibXML::XPathContext module, previously distributed separately, was merged in. + An experimental support for Perl threads introduced in 1.66 has been replaced in 1.67. + + + + Dependencies + + Prior to installation you MUST have installed the libxml2 library. You can get the latest libxml2 version from + + http://xmlsoft.org/ + + Without libxml2 installed this module will neither build nor run. + + Also XML::LibXML requires the following packages: + + + + XML::SAX - base class for SAX parsers + + + + XML::NamespaceSupport - namespace support for SAX parsers + + + + + These packages are required. If one is missing some tests will fail. + + Again, libxml2 is required to make XML::LibXML work. The library is not just required to build XML::LibXML, it has to be accessible during + run-time as well. Because of this you need to make sure libxml2 is installed properly. To test this, run the xmllint program on your system. xmllint + is shipped with libxml2 and therefore should be available. + For building the module you will also need the header file for libxml2, which in binary + (.rpm,.deb) etc. distributions usually dwell in a package named libxml2-devel or similar. + + + + Installation + (These instructions are for UNIX and GNU/Linux systems. For MSWin32, +See Notes for Microsoft Windows below.) + To install XML::LibXML just follow the standard installation routine for Perl modules: + + + + perl Makefile.PL + + + + make + + + + make test + + + + make install # as superuser + + + + Note that XML::LibXML is an XS based Perl extension and you need a C compiler + to build it. + Note also that you should rebuild XML::LibXML if you upgrade libxml2 + in order to avoid problems with possible binary incompatibilities between releases of the library. + + + Notes on libxml2 versions + + XML::LibXML requires at least + libxml2 2.6.16 to compile and pass all tests and + at least 2.6.21 is required for XML::LibXML::Reader. + For some older OS versions this means that an + update of the pre-built packages is required. + + Although libxml2 claims binary compatibility between + its patch levels, it is a good idea to recompile XML::LibXML + and run its tests after an upgrade of libxml2. + + + If your libxml2 installation is not within your $PATH, + you can pass the XMLPREFIX=$YOURLIBXMLPREFIX parameter to Makefile.PL + determining the correct libxml2 version in use. e.g. + + + perl Makefile.PL XMLPREFIX=/usr/brand-new + + will ask '/usr/brand-new/bin/xml2-config' about your real libxml2 configuration. + + Try to avoid setting INC and LIBS directly on the + command-line, for if used, Makefile.PL does not check + the libxml2 version for compatibility with XML::LibXML. + + + + Which version of libxml2 should be used? + + XML::LibXML is tested against a couple versions of + libxml2 before it is released. Thus there are versions + of libxml2 that are known not to work properly with + XML::LibXML. The Makefile.PL keeps a blacklist of + the incompatible libxml2 versions using Alien::Libxml2. + The blacklist itself is kept inside its "alienfile" + file. + + If Makefile.PL detects one of the incompatible versions, + it notifies the user. It may still happen that + XML::LibXML builds and pass its tests with such + a version, but that does not mean everything + is OK. There will be no support at all for blacklisted versions! + + As of XML::LibXML 1.61, only versions 2.6.16 and higher are supported. + XML::LibXML will probably not compile with earlier libxml2 versions than + 2.5.6. Versions prior to 2.6.8 are known to be broken for various reasons, + versions prior to 2.1.16 exhibit problems with namespaced attributes + and do not therefore pass XML::LibXML regression tests. + + + It may happen that an unsupported version of libxml2 + passes all tests under certain conditions. This is no + reason to assume that it shall work without problems. + If Makefile.PL marks a version of libxml2 as incompatible or broken + it is done for a good reason. + + Full linking information for libxml2 can be obtained + by invoking "xml2-config --libs". + + + + Notes for Microsoft Windows + + Thanks to Randy Kobes there is a pre-compiled PPM package available on + http://theoryx5.uwinnipeg.ca/ppmpackages/ + + Usually it takes a little time to build the package for the latest release. + If you want to build XML::LibXML on Windows from source, you can use + the following instructions contributed by Christopher J. Madsen: + + These instructions assume that you already have your system set up to + compile modules that use C components. + + + First, get the libxml2 binaries from http://xmlsoft.org/sources/win32/ + (currently also available at http://www.zlatkovic.com/pub/libxml/). + + + You need: + + iconv-VERSION.win32.zip + libxml2-VERSION.win32.zip + zlib-VERSION.win32.zip + Download the latest version of each. (Each package will probably have + a different version.) When you extract them, you'll get directories + named iconv-VERSION.win32, libxml2-VERSION.win32, and + zlib-VERSION.win32, each containing bin, lib, and include directories. + Combine all the bin, include, and lib directories under c:\Prog\LibXML. + (You can use any directory you prefer; just adjust the instructions + accordingly.) + Get the latest version of XML-LibXML from CPAN. + Extract it. + Issue these commands in the XML-LibXML-VERSION directory: + perl Makefile.PL INC=-Ic:\Prog\LibXML\include LIBS=-Lc:\Prog\LibXML\lib + nmake + copy c:\Prog\LibXML\bin\*.dll blib\arch\auto\XML\LibXML + nmake test + nmake install + (Note: Some systems use dmake instead of nmake.) + By copying the libxml2 DLLs to the arch directory, you help avoid + conflicts with other programs you may have installed that use other + (possibly incompatible) versions of those DLLs. + + + Notes for Mac OS X + + + Due to a refactoring of the module, XML::LibXML will + not run with some earlier versions of Mac OS X. It + appears that this is related to special linker options + for that OS prior to version 10.2.2. Since the + developers do not have full access to this OS, help/ + patches from OS X gurus are highly appreciated. + + + It is confirmed that XML::LibXML builds and runs + without problems since Mac OS X 10.2.6. + + + + Notes for HPUX + + XML::LibXML requires libxml2 2.6.16 or + later. There may not exist a usable binary + libxml2 package for HPUX and XML::LibXML. If + HPUX cc does not compile libxml2 + correctly, you will be forced to recompile perl with + gcc (unless you have already done that). + + Additionally I received the following Note from Rozi Kovesdi: + + Here is my report if someone else runs into the same problem: + +Finally I am done with installing all the libraries and XML Perl +modules + +The combination that worked best for me was: +gcc +GNU make + +Most importantly - before trying to install Perl modules that depend on +libxml2: + +must set SHLIB_PATH to include the path to libxml2 shared library + +assuming that you used the default: + +export SHLIB=/usr/local/lib + +also, make sure that the config files have execute permission: + +/usr/local/bin/xml2-config +/usr/local/bin/xslt-config + +they did not have +x after they were installed by 'make install' +and it took me a while to realize that this was my problem + +or one can use: + +perl Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include' + + + + + Contact + + For bug reports, please use the issue tracker at + https://github.com/shlomif/perl-XML-LibXML/issues . + + + For suggestions etc. you may contact the maintainer directly at + https://www.shlomifish.org/me/contact-me/ + , but in general, it is recommended to use the mailing + list given below. + + + For suggestions etc., and other issues + related to XML::LibXML you may use the perl XML mailing list + (perl-xml@listserv.ActiveState.com), + where most XML-related Perl modules are discussed. + In case of problems you should check the archives of that + list first. Many problems are already discussed there. You + can find the list's archives and subscription options at + http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml + + + + Package History + + Version < 0.98 were maintained by Matt Sergeant + + 0.98 > Version > 1.49 were maintained by Matt Sergeant and Christian Glahn + + Versions >= 1.49 are maintained by Christian Glahn + + Versions > 1.56 are co-maintained by Petr Pajas + + Versions >= 1.59 are provisionally maintained by Petr Pajas + + + + Patches and Developer Version + + As XML::LibXML is open source software, help and + patches are appreciated. If you find a bug in the current + release, make sure this bug still exists in the developer + version of XML::LibXML. This version can be cloned + from its Git repository. For more information about that, + see: + + https://github.com/shlomif/perl-XML-LibXML + + Please consider all regression tests as correct. If + any test fails it is most certainly related to a + bug. + + If you find documentation bugs, please fix them in + the libxml.dbk file, stored in the docs directory. + + + + Known Issues + + The push-parser implementation causes memory leaks. + + + + + License + + LICENSE + + This is free software, you may use it and distribute it under the same terms as Perl itself. + + Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas + + + Disclaimer + + THIS PROGRAM IS DISTRIBUTED IN THE HOPE THAT IT WILL + BE USEFUL, BUT WITHOUT ANY WARRANTY; WITHOUT EVEN THE + IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A + PARTICULAR PURPOSE. + + + + + Perl Binding for libxml2 + + XML::LibXML + + + Synopsis + + use XML::LibXML; +my $dom = XML::LibXML->load_xml(string => <<'EOT'); +<some-xml/> +EOT + + + + Description + + This module is an interface to libxml2, providing + XML and HTML parsers with DOM, SAX and XMLReader interfaces, + a large subset of DOM Layer 3 interface and + a XML::XPath-like interface to XPath API of libxml2. + The module is split into several packages which are not described in this section; + unless stated otherwise, you only need to use XML::LibXML; + in your programs. + + Check out XML::LibXML by Example + for a tutorial. + + For further information, please check the following documentation: + + + + + + + Parsing XML files with XML::LibXML + + + + + + + + XML::LibXML Document Object Model (DOM) Implementation + + + + + + + + XML::LibXML direct SAX parser + + + + + + + + Reading XML with a pull-parser + + + + + + + + XML::LibXML frontend for DTD validation + + + + + + + + XML::LibXML frontend for RelaxNG schema validation + + + + + + + + XML::LibXML frontend for W3C Schema schema validation + + + + + + + API for evaluating XPath expressions with enhanced support + for the evaluation context + + + + + + + + Implementing custom URI Resolver and input callbacks + + + + + + + Common functions for XML::LibXML related Classes + + + + The nodes in the Document Object Model (DOM) are represented by the following classes + (most of which "inherit" from ): + + + + + + XML::LibXML class for DOM document nodes + + + + + + + + Abstract base class for XML::LibXML DOM nodes + + + + + + + + XML::LibXML class for DOM element nodes + + + + + + + + XML::LibXML class for DOM text nodes + + + + + + + + XML::LibXML class for comment DOM nodes + + + + + + + + XML::LibXML class for DOM CDATA sections + + + + + + + + XML::LibXML DOM attribute class + + + + + + + + XML::LibXML's DOM L2 Document Fragment implementation + + + + + + + + XML::LibXML DOM namespace nodes + + + + + + + + XML::LibXML DOM processing instruction nodes + + + + + + + Encodings support in XML::LibXML + Recall that since version 5.6.1, Perl distinguishes between + character strings (internally encoded in UTF-8) and so + called binary data and, accordingly, applies either + character or byte semantics to them. A scalar + representing a character string is distinguished from + a byte string by special flag (UTF8). Please refer to perlunicode for details. + + + XML::LibXML's API is designed to deal with many + encodings of XML documents completely transparently, so + that the application using XML::LibXML can be completely + ignorant about the encoding of the XML documents it works with. + On the other hand, functions like XML::LibXML::Document->setEncoding + give the user control over the document encoding. + + + To ensure the aforementioned transparency and + uniformity, most functions of XML::LibXML that work with + in-memory trees accept and return data as character + strings (i.e. UTF-8 encoded with the UTF8 flag on) + regardless of the original document encoding; however, + the functions related to I/O operations (i.e. parsing + and saving) operate with binary data (in the original + document encoding) obeying the encoding declaration of + the XML documents. + Below we summarize basic rules and principles + regarding encoding: + + + Do NOT apply any encoding-related PerlIO layers + (:utf8 or :encoding(...)) + to file handles that are an input for the parses + or an output for a serializer of (full) XML documents. + This is because the conversion of the data to/from the internal character representation + is provided by libxml2 itself which must be able to enforce the encoding + specified by the <?xml version="1.0" encoding="..."?> + declaration. Here is an example to follow: + use XML::LibXML; +# load +open my $fh, '<', 'file.xml'; +binmode $fh; # drop all PerlIO layers possibly created by a use open pragma +$doc = XML::LibXML->load_xml(IO => $fh); + +# save +open my $out, '>', 'out.xml'; +binmode $out; # as above +$doc->toFH($out); +# or +print {$out} $doc->toString(); + + + + All functions working with DOM accept and return + character strings (UTF-8 encoded with UTF8 flag on). E.g. + new('1.0',$some_encoding); +my $element = $doc->createElement($name); +$element->appendText($text); +$xml_fragment = $element->toString(); # returns a character string +$xml_document = $doc->toString(); # returns a byte string +]]> + + where + $some_encoding is the document encoding + that will be used when saving the document, + and $name and $text + contain character strings (UTF-8 encoded with UTF8 flag on). + Note that the method toString + returns XML as a character string if applied to + other node than the Document node and + a byte string containing the appropriate + <?xml version="1.0" encoding="..."?> + declaration if applied to a . + + + + DOM methods also accept binary strings in the original encoding of the + document to which the node belongs (UTF-8 is assumed if the node is not + attached to any document). Exploiting this feature is NOT RECOMMENDED + since it is considered bad practice. + + new('1.0','iso-8859-2'); +my $text = $doc->createTextNode($some_latin2_encoded_byte_string); +# WORKS, BUT NOT RECOMMENDED! +]]> + + + + NOTE: libxml2 support for many + encodings is based on the iconv library. The actual list + of supported encodings may vary from platform to + platform. To test if your platform works correctly with + your language encoding, build a simple document in the + particular encoding and try to parse it with XML::LibXML + to see if the parser produces any errors. Occasional + crashes were reported on rare platforms that ship with a broken + version of iconv. + + + Thread Support + + XML::LibXML since 1.67 partially supports Perl threads + in Perl >= 5.8.8. XML::LibXML can be used with threads + in two ways: + + + By default, all + XML::LibXML classes use CLONE_SKIP class method + to prevent Perl from copying XML::LibXML::* objects + when a new thread is spawn. + In this mode, all XML::LibXML::* objects are thread specific. + This is the safest way + to work with XML::LibXML in threads. + + + Alternatively, one may use + + use threads; +use XML::LibXML qw(:threads_shared); + + to indicate, that + all XML::LibXML node and parser objects + should be shared between the main thread + and any thread spawn from there. + For example, in + + my $doc = XML::LibXML->load_xml(location => $filename); +my $thr = threads->new(sub{ + # code working with $doc + 1; +}); +$thr->join; + + + the variable $doc + refers to the exact same XML::LibXML::Document + in the spawned thread as in the main thread. + + + Without using mutex locks, + parallel threads may read the same document + (i.e. any node that belongs to the document), + parse files, and modify different documents. + + + However, if there is a chance that + some of the threads will attempt to modify a document + (or even create + new nodes based on that document, + e.g. with $doc->createElement) + that other threads may be reading at the same time, + the user is responsible for creating a mutex lock + and using it in both + in the thread that modifies and + the thread that reads: + + my $doc = XML::LibXML->load_xml(location => $filename); +my $mutex : shared; +my $thr = threads->new(sub{ + lock $mutex; + my $el = $doc->createElement('foo'); + # ... + 1; +}); +{ + lock $mutex; + my $root = $doc->documentElement; + say $root->name; +} +$thr->join; + +Note that libxml2 uses dictionaries to store short strings and +these dictionaries are kept on a document node. Without mutex locks, it +could happen in the previous example that the thread modifies the +dictionary while other threads attempt to read from it, which could +easily lead to a crash. + + + Version Information + + Sometimes it is useful to figure out, for which + version XML::LibXML was compiled for. In most cases this + is for debugging or to check if a given installation meets + all functionality for the package. The functions + XML::LibXML::LIBXML_DOTTED_VERSION and + XML::LibXML::LIBXML_VERSION provide this version + information. Both functions simply pass through the values + of the similar named macros of libxml2. + Similarly, XML::LibXML::LIBXML_RUNTIME_VERSION returns + the version of the (usually dynamically) linked libxml2. + + + + + XML::LibXML::LIBXML_DOTTED_VERSION + + + + $Version_String = XML::LibXML::LIBXML_DOTTED_VERSION; + + + Returns the version string of the + libxml2 version XML::LibXML was compiled + for. This will be "2.6.2" for "libxml2 + 2.6.2". + + + + + XML::LibXML::LIBXML_VERSION + + + + $Version_ID = XML::LibXML::LIBXML_VERSION; + + + Returns the version id of the libxml2 + version XML::LibXML was compiled for. This + will be "20602" for "libxml2 2.6.2". Don't mix + this version id with + $XML::LibXML::VERSION. The latter contains the + version of XML::LibXML itself while the first + contains the version of libxml2 XML::LibXML + was compiled for. + + + + XML::LibXML::LIBXML_RUNTIME_VERSION + + + + $DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION; + + + Returns a version string of the libxml2 + which is (usually dynamically) linked by + XML::LibXML. This will be "20602" for libxml2 + released as "2.6.2" and something like + "20602-CVS2032" for a CVS build of + libxml2. + XML::LibXML issues a warning if the version + of libxml2 dynamically linked to it is less than the version of libxml2 + which it was compiled against. + + + + + + + + EXPORTS + + By default the module exports all constants and functions + listed in the :all tag, described below. + + + + EXPORT TAGS + + + :all + + Includes the tags :libxml, :encoding, and + :ns described below. + + + + :libxml + + Exports integer constants for DOM node types. + XML_ELEMENT_NODE => 1 +XML_ATTRIBUTE_NODE => 2 +XML_TEXT_NODE => 3 +XML_CDATA_SECTION_NODE => 4 +XML_ENTITY_REF_NODE => 5 +XML_ENTITY_NODE => 6 +XML_PI_NODE => 7 +XML_COMMENT_NODE => 8 +XML_DOCUMENT_NODE => 9 +XML_DOCUMENT_TYPE_NODE => 10 +XML_DOCUMENT_FRAG_NODE => 11 +XML_NOTATION_NODE => 12 +XML_HTML_DOCUMENT_NODE => 13 +XML_DTD_NODE => 14 +XML_ELEMENT_DECL => 15 +XML_ATTRIBUTE_DECL => 16 +XML_ENTITY_DECL => 17 +XML_NAMESPACE_DECL => 18 +XML_XINCLUDE_START => 19 +XML_XINCLUDE_END => 20 + + + + :encoding + + Exports two encoding conversion functions from XML::LibXML::Common. + +encodeToUTF8() +decodeFromUTF8() + + + + + :ns + + Exports two convenience constants: the implicit namespace of the + reserved xml: prefix, + and the implicit namespace for the reserved xmlns: prefix. + +XML_XML_NS => 'http://www.w3.org/XML/1998/namespace' +XML_XMLNS_NS => 'http://www.w3.org/2000/xmlns/' + + + + + + + Related Modules + + The modules described in this section are not part of the XML::LibXML package itself. As they support some additional features, they are + mentioned here. + + + + XML::LibXSLT + + + XSLT 1.0 Processor using libxslt and XML::LibXML + + + + + XML::LibXML::Iterator + + + XML::LibXML Implementation of the DOM Traversal Specification + + + + + XML::CompactTree::XS + + + Uses XML::LibXML::Reader to very efficiently to parse XML document + or element into native Perl data structures, which are less flexible but + significantly faster to process then DOM. + + + + + + + + XML::LibXML and XML::GDOME + + Note: THE FUNCTIONS DESCRIBED HERE ARE STILL EXPERIMENTAL + + Although both modules make use of libxml2's XML capabilities, the DOM implementation of both modules are not compatible. But still it is + possible to exchange nodes from one DOM to the other. The concept of this exchange is pretty similar to the function cloneNode(): The particular + node is copied on the low-level to the opposite DOM implementation. + + Since the DOM implementations cannot coexist within one document, one is forced to copy each node that should be used. Because you are always + keeping two nodes this may cause quite an impact on a machines memory usage. + + XML::LibXML provides two functions to export or import GDOME nodes: import_GDOME() and export_GDOME(). Both function have two parameters: the + node and a flag for recursive import. The flag works as in cloneNode(). + + The two functions allow one to export and import XML::GDOME nodes explicitly, however, XML::LibXML also allows the transparent import of + XML::GDOME nodes in functions such as appendChild(), insertAfter() and so on. While native nodes are automatically adopted in most functions + XML::GDOME nodes are always cloned in advance. Thus if the original node is modified after the operation, the node in the XML::LibXML document will + not have this information. + + + + import_GDOME + + + + $libxmlnode = XML::LibXML->import_GDOME( $node, $deep ); + + + This clones an XML::GDOME node to an XML::LibXML node explicitly. + + + + + export_GDOME + + + + $gdomenode = XML::LibXML->export_GDOME( $node, $deep ); + + + Allows one to clone an XML::LibXML node into an + XML::GDOME node. + + + + + + + CONTACTS + + For bug reports, please use the CPAN request tracker on http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-LibXML + For suggestions etc., and other issues + related to XML::LibXML you may use the perl XML mailing list + (perl-xml@listserv.ActiveState.com), + where most XML-related Perl modules are discussed. + In case of problems you should check the archives of that + list first. Many problems are already discussed there. You + can find the list's archives and subscription options at + http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml. + + + + + + Parsing XML Data with XML::LibXML + + XML::LibXML::Parser + + + Synopsis + + use XML::LibXML '1.70'; + + + + + Parsing + + An XML document is read into a data structure such as a DOM tree by a piece of software, called a parser. XML::LibXML currently provides four + different parser interfaces: + + + + A DOM Pull-Parser + + + + A DOM Push-Parser + + + + A SAX Parser + + + + A DOM based SAX Parser. + + + + + Creating a Parser Instance + + XML::LibXML provides an OO interface to the libxml2 parser functions. Thus you have to create a parser instance before you can parse any + XML data. + + + + new + + + + # Parser constructor + + + +$parser = XML::LibXML->new(); +$parser = XML::LibXML->new(option=>value, ...); +$parser = XML::LibXML->new({option=>value, ...}); + + + Create a new XML and HTML parser instance. + Each parser instance holds default + values for various parser options. + Optionally, + one can pass a hash reference or + a list of option => value pairs to + set a different default set of options. + Unless specified otherwise, the options + load_ext_dtd, and + expand_entities are set to 1. + See for a list of libxml2 parser's options. + + + + + + + + DOM Parser + + One of the common parser interfaces of XML::LibXML is the DOM parser. This parser reads XML data into a DOM like data structure, so each + tag can get accessed and transformed. + + XML::LibXML's DOM parser is not only capable to parse XML data, but also (strict) HTML files. There are three ways to parse + documents - as a string, as a Perl filehandle, or as a filename/URL. The return value from each is a object, which is a DOM + object. + + All of the functions listed below will throw an exception if the document is invalid. To prevent this causing your program exiting, wrap + the call in an eval{} block + + + + load_xml + + + +# Parsing XML + + + +$dom = XML::LibXML->load_xml( + location => $file_or_url + # parser options ... + ); +$dom = XML::LibXML->load_xml( + string => $xml_string + # parser options ... + ); +$dom = XML::LibXML->load_xml( + string => (\$xml_string) + # parser options ... + ); +$dom = XML::LibXML->load_xml({ + IO => $perl_file_handle + # parser options ... + ); +$dom = $parser->load_xml(...); + + + This function is available since XML::LibXML 1.70. It provides easy to use interface to the XML parser that parses + given file (or non-HTTPS URL), string, or input stream + to a DOM tree. The arguments + can be passed in a HASH reference + or as name => value pairs. + The function can be called + as a class method or an object method. + In both cases it internally creates a new + parser instance passing + the specified parser options; + if called as an object method, + it clones the original parser (preserving + its settings) and additionally applies + the specified options to the new parser. + See the constructor new + and + for more information. + + Note that, due to a limitation in the underlying libxml2 + library, this call does not recognize HTTPS-based URLs. (It + will treat an HTTPS URL as a filename, likely throwing a "No such + file or directory" exception.) + + + + + load_html + + + # Parsing HTML + + + +$dom = XML::LibXML->load_html(...); +$dom = $parser->load_html(...); + + + This function is available since XML::LibXML 1.70. It has the same usage as load_xml, + providing interface to the HTML parser. + See load_xml for more information. + + + + + + Parsing HTML may cause problems, especially if + the ampersand ('&') is used. This is a common + problem if HTML code is parsed that contains links to + CGI-scripts. Such links cause the parser to throw + errors. In such cases libxml2 still parses the entire + document as there was no error, but the error causes + XML::LibXML to stop the parsing process. However, the + document is not lost. Such HTML documents should be + parsed using the recover flag. By + default recovering is deactivated. + + The functions described above are implemented to + parse well formed documents. In some cases a program + gets well balanced XML instead of well formed + documents (e.g. an XML fragment from a database). With + XML::LibXML it is not required to wrap such fragments + in the code, because XML::LibXML is capable even to + parse well balanced XML fragments. + + + + parse_balanced_chunk + + + # Parsing well-balanced XML chunks + + + + $fragment = $parser->parse_balanced_chunk( $wbxmlstring, $encoding ); + + + This function parses a well balanced XML string into a . The first arguments contains the input string, the optional second argument can be used to specify character encoding of the input (UTF-8 is assumed by default). + + + + + parse_xml_chunk + + + This is the old name of parse_balanced_chunk(). Because it may causes confusion with the push parser interface, this function + should not be used anymore. + + + + + By default XML::LibXML does not process XInclude tags + within an XML Document (see options section below). + XML::LibXML allows one to post-process a document to expand + XInclude tags. + + + + process_xincludes + + + +# Processing XInclude + + + + $parser->process_xincludes( $doc ); + + + After a document is parsed into a DOM structure, you may want to expand the documents XInclude tags. This function processes + the given document structure and expands all XInclude tags (or throws an error) by using the flags and callbacks of the given parser + instance. + + Note that the resulting Tree contains some extra nodes (of type XML_XINCLUDE_START and XML_XINCLUDE_END) after successfully + processing the document. These nodes indicate where data was included into the original tree. if the document is serialized, these + extra nodes will not show up. + + Remember: A Document with processed XIncludes differs from the original document after serialization, because the original + XInclude tags will not get restored! + + If the parser flag "expand_xincludes" is set to 1, you need not to post process the parsed document. + + + + + processXIncludes + + + + $parser->processXIncludes( $doc ); + + + This is an alias to process_xincludes, but through a JAVA like function name. + + + + + parse_file + + + + +# Old-style parser interfaces + + + + $doc = $parser->parse_file( $xmlfilename ); + + + This function parses an XML document from a file or network; + $xmlfilename can be either a filename or a (non-HTTPS) URL. + Note that for parsing files, this function is the fastest choice, + about 6-8 times faster then parse_fh(). + + + + + + + parse_fh + + + + $doc = $parser->parse_fh( $io_fh ); + + + parse_fh() parses a IOREF or a subclass of IO::Handle. + + Because the data comes from an open handle, libxml2's parser does not know about the base URI of the document. To set the + base URI one should use parse_fh() as follows: + + my $doc = $parser->parse_fh( $io_fh, $baseuri ); + + + + + parse_string + + + + $doc = $parser->parse_string( $xmlstring); + + + This function is similar to parse_fh(), but it parses an XML document that is available as a single string in memory, or alternatively as a reference to a scalar containing a string. Again, + you can pass an optional base URI to the function. + + my $doc = $parser->parse_string( $xmlstring, $baseuri ); +my $doc = $parser->parse_string(\$xmlstring, $baseuri); + + + + + + parse_html_file + + + + $doc = $parser->parse_html_file( $htmlfile, \%opts ); + + + Similar to parse_file() but parses HTML (strict) documents; + $htmlfile can be filename or (non-HTTPS) URL. + + An optional second argument can be + used to pass some options to the HTML + parser as a HASH reference. + See options labeled with HTML in . + + + + + + parse_html_fh + + + + $doc = $parser->parse_html_fh( $io_fh, \%opts ); + + Similar to parse_fh() but parses HTML (strict) streams. + + An optional second argument can be used + to pass some options to the HTML parser + as a HASH reference. + See options labeled with HTML in . + + + Note: encoding option may + not work correctly with this function + in libxml2 < 2.6.27 if the HTML file + declares charset using a META tag. + + + + + + parse_html_string + + + + $doc = $parser->parse_html_string( $htmlstring, \%opts ); + + Similar to parse_string() but parses HTML (strict) strings. + An optional second argument can be used to pass some options to the + HTML parser as a HASH reference. + See options labeled with HTML in . + + + + + + + + Push Parser + + XML::LibXML provides a push parser interface. Rather than pulling the data from a given source the push parser waits for the data to be + pushed into it. + + This allows one to parse large documents without waiting for the parser to finish. The interface is especially useful if a program needs + to pre-process the incoming pieces of XML (e.g. to detect document boundaries). + + While XML::LibXML parse_*() functions force the data to be a well-formed XML, the push parser will take any arbitrary string that contains + some XML data. The only requirement is that all the pushed strings are together a well formed document. With the push parser interface a + program can interrupt the parsing process as required, where the parse_*() functions give not enough flexibility. + + Different to the pull parser implemented in parse_fh() or parse_file(), the push parser is not able to find out about the documents end + itself. Thus the calling program needs to indicate explicitly when the parsing is done. + + In XML::LibXML this is done by a single function: + + + + parse_chunk + + + + +# Push parser + + + + $parser->parse_chunk($string, $terminate); + + + parse_chunk() tries to parse a given chunk of data, which isn't necessarily well balanced data. The function takes two + parameters: The chunk of data as a string and optional a termination flag. If the termination flag is set to a true value (e.g. 1), + the parsing will be stopped and the resulting document will be returned as the following example describes: + + my $parser = XML::LibXML->new; +for my $string ( "<", "foo", ' bar="hello world"', "/>") { + $parser->parse_chunk( $string ); +} +my $doc = $parser->parse_chunk("", 1); # terminate the parsing + + + + + Internally XML::LibXML provides three functions that control the push parser process: + + + + init_push + + + + $parser->init_push(); + + + Initializes the push parser. + + + + + push + + + + $parser->push(@data); + + + This function pushes the data stored inside the array to libxml2's parser. Each entry in @data must be a normal scalar! This method can be called repeatedly. + + + + + finish_push + + + + $doc = $parser->finish_push( $recover ); + + + This function returns the result of the parsing process. If this function is called without a parameter it will complain about + non well-formed documents. If $restore is 1, the push parser can be used to restore broken or non well formed (XML) documents as the + following example shows: + + eval { + $parser->push( "<foo>", "bar" ); + $doc = $parser->finish_push(); # will report broken XML +}; +if ( $@ ) { + # ... +} + + This can be annoying if the closing tag is missed by accident. The following code will restore the document: + + eval { + $parser->push( "<foo>", "bar" ); + $doc = $parser->finish_push(1); # will return the data parsed + # unless an error happened +}; + +print $doc->toString(); # returns "<foo>bar</foo>" + + Of course finish_push() will return nothing if there was no data pushed to the parser before. + + + + + + + Pull Parser (Reader) + XML::LibXML also provides a pull-parser interface similar to the XmlReader interface in .NET. + This interface is almost streaming, and is usually faster and simpler to use than SAX. + See . + + + + Direct SAX Parser + XML::LibXML provides a direct SAX parser in the module. + + + + DOM based SAX Parser + + XML::LibXML also provides a DOM based SAX parser. The SAX parser is defined in + the module XML::LibXML::SAX::Parser. As it is not a stream based parser, it + parses documents into a DOM and traverses the DOM tree instead. + + The API of this parser is exactly the same as any other Perl SAX2 parser. See XML::SAX::Intro for details. + + Aside from the regular parsing methods, you can access the DOM tree traverser directly, using the generate() method: + + my $doc = build_yourself_a_document(); +my $saxparser = $XML::LibXML::SAX::Parser->new( ... ); +$parser->generate( $doc ); + + This is useful for serializing DOM trees, for example that you might have done prior processing on, or that you have as a result of XSLT + processing. + + WARNING + + This is NOT a streaming SAX parser. As I said above, this parser reads the entire document into a DOM and serialises it. Some people + couldn't read that in the paragraph above so I've added this warning. If you want a streaming SAX parser look at the man page + + + + + Serialization + + XML::LibXML provides some functions to serialize nodes and documents. The serialization functions are described on the + manpage or the manpage. XML::LibXML checks three global flags that alter the serialization process: + + + + skipXMLDeclaration + + + + skipDTD + + + + setTagCompression + + + + of that three functions only setTagCompression is available for all serialization functions. + + Because XML::LibXML does these flags not itself, one has to define them locally as the following example shows: + + local $XML::LibXML::skipXMLDeclaration = 1; +local $XML::LibXML::skipDTD = 1; +local $XML::LibXML::setTagCompression = 1; + + If skipXMLDeclaration is defined and not '0', the XML declaration is omitted during serialization. + + If skipDTD is defined and not '0', an existing DTD would not be serialized with the document. + + If setTagCompression is defined and not '0' empty tags are displayed as open and closing tags rather than the shortcut. For example + the empty tag foo will be rendered as <foo></foo> rather than <foo/>. + + + + Parser Options + + Handling of libxml2 parser options has been unified and improved in XML::LibXML 1.70. + You can now set default options for a particular parser instance by + passing them to the constructor as XML::LibXML->new({name=>value, ...}) + or XML::LibXML->new(name=>value,...). + The options can be queried and changed using the following methods (pre-1.70 interfaces such as $parser->load_ext_dtd(0) also exist, see below): + + + + + option_exists + + + +# Set/query parser options + + + + $parser->option_exists($name); + + Returns 1 if the current XML::LibXML version supports + the option $name, otherwise returns 0 (note that this does not necessarily mean that the option is supported + by the underlying libxml2 library). + + + + get_option + + + $parser->get_option($name); + + Returns the current value of the parser option $name. + + + + set_option + + + $parser->set_option($name,$value); + + Sets option $name to value $value. + + + + set_options + + + $parser->set_options({$name=>$value,...}); + + Sets multiple parsing options at once. + + + + + IMPORTANT NOTE: This documentation reflects the parser flags available in libxml2 2.7.3. + Some options have no effect if an older version of libxml2 is used. + + Each of the flags listed below is labeled + + + /parser/ + + if it can be used with a XML::LibXML + parser object (i.e. passed to XML::LibXML->new, XML::LibXML->set_option, etc.) + + + + + /html/ + + if it can be used passed to the parse_html_* methods + + + + /reader/ + + if it can be used with the XML::LibXML::Reader. + + + + + Unless specified otherwise, the default for boolean valued options is 0 (false). + + The available options are: + + + URI + + /parser, html, reader/ + In case of parsing strings or file handles, XML::LibXML doesn't know about the base uri of the document. To make relative + references such as XIncludes work, one has to set a base URI, that is then used for the parsed document. + + + + line_numbers + + /parser, html, reader/ + If this option is activated, libxml2 will store the line number of each element node in the parsed document. + The line number can be obtained using the line_number() method + of the XML::LibXML::Node class (for non-element nodes + this may report the line number of the containing element). + The line numbers are also used for reporting positions of validation errors. + + IMPORTANT: + Due to limitations in the libxml2 library line numbers greater than + 65535 will be returned as 65535. Unfortunately, this is a long and sad story, please see + http://bugzilla.gnome.org/show_bug.cgi?id=325533 for more details. + + + + + encoding + + /html/ + character encoding of the input + + + + recover + + /parser, html, reader/ + recover from errors; possible values are 0, 1, and 2 + + A true value turns on recovery mode which + allows one to parse broken XML or HTML data. + The recovery mode allows the parser to return + the successfully parsed portion of the input document. + This is useful for almost well-formed documents, where for example + a closing tag is missing somewhere. Still, + XML::LibXML will only parse until the first fatal (non-recoverable) error occurs, + reporting recoverable parsing errors as warnings. To suppress + even these warnings, use recover=>2. + Note that validation is switched off automatically in recovery mode. + + + + expand_entities + + /parser, reader/ + substitute entities; possible values are 0 and 1; default is 1 + Note that although this flag disables entity substitution, it + does not prevent the parser from loading external entities; + when substitution of an external entity is disabled, the + entity will be represented in the document tree by an XML_ENTITY_REF_NODE node + whose subtree will be the content obtained by parsing the external resource; + Although this nesting is visible from the DOM + it is transparent to XPath data model, so it is possible to + match nodes in an unexpanded entity by the same XPath expression + as if the entity were expanded. See also ext_ent_handler. + + + + + ext_ent_handler + + /parser/ + Provide a custom external entity handler + to be used when expand_entities is set to 1. + Possible value is a subroutine reference. + + This feature does not work properly in libxml2 < 2.6.27! + The subroutine provided is called whenever + the parser needs to retrieve the content of an external entity. + It is called with two arguments: the system ID (URI) and the public ID. + The value returned by the subroutine is parsed as the content of the entity. + + This method can be used to completely disable entity loading, + e.g. to prevent exploits of the type described at + , + where a service is tricked to expose its private data + by letting it parse a remote file (RSS feed) that contains an entity reference to a local + file (e.g. /etc/fstab). + + A more granular solution to this problem, however, is + provided by custom URL resolvers, as in + +my $c = XML::LibXML::InputCallback->new(); +sub match { # accept file:/ URIs except for XML catalogs in /etc/xml/ + my ($uri) = @_; + return ($uri=~m{^file:/} + and $uri !~ m{^file:///etc/xml/}) + ? 1 : 0; +} +$c->register_callbacks([ \&match, sub{}, sub{}, sub{} ]); +$parser->input_callbacks($c); + + + + + + load_ext_dtd + + /parser, reader/ + load the external DTD subset while parsing; possible values are 0 and 1. Unless specified, + XML::LibXML sets this option to 1. + This flag is also required for DTD Validation, to provide complete attribute, + and to expand entities, regardless if the document has an internal subset. + Thus switching off external DTD loading, will disable entity expansion, + validation, and complete attributes on internal subsets as well. + + + + complete_attributes + + /parser, reader/ + create default DTD attributes; possible values are 0 and 1 + + + + validation + + /parser, reader/ + validate with the DTD; possible values are 0 and 1 + + + + suppress_errors + + /parser, html, reader/ + suppress error reports; possible values are 0 and 1 + + + + suppress_warnings + + /parser, html, reader/ + suppress warning reports; possible values are 0 and 1 + + + + pedantic_parser + + /parser, html, reader/ + pedantic error reporting; possible values are 0 and 1 + + + + no_blanks + + /parser, html, reader/ + remove blank nodes; possible values are 0 and 1 + + + + no_defdtd + + /html/ + do not add a default DOCTYPE; possible values are 0 and 1 + the default is (0) to add a DTD when the input html lacks one + + + + expand_xinclude or xinclude + + /parser, reader/ + Implement XInclude substitution; possible values are 0 and 1 + Expands XInclude tags immediately while parsing the document. + Note that the parser will use the URI resolvers installed + via XML::LibXML::InputCallback to parse the included document (if any). + + + + no_xinclude_nodes + + /parser, reader/ + do not generate XINCLUDE START/END nodes; possible values are 0 and 1 + + + + no_network + + /parser, html, reader/ + Forbid network access; possible values are 0 and 1 + If set to true, all + attempts to fetch non-local resources (such as + DTD or external entities) will fail (unless + custom callbacks are defined). + It may be + necessary to use the flag recover for + processing documents requiring such resources + while networking is off. + + + + + clean_namespaces + + /parser, reader/ + remove redundant namespaces declarations during parsing; possible values are 0 and 1. + + + + + no_cdata + + /parser, html, reader/ + merge CDATA as text nodes; possible values are 0 and 1 + + + + no_basefix + + /parser, reader/ + not fixup XINCLUDE xml#base URIS; possible values are 0 and 1 + + + + huge + + /parser, html, reader/ + relax any hardcoded limit from the parser; possible values are 0 and 1. Unless specified, + XML::LibXML sets this option to 0. + Note: the default value for this option was changed to protect against denial + of service through entity expansion attacks. Before enabling the option ensure + you have taken alternative measures to protect your application against this type + of attack. + + + + gdome + + /parser/ + THIS OPTION IS EXPERIMENTAL! + Although quite powerful, XML::LibXML's DOM implementation is incomplete with respect to + the DOM level 2 or level 3 specifications. + XML::GDOME is based on libxml2 as well, and provides a rather complete DOM implementation by wrapping libgdome. + This flag allows you to make + use of XML::LibXML's full parser options and XML::GDOME's DOM implementation at the same time. + To make use of this function, one has to install libgdome and configure XML::LibXML to use this library. + For this you need to rebuild XML::LibXML! + Note: this feature was not seriously tested in recent XML::LibXML releases. + + + + For compatibility with XML::LibXML versions prior to 1.70, + the following methods are also supported for querying and setting the corresponding parser options + (if called without arguments, the methods return + the current value of the corresponding parser options; with an argument sets the option to a given value): + + $parser->validation(); +$parser->recover(); +$parser->pedantic_parser(); +$parser->line_numbers(); +$parser->load_ext_dtd(); +$parser->complete_attributes(); +$parser->expand_xinclude(); +$parser->gdome_dom(); +$parser->clean_namespaces(); +$parser->no_network(); + The following obsolete methods trigger parser options in some + special way: + + + recover_silently + + + $parser->recover_silently(1); + + If called without an argument, + returns true if the current value of the recover parser + option is 2 and returns false otherwise. + With a true argument sets the recover parser option to 2; + with a false argument sets the recover parser option to 0. + + + + + expand_entities + + + $parser->expand_entities(0); + + Get/set the expand_entities option. + If called with a true argument, also turns + the load_ext_dtd option to 1. + + + + + keep_blanks + + + $parser->keep_blanks(0); + + This is actually the opposite of the no_blanks parser option. + If used without an argument retrieves negated value of no_blanks. + If used with an argument sets no_blanks to the opposite value. + + + + + base_uri + + + + $parser->base_uri( $your_base_uri ); + + Get/set the URI option. + + + + + + XML Catalogs + libxml2 supports XML catalogs. + Catalogs are used to + map remote resources to their local copies. + Using catalogs can speed up parsing processes if + many external resources from remote addresses + are loaded into the parsed documents (such as DTDs or XIncludes). + + + Note that libxml2 has a global pool of loaded catalogs, + so if you apply the method load_catalog + to one parser instance, all parser instances will start using the catalog + (in addition to other previously loaded catalogs). + + Note also that catalogs are not used + when a custom external entity handler is specified. At the + current state it is not possible to make use of both + types of resolving systems at the same time. + + + load_catalog + + + +# XML catalogs + + + + $parser->load_catalog( $catalog_file ); + + Loads the XML catalog file $catalog_file. + +# Global external entity loader (similar to ext_ent_handler option +# but this works really globally, also in XML::LibXSLT include etc..) + +XML::LibXML::externalEntityLoader(\&my_loader); + + + + + + + Error Reporting + + XML::LibXML throws exceptions during parsing, validation or XPath processing (and some other occasions). These errors can be caught by using + eval blocks. The error is stored in $@. + There are two implementations: the old one throws $@ which is just a message string, + in the new one $@ is an object from the class XML::LibXML::Error; + this class overrides the operator "" so that when printed, + the object flattens to the usual error message. + + + XML::LibXML throws errors as they occur. This is a very common misunderstanding in the use of XML::LibXML. If the eval is omitted, XML::LibXML will always halt your script by + "croaking" (see Carp man page for details). + + Also note that an increasing number of functions throw errors if bad data is passed as arguments. If you cannot assure valid data passed to XML::LibXML you should eval + these functions. + + Note: since version 1.59, get_last_error() is no longer available in XML::LibXML for thread-safety reasons. + + + + + XML::LibXML direct SAX parser + + XML::LibXML::SAX + + + Description + + XML::LibXML provides an interface to libxml2 direct SAX interface. Through this interface it is possible to generate SAX events directly while + parsing a document. While using the SAX parser XML::LibXML will not create a DOM Document tree. + + Such an interface is useful if very large XML documents have to be processed and no DOM functions are required. By using this interface it is + possible to read data stored within an XML document directly into the application data structures without loading the document into memory. + + The SAX interface of XML::LibXML is based on the famous XML::SAX interface. It uses the generic interface as provided by XML::SAX::Base. + + Additionally to the generic functions, which are only able to process entire documents, XML::LibXML::SAX provides parse_chunk(). + This method generates SAX events from well balanced data such as is often provided by databases. + + + + Features + + NOTE: This feature is experimental. + + You can enable character data joining which may yield a + significant speed boost in your XML processing in lower markup + ratio situations by enabling the + http://xmlns.perl.org/sax/join-character-data feature of this + parser. This is done via the set_feature method like + this: + + + $p->set_feature('http://xmlns.perl.org/sax/join-character-data', 1); + + + + You can also specify a 0 to disable. The default is to have + this feature disabled. + + + + + + Building DOM trees from SAX events. + + XML::LibXML::SAX::Builder + + + Synopsis + + use XML::LibXML::SAX::Builder; +my $builder = XML::LibXML::SAX::Builder->new(); + +my $gen = XML::Generator::DBI->new(Handler => $builder, dbh => $dbh); +$gen->execute("SELECT * FROM Users"); + +my $doc = $builder->result(); + + + + Description + + This is a SAX handler that generates a DOM tree from SAX events. Usage is as above. Input is accepted from any SAX1 or SAX2 event generator. + + Building DOM trees from SAX events is quite easy with XML::LibXML::SAX::Builder. The class is designed as a SAX2 final handler not as a + filter! + + Since SAX is strictly stream oriented, you should not expect anything to return from a generator. Instead you have to ask the builder instance + directly to get the document built. XML::LibXML::SAX::Builder's result() function holds the document generated from the last SAX stream. + + + + + XML::LibXML DOM Implementation + XML::LibXML::DOM + + Description + XML::LibXML provides a lightweight interface to + modify a node of the document tree + generated by the XML::LibXML parser. This interface + follows as far as possible the DOM Level 3 + specification. In addition to the specified functions, + XML::LibXML supports some functions that are more handy to + use in the perl environment. + + One also has to remember, that XML::LibXML is an + interface to libxml2 nodes which actually reside on the + C-Level of XML::LibXML. This means each node is a + reference to a structure which is different from a perl hash or + array. The only way to access these structures' values is + through the DOM interface provided by XML::LibXML. This + also means, that one can't simply + inherit an XML::LibXML node and add new member variables as + if they were hash keys. + + The DOM interface of XML::LibXML does not intend to + implement a full DOM interface as it is done by XML::GDOME + and used for full featured application. Moreover, it + offers an simple way to build or modify documents that are + created by XML::LibXML's parser. + + Another target of the XML::LibXML interface is to + make the interfaces of libxml2 available to the perl + community. This includes also some workarounds to some + features where libxml2 assumes more control over the + C-Level that most perl users don't have. + + One of the most important parts of the XML::LibXML + DOM interface is that the interfaces try to follow the + DOM Level 3 specification rather strictly. This means the + interface functions are named as the DOM specification + says and not what widespread Java interfaces claim to be + the standard. Although there are several functions that have + only a singular interface that conforms to the DOM spec + XML::LibXML provides an additional Java style alias + interface. + + Moreover, there are some function interfaces left over + from early stages of XML::LibXML for compatibility + reasons. These interfaces are for compatibility reasons + only. They might disappear in one of + the future versions of XML::LibXML, so a user is requested + to switch over to the official functions. + + Encodings and XML::LibXML's DOM implementation + See the section on Encodings in the XML::LibXML manual page. + + + Namespaces and XML::LibXML's DOM implementation + + XML::LibXML's DOM implementation is + limited by the DOM implementation of libxml2 + which treats namespaces slightly differently than + required by the DOM Level 2 specification. + + According to the DOM Level 2 specification, + namespaces of elements and attributes should be + persistent, and nodes should be permanently bound to + namespace URIs as they get created; it should be + possible to manipulate the special attributes used for + declaring XML namespaces just as other attributes + without affecting the namespaces of other nodes. + In DOM Level 2, the application is responsible + for creating the special attributes consistently and/or for correct + serialization of the document. + + + This is both inconvenient, causes problems in serialization + of DOM to XML, and most importantly, seems almost impossible + to implement over libxml2. + + + In libxml2, namespace URI and prefix of a node is + provided by a pointer to a namespace declaration + (appearing as a special xmlns attribute in the XML + document). If the prefix or namespace URI of the + declaration changes, the prefix and namespace URI of all + nodes that point to it changes as well. Moreover, in + contrast to DOM, a node (element or attribute) can only + be bound to a namespace URI if there is some namespace + declaration in the document to point to. + + + Therefore current DOM implementation in XML::LibXML tries + to treat namespace declarations in a compromise between + reason, common sense, limitations of libxml2, and the DOM + Level 2 specification. + + In XML::LibXML, special attributes declaring XML namespaces + are often created automatically, usually when + a namespaced node is attached to a document + and no existing declaration of the namespace and prefix is in the + scope to be reused. + In this respect, + XML::LibXML DOM implementation differs from the DOM + Level 2 specification according to which special + attributes for declaring the appropriate XML namespaces + should not be added when a node with a namespace prefix + and namespace URI is created. + + + Namespace declarations are also created when + 's + createElementNS() or createAttributeNS() function are used. If the + a namespace is not declared on the documentElement, the + namespace will be locally declared for the newly created + node. In case of Attributes this may look a bit confusing, + since these nodes cannot have namespace declarations + itself. In this case the namespace is internally applied + to the attribute and later declared on the node the + attribute is appended to (if required). + The following example may explain this a bit: + my $doc = XML::LibXML->createDocument; +my $root = $doc->createElementNS( "", "foo" ); +$doc->setDocumentElement( $root ); + +my $attr = $doc->createAttributeNS( "bar", "bar:foo", "test" ); +$root->setAttributeNodeNS( $attr ); + + This piece of code will result in the following document: + + <?xml version="1.0"?> +<foo xmlns:bar="bar" bar:foo="test"/> + + The namespace is declared on the document element + during the setAttributeNodeNS() call. + + Namespaces can be also declared explicitly by the use of XML::LibXML::Element's setNamespace() function. + Since 1.61, they can also be manipulated with functions + setNamespaceDeclPrefix() and setNamespaceDeclURI() (not available in DOM). + Changing an URI or prefix of an existing namespace declaration + affects the namespace URI and prefix of all nodes which point to it + (that is the nodes in its scope). + + It is also important to repeat the specification: + While working with namespaces you should use the namespace + aware functions instead of the simplified versions. For + example you should never use + setAttribute() but setAttributeNS(). + + + + + + XML::LibXML DOM Document Class + + XML::LibXML::Document + + Synopsis + use XML::LibXML; +# Only methods specific to Document nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + The Document Class is in most cases the result of a parsing process. But sometimes it is necessary to create a Document from scratch. The DOM + Document Class provides functions that conform to the DOM Core naming style. + + It inherits all functions from as specified in the DOM specification. This enables access to the nodes + besides the root element on document level - a DTD for example. The support for these nodes is limited at the moment. + + While generally nodes are bound to a document in the DOM concept it is suggested that one should always create a node not bound to any document. + There is no need of really including the node to the document, but once the node is bound to a document, it is quite safe that all strings have the + correct encoding. If an unbound text node with an ISO encoded string is created (e.g. with $CLASS->new()), the toString function + may not return the expected result. + + To prevent such problems, it is recommended to pass all data to XML::LibXML methods + as character strings (i.e. UTF-8 encoded, with the UTF8 flag on). + + + Methods + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + new + + + $dom = XML::LibXML::Document->new( $version, $encoding ); + + + alias for createDocument() + + + + + createDocument + + + + $dom = XML::LibXML::Document->createDocument( $version, $encoding ); + + + The constructor for the document class. As Parameter it takes the version string and (optionally) the encoding string. Simply calling + createDocument() will create the document: + + <?xml version="your version" encoding="your encoding"?> + + Both parameter are optional. The default value for $version is 1.0, of course. If the + $encoding parameter is not set, the encoding will be left unset, which means UTF-8 is implied. + + The call of createDocument() without any parameter will result the following code: + + <?xml version="1.0"?> + + Alternatively one can call this constructor directly from the XML::LibXML class level, to avoid some typing. This will not have any + effect on the class instance, which is always XML::LibXML::Document. + + my $document = XML::LibXML->createDocument( "1.0", "UTF-8" ); + + is therefore a shortcut for + + my $document = XML::LibXML::Document->createDocument( "1.0", "UTF-8" ); + + + + URI + + + + $strURI = $doc->URI(); + + + Returns the URI (or filename) of the original document. + For documents obtained by parsing a string of a FH + without using the URI parsing argument of the corresponding parse_* function, + the result is a generated string unknown-XYZ where XYZ is some number; + for documents created with the constructor new, + the URI is undefined. + + The value can be modified by calling setURI + method on the document node. + + + + setURI + + + + $doc->setURI($strURI); + + + Sets the URI of the document reported by the method URI + (see also the URI argument to the various parse_* functions). + + + + + + encoding + + + + $strEncoding = $doc->encoding(); + + + returns the encoding string of the document. + + my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" ); +print $doc->encoding; # prints ISO-8859-15 + + + + + actualEncoding + + + + $strEncoding = $doc->actualEncoding(); + + + returns the encoding in which the XML will be returned by $doc->toString(). + This is usually the original encoding of the document as declared + in the XML declaration and returned by $doc->encoding. + If the original encoding is not known (e.g. if created in memory or parsed from a + XML without a declared encoding), 'UTF-8' is returned. + + + my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" ); +print $doc->encoding; # prints ISO-8859-15 + + + + + + setEncoding + + + + $doc->setEncoding($new_encoding); + + + This method allows one to change the declaration of + encoding in the XML declaration of the document. + The value also affects the encoding in which the + document is serialized to XML by $doc->toString(). + Use setEncoding() to remove the encoding declaration. + + + + + + version + + + + $strVersion = $doc->version(); + + + returns the version string of the document + + getVersion() is an alternative form of this function. + + + + + standalone + + + + $doc->standalone + + + This function returns the Numerical value of a documents XML declarations standalone attribute. It returns 1 if + standalone="yes" was found, 0 if standalone="no" was found and -1 if standalone + was not specified (default on creation). + + + + + setStandalone + + + + $doc->setStandalone($numvalue); + + + Through this method it is possible to alter the value of a documents standalone attribute. Set it to 1 to set + standalone="yes", to 0 to set standalone="no" or set it to -1 to remove the + standalone attribute from the XML declaration. + + + + + compression + + + + my $compression = $doc->compression; + + + libxml2 allows reading of documents directly from gzipped files. In this case the compression variable is set to the compression level + of that file (0-8). If XML::LibXML parsed a different source or the file wasn't compressed, the returned value will be + -1. + + + + + setCompression + + + + $doc->setCompression($ziplevel); + + + If one intends to write the document directly to a file, it is possible to set the compression level for a given document. This level + can be in the range from 0 to 8. If XML::LibXML should not try to compress use -1 (default). + + Note that this feature will only work if libxml2 is compiled with zlib support and toFile() is used for output. + + + + + toString + + + + $docstring = $dom->toString($format); + + + toString is a DOM serializing function, + so the DOM Tree is serialized into an XML string, ready for output. + IMPORTANT: unlike toString for other nodes, on document nodes + this function returns the XML as a byte string in the original encoding of the + document (see the actualEncoding() method)! This means you + can simply do: + + open my $out_fh, '>', $file; +print {$out_fh} $doc->toString; + regardless of the actual encoding of the document. + See the section on encodings in for more details. + The optional $format parameter sets the indenting of the output. This parameter is expected to be an + integer value, that specifies that indentation should be used. The format parameter can have three different values if + it is used: + + If $format is 0, than the document is dumped as it was originally parsed + + If $format is 1, libxml2 will add ignorable white spaces, so the nodes content is easier to read. Existing text nodes will not be + altered + + If $format is 2 (or higher), libxml2 will act as $format == 1 but it add a leading and a trailing line break to each text node. + + libxml2 uses a hard-coded indentation of 2 space characters per indentation level. This value can not be altered on run-time. + + + + + toStringC14N + + + $c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]); + See the documentation in . + + + + + toStringEC14N + + + $ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list); + See the documentation in . + + + + + serialize + + + + $str = $doc->serialize($format); + + + An alias for toString(). This function was name added to be more consistent + with libxml2. + + + + + serialize_c14n + + + An alias for toStringC14N(). + + + + serialize_exc_c14n + + + An alias for toStringEC14N(). + + + + + toFile + + + + $state = $doc->toFile($filename, $format); + + + This function is similar to toString(), but it writes the document directly into a filesystem. This function is very useful, if one + needs to store large documents. + + The format parameter has the same behaviour as in toString(). + + + + + toFH + + + + $state = $doc->toFH($fh, $format); + + + This function is similar to toString(), but it writes the document directly to a filehandle or a stream. A byte stream in the document encoding is passed to the file handle. Do NOT apply any :encoding(...) or :utf8 PerlIO layer to + the filehandle! See the section on encodings in for more details. + + The format parameter has the same behaviour as in toString(). + + + + + toStringHTML + + + + $str = $document->toStringHTML(); + + + toStringHTML serialize the tree to a byte string in the document encoding as HTML. With this method indenting is automatic and managed by + libxml2 internally. + + + + + serialize_html + + + + $str = $document->serialize_html(); + + + An alias for toStringHTML(). + + + + + is_valid + + + + $bool = $dom->is_valid(); + + + Returns either TRUE or FALSE depending on whether the DOM Tree is a valid Document or not. + + You may also pass in a object, to validate against an external DTD: + + if (!$dom->is_valid($dtd)) { + warn("document is not valid!"); + } + + + + + validate + + + + $dom->validate(); + + + This is an exception throwing equivalent of is_valid. If the document is not valid it will throw an exception containing the error. + This allows you much better error reporting than simply is_valid or not. + + Again, you may pass in a DTD object + + + + + documentElement + + + + $root = $dom->documentElement(); + + + Returns the root element of the Document. A document can have just one root element to contain the documents data. + + Optionally one can use getDocumentElement. + + + + + setDocumentElement + + + + $dom->setDocumentElement( $root ); + + + This function enables you to set the root element for a document. The function supports the import of a node from a different document + tree, but does not support a document fragment as $root. + + + + + createElement + + + + $element = $dom->createElement( $nodename ); + + + This function creates a new Element Node bound to the DOM with the name $nodename. + + + + + createElementNS + + + + $element = $dom->createElementNS( $namespaceURI, $nodename ); + + + This function creates a new Element Node bound to the DOM with the name $nodename and placed in the given + namespace. + + + + + createTextNode + + + + $text = $dom->createTextNode( $content_text ); + + + As an equivalent of createElement, but it creates a Text Node bound to the DOM. + + + + + createComment + + + + $comment = $dom->createComment( $comment_text ); + + + As an equivalent of createElement, but it creates a Comment Node bound to the DOM. + + + + + createAttribute + + + + $attrnode = $doc->createAttribute($name [,$value]); + + + Creates a new Attribute node. + + + + + createAttributeNS + + + + $attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] ); + + + Creates an Attribute bound to a namespace. + + + + + createDocumentFragment + + + + $fragment = $doc->createDocumentFragment(); + + + This function creates a DocumentFragment. + + + + + createCDATASection + + + + $cdata = $dom->createCDATASection( $cdata_content ); + + + Similar to createTextNode and createComment, this function creates a CDataSection bound to the current DOM. + + + + + createProcessingInstruction + + + + my $pi = $doc->createProcessingInstruction( $target, $data ); + + + create a processing instruction node. + + Since this method is quite long one may use its short form createPI(). + + + + + createEntityReference + + + + my $entref = $doc->createEntityReference($refname); + + + If a document has a DTD specified, one can create entity references by using this function. If one wants to add a entity reference to + the document, this reference has to be created by this function. + + An entity reference is unique to a document and cannot be passed to other documents as other nodes can be passed. + + NOTE: A text content containing something that looks like an entity reference, will not be expanded to a real + entity reference unless it is a predefined entity + + my $string = "&foo;"; + $some_element->appendText( $string ); + print $some_element->textContent; # prints "&amp;foo;" + + + + + createInternalSubset + + + + $dtd = $document->createInternalSubset( $rootnode, $public, $system); + + + This function creates and adds an internal subset to the given document. Because the function automatically adds the DTD to the document + there is no need to add the created node explicitly to the document. + + my $document = XML::LibXML::Document->new(); + my $dtd = $document->createInternalSubset( "foo", undef, "foo.dtd" ); + + will result in the following XML document: + + <?xml version="1.0"?> + <!DOCTYPE foo SYSTEM "foo.dtd"> + + By setting the public parameter it is possible to set PUBLIC DTDs to a given document. So + + my $document = XML::LibXML::Document->new(); +my $dtd = $document->createInternalSubset( "foo", "-//FOO//DTD FOO 0.1//EN", undef ); + + + will cause the following declaration to be created on the document: + + <?xml version="1.0"?> +<!DOCTYPE foo PUBLIC "-//FOO//DTD FOO 0.1//EN"> + + + + + createExternalSubset + + + + $dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId); + + + This function is similar to createInternalSubset() but this DTD is considered to be external and is therefore not + added to the document itself. Nevertheless it can be used for validation purposes. + + + + + importNode + + + + $document->importNode( $node ); + + + If a node is not part of a document, it can be imported to another document. As specified in DOM Level 2 Specification the Node will + not be altered or removed from its original document ($node->cloneNode(1) will get called implicitly). + + NOTE: Don't try to use importNode() to import sub-trees that contain an entity reference - even if the entity + reference is the root node of the sub-tree. This will cause serious problems to your program. This is a limitation of libxml2 and not of + XML::LibXML itself. + + + + + adoptNode + + + + $document->adoptNode( $node ); + + + If a node is not part of a document, it can be imported to another document. As specified in DOM Level 3 Specification the Node will + not be altered but it will removed from its original document. + + After a document adopted a node, the node, its attributes and all its descendants belong to the new document. Because the node does + not belong to the old document, it will be unlinked from its old location first. + + NOTE: Don't try to adoptNode() to import sub-trees that contain entity references - even if the entity + reference is the root node of the sub-tree. This will cause serious problems to your program. This is a limitation of libxml2 and not of + XML::LibXML itself. + + + + + externalSubset + + + + my $dtd = $doc->externalSubset; + + + If a document has an external subset defined it will be returned by this function. + + NOTE Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in XML::LibXML is still limited. In + particular one may not want use common node function on doctype declaration nodes! + + + + + internalSubset + + + + my $dtd = $doc->internalSubset; + + + If a document has an internal subset defined it will be returned by this function. + + NOTE Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in XML::LibXML is still limited. In + particular one may not want use common node function on doctype declaration nodes! + + + + + setExternalSubset + + + + $doc->setExternalSubset($dtd); + + + EXPERIMENTAL! + + This method sets a DTD node as an external subset of the given document. + + + + + setInternalSubset + + + + $doc->setInternalSubset($dtd); + + + EXPERIMENTAL! + + This method sets a DTD node as an internal subset of the given document. + + + + + removeExternalSubset + + + + my $dtd = $doc->removeExternalSubset(); + + + EXPERIMENTAL! + + If a document has an external subset defined it can be removed from the document by using this function. The removed dtd node will be + returned. + + + + + removeInternalSubset + + + + my $dtd = $doc->removeInternalSubset(); + + + EXPERIMENTAL! + + If a document has an internal subset defined it can be removed from the document by using this function. The removed dtd node will be + returned. + + + + + getElementsByTagName + + + + my @nodelist = $doc->getElementsByTagName($tagname); + + + Implements the DOM Level 2 function + + In SCALAR context this function returns an XML::LibXML::NodeList object. + + + + + getElementsByTagNameNS + + + + my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname); + + + Implements the DOM Level 2 function + + In SCALAR context this function returns an XML::LibXML::NodeList object. + + + + + getElementsByLocalName + + + + my @nodelist = $doc->getElementsByLocalName($localname); + + + This allows the fetching of all nodes from a given document with the given Localname. + + In SCALAR context this function returns an XML::LibXML::NodeList object. + + + + + getElementById + + + + my $node = $doc->getElementById($id); + + Returns the element that has an ID attribute + with the given value. If no such element exists, + this returns undef. + Note: the ID of an element + may change while manipulating the document. + For documents with a DTD, the information about ID attributes + is only available if DTD loading/validation has been requested. + For HTML documents parsed with the HTML + parser ID detection is done + automatically. In XML documents, all "xml:id" + attributes are considered to be of type ID. + You can test ID-ness of an attribute node + with $attr->isId(). + + In versions 1.59 and earlier this method was + called getElementsById() (plural) by + mistake. Starting from 1.60 this name is + maintained as an alias only for backward compatibility. + + + + + + indexElements + + + + $dom->indexElements(); + + + This function causes libxml2 to stamp all elements in a document with their document position index which considerably speeds up XPath + queries for large documents. It should only be used with static documents that won't be further changed by any DOM methods, because once + a document is indexed, XPath will always prefer the index to other methods of determining the document order of nodes. XPath could therefore + return improperly ordered node-lists when applied on a document that has been changed after being indexed. It is of course possible to use + this method to re-index a modified document before using it with XPath again. This function is not a part of the DOM specification. + + This function returns number of elements indexed, -1 if error occurred, or -2 if this feature is not available in the running libxml2. + + + + + + + + Abstract Base Class of XML::LibXML Nodes + + XML::LibXML::Node + + Synopsis + use XML::LibXML; + + + Description + + XML::LibXML::Node defines functions that are common to + all Node Types. An XML::LibXML::Node should never be created + standalone, but as an instance of a high level class such as + XML::LibXML::Element or XML::LibXML::Text. The class itself should + provide only common functionality. In XML::LibXML each node is + part either of a document or a document-fragment. Because of + this there is no node without a parent. This may causes + confusion with "unbound" nodes. + + + Methods + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + + nodeName + + + + $name = $node->nodeName; + + + Returns the node's name. This function is + aware of namespaces and returns the full name of + the current node (prefix:localname). + + Since 1.62 this function also returns the correct + DOM names for node types with constant names, namely: + #text, #cdata-section, #comment, #document, + #document-fragment. + + + + + + setNodeName + + + + $node->setNodeName( $newName ); + + + In very limited situations, it is useful to change a nodes name. In the DOM specification this should throw an error. This Function is + aware of namespaces. + + + + + + + isSameNode + + + + $bool = $node->isSameNode( $other_node ); + + + returns TRUE (1) if the given nodes refer to + the same node structure, otherwise FALSE (0) is + returned. + + + + + + + isEqual + + + + $bool = $node->isEqual( $other_node ); + + + deprecated version of isSameNode(). + + NOTE isEqual will change behaviour to follow the DOM specification + + + + + + + unique_key + + + + $num = $node->unique_key; + + + This function is not specified for any DOM level. It returns a key guaranteed to be unique for this node, and to always be the same value for this node. In other words, two node objects return the same key if and only if isSameNode indicates that they are the same node. + + The returned key value is useful as a key in hashes. + + + + + + + nodeValue + + + + $content = $node->nodeValue; + + + If the node has any content (such as stored in a text node) it can get requested through this function. + + NOTE: Element Nodes have no content per definition. To get the text value of an Element use textContent() + instead! + + + + + + + textContent + + + + $content = $node->textContent; + + + this function returns the content of all text nodes in the descendants of the given node as specified in DOM. + + + + + + + nodeType + + + + $type = $node->nodeType; + + + Return a numeric value representing the node type of this node. + The module XML::LibXML by default exports constants + for the node types (see the EXPORT section in the + manual page). + + + + + unbindNode + + + + $node->unbindNode(); + + + Unbinds the Node from its siblings and Parent, but not from the Document it belongs to. If the node is not inserted into the DOM + afterwards, it will be lost after the program terminates. From a low level view, the unbound node is stripped from the context it is and + inserted into a (hidden) document-fragment. + + + + + + + removeChild + + + + $childnode = $node->removeChild( $childnode ); + + + This will unbind the Child Node from its parent $node. The function returns the unbound node. If + $childnode is not a child of the given Node the function will fail. + + + + + + + replaceChild + + + + $oldnode = $node->replaceChild( $newNode, $oldNode ); + + + Replaces the $oldNode with the $newNode. The $oldNode will be unbound + from the Node. This function differs from the DOM L2 specification, in the case, if the new node is not part of the document, the node will + be imported first. + + + + + + + replaceNode + + + + $node->replaceNode($newNode); + + + This function is very similar to replaceChild(), but it replaces the node itself rather than a childnode. This is useful if a node + found by any XPath function, should be replaced. + + + + + + + appendChild + + + + $childnode = $node->appendChild( $childnode ); + + + The function will add the $childnode to the end of $node's children. The function should + fail, if the new childnode is already a child of $node. This function differs from the DOM L2 specification, in the + case, if the new node is not part of the document, the node will be imported first. + + + + + + + addChild + + + + $childnode = $node->addChild( $childnode ); + + + As an alternative to appendChild() one can use the addChild() function. This function is a bit faster, because it avoids all DOM + conformity checks. Therefore this function is quite useful if one builds XML documents in memory where the order and ownership (ownerDocument) + is assured. + + addChild() uses libxml2's own xmlAddChild() function. Thus it has to be used with extra care: If a text node is added to a node + and the node itself or its last childnode is as well a text node, the node to add will be merged with the one already available. The current + node will be removed from memory after this action. Because perl is not aware of this action, the perl instance is still available. + XML::LibXML will catch the loss of a node and refuse to run any function called on that node. + + my $t1 = $doc->createTextNode( "foo" ); + my $t2 = $doc->createTextNode( "bar" ); + $t1->addChild( $t2 ); # is OK + my $val = $t2->nodeValue(); # will fail, script dies + + Also addChild() will not check if the added node belongs to the same document as the node it will be added to. This could lead to + inconsistent documents and in more worse cases even to memory violations, if one does not keep track of this issue. + + Although this sounds like a lot of trouble, addChild() is useful if a document is built from a stream, such as happens sometimes in + SAX handlers or filters. + + If you are not sure about the source of your nodes, you better stay with appendChild(), because this function is more user friendly in + the sense of being more error tolerant. + + + + + + + addNewChild + + + + $node = $parent->addNewChild( $nsURI, $name ); + + + Similar to addChild(), this function uses low level libxml2 functionality to provide faster interface for DOM + building. addNewChild() uses xmlNewChild() to create a new node on a given parent element. + + addNewChild() has two parameters $nsURI and $name, where $nsURI is an (optional) namespace URI. $name is the fully qualified element + name; addNewChild() will determine the correct prefix if necessary. + + The function returns the newly created node. + + This function is very useful for DOM building, where a created node can be directly associated with its parent. NOTE + this function is not part of the DOM specification and its use will limit your code to XML::LibXML. + + + + + + + addSibling + + + + $node->addSibling($newNode); + + + addSibling() allows adding an additional node to the end of a nodelist, defined by the given node. + + + + + + + cloneNode + + + + $newnode =$node->cloneNode( $deep ); + + + cloneNode creates a + copy of $node. When $deep is + set to 1 (true) the function will copy all + child nodes as well. If $deep is 0 only the current + node will be copied. Note that in case of element, + attributes are copied even if $deep is 0. + + Note that the behavior of this function for $deep=0 + has changed in 1.62 in order to be consistent with the DOM spec + (in older versions attributes and namespace information + was not copied for elements). + + + + + + + parentNode + + + + $parentnode = $node->parentNode; + + + Returns simply the Parent Node of the current node. + + + + + + + nextSibling + + + + $nextnode = $node->nextSibling(); + + + Returns the next sibling if any . + + + + + nextNonBlankSibling + + + + $nextnode = $node->nextNonBlankSibling(); + + + Returns the next non-blank sibling if any (a node is blank if it is a Text or CDATA node consisting of whitespace only). This method is not defined by DOM. + + + + + + + previousSibling + + + + $prevnode = $node->previousSibling(); + + + Analogous to getNextSibling the function returns the previous sibling if any. + + + + + previousNonBlankSibling + + + + $prevnode = $node->previousNonBlankSibling(); + + + Returns the previous non-blank sibling if any (a node is blank if it is a Text or CDATA node consisting of whitespace only). This method is not defined by DOM. + + + + + + + hasChildNodes + + + + $boolean = $node->hasChildNodes(); + + + If the current node has child nodes this function returns TRUE (1), otherwise it returns FALSE (0, not undef). + + + + + + + firstChild + + + + $childnode = $node->firstChild; + + + If a node has child nodes this function will return the first node in the child list. + + + + + + + lastChild + + + + $childnode = $node->lastChild; + + + If the $node has child nodes this function returns the last child node. + + + + + + + ownerDocument + + + + $documentnode = $node->ownerDocument; + + + Through this function it is always possible to access the document the current node is bound to. + + + + + + + getOwner + + + + $node = $node->getOwner; + + + This function returns the node the current node is associated with. In most cases this will be a document node or a document fragment + node. + + + + + + + setOwnerDocument + + + + $node->setOwnerDocument( $doc ); + + + This function binds a node to another DOM. This method unbinds the node first, if it is already bound to another document. + + This function is the opposite calling of 's adoptNode() function. Because of this it has the same limitations + with Entity References as adoptNode(). + + + + + + + insertBefore + + + + $node->insertBefore( $newNode, $refNode ); + + + The method inserts $newNode before $refNode. If $refNode is undefined, + the newNode will be set as the new last child of the parent node. This function differs from the DOM L2 specification, in the case, if the + new node is not part of the document, the node will be imported first, automatically. + + $refNode has to be passed to the function even if it is undefined: + + $node->insertBefore( $newNode, undef ); # the same as $node->appendChild( $newNode ); + $node->insertBefore( $newNode ); # wrong + + Note, that the reference node has to be a direct child of the node the function is called on. Also, $newChild is not allowed to be an + ancestor of the new parent node. + + + + + + + insertAfter + + + + $node->insertAfter( $newNode, $refNode ); + + + The method inserts $newNode after $refNode. If $refNode is undefined, + the newNode will be set as the new last child of the parent node. + + Note, that $refNode has to be passed explicitly even if it is undef. + + + + + findnodes + + + + @nodes = $node->findnodes( $xpath_expression ); + + + findnodes evaluates the xpath expression (XPath 1.0) on the current node and returns the resulting node set as an array. In scalar context, returns an XML::LibXML::NodeList object. + The xpath expression can be passed either as a string, or + as a XML::LibXML::XPathExpression object. + + NOTE ON NAMESPACES AND XPATH: + A common mistake about + XPath is to assume that node tests consisting of an + element name with no prefix match elements in the default + namespace. This assumption is wrong - by XPath + specification, such node tests can only match elements + that are in no (i.e. null) namespace. + + + So, for example, one cannot match the root element of an + XHTML document with $node->find('/html') + since '/html' would only match if the + root element <html> had no + namespace, but all XHTML elements belong to the namespace + http://www.w3.org/1999/xhtml. (Note that + xmlns="..." namespace declarations can + also be specified in a DTD, which makes the situation even worse, since + the XML document looks as if there was no default namespace). + + There are several possible ways to deal with namespaces in XPath: + + + + + The recommended way is to use the + module + to define an explicit context + for XPath evaluation, in which a document independent + prefix-to-namespace mapping can be defined. For + example: + + my $xpc = XML::LibXML::XPathContext->new; +$xpc->registerNs('x', 'http://www.w3.org/1999/xhtml'); +$xpc->find('/x:html',$node); + + + Another possibility is to use prefixes declared + in the queried document (if known). + If the document declares a prefix for the + namespace in question (and the context node is in the + scope of the declaration), + XML::LibXML allows you to use the + prefix in the XPath expression, e.g.: + + $node->find('/x:html'); + + + See also XML::LibXML::XPathContext->findnodes. + + + + + + + find + + + + $result = $node->find( $xpath ); + + + find evaluates the XPath 1.0 expression using the current node as the context of the expression, and returns the + result depending on what type of result the XPath expression had. For example, the XPath "1 * 3 + 52" results in a + XML::LibXML::Number object being returned. Other expressions might return an XML::LibXML::Boolean + object, or an XML::LibXML::Literal object (a string). Each of those objects uses Perl's overload feature to "do + the right thing" in different contexts. + The xpath expression can be passed either as a string, + or as a XML::LibXML::XPathExpression object. + + See also ->find. + + + + + + + findvalue + + + + print $node->findvalue( $xpath ); + + + findvalue is exactly equivalent to: + + $node->find( $xpath )->to_literal; + + That is, it returns the literal value of the results. This enables you to ensure that you get a string back from your search, allowing + certain shortcuts. This could be used as the equivalent of XSLT's <xsl:value-of select="some_xpath"/>. + See also ->findvalue. + The xpath expression can be passed either as a string, or + as a XML::LibXML::XPathExpression object. + + + + + + exists + + + + $bool = $node->exists( $xpath_expression ); + + This method behaves like findnodes, except + that it only returns a boolean value (1 if the expression matches a node, 0 otherwise) + and may be faster than findnodes, because + the XPath evaluation may stop early on the first match (this is true for libxml2 >= 2.6.27). + For XPath expressions that do not return node-set, + the method returns true if the returned value is a non-zero number or a non-empty string. + + + + + childNodes + + + + @childnodes = $node->childNodes(); + + + childNodes implements a more intuitive interface to the childnodes of the current node. It enables you to pass + all children directly to a map or grep. If this function is called in scalar context, a + XML::LibXML::NodeList object will be returned. + + + + + nonBlankChildNodes + + + + @childnodes = $node->nonBlankChildNodes(); + + + This is like childNodes, + but returns only non-blank nodes + (where a node is blank if it is a Text or CDATA node consisting of whitespace only). This method is not defined by DOM. + + + + + toString + + + + $xmlstring = $node->toString($format,$docencoding); + + + This method is similar to the method toString of a but for a single node. It returns a string consisting of XML serialization of the given node and all its descendants. Unlike XML::LibXML::Document::toString, in this case the resulting string is by default a character string (UTF-8 encoded with UTF8 flag on). An optional flag $format controls indentation, as in XML::LibXML::Document::toString. If the second optional $docencoding flag is true, the result will be a byte string in the document encoding (see XML::LibXML::Document::actualEncoding). + + + + + toStringC14N + + + + $c14nstring = $node->toStringC14N(); +$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context); + + + The function is similar to + toString(). Instead of simply serializing the + document tree, it transforms it as it is specified + in the XML-C14N Specification + (see http://www.w3.org/TR/xml-c14n). + Such transformation is known as + canonization. + + If $with_comments is 0 or not defined, the + result-document will not contain any comments that + exist in the original document. To include + comments into the canonized document, + $with_comments has to be set to 1. + + The parameter $xpath_expression defines the + nodeset of nodes that should be visible in the + resulting document. This can be used to filter out + some nodes. One has to note, that only the nodes + that are part of the nodeset, will be included + into the result-document. Their child-nodes will + not exist in the resulting document, unless they + are part of the nodeset defined by the xpath + expression. + + If $xpath_expression is omitted or empty, + toStringC14N() will include all nodes in the given + sub-tree, using the following XPath expressions: + with comments + (. | .//node() | .//@* | .//namespace::*) + and without comments + (. | .//node() | .//@* | .//namespace::*)[not(self::comment())] + + + An optional parameter $xpath_context can be used + to pass an object defining + the context for evaluation of $xpath_expression. + This is useful for mapping namespace prefixes used in the XPath expression + to namespace URIs. + Note, however, that + $node will be used as the context node for the evaluation, not + the context node of $xpath_context! + + + + + toStringC14N_v1_1 + + + + $c14nstring = $node->toStringC14N_v1_1(); +$c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context); + + + This function behaves like toStringC14N() except that + it uses the "XML_C14N_1_1" constant for + canonicalising using the "C14N 1.1 spec". + + + + + toStringEC14N + + + + $ec14nstring = $node->toStringEC14N(); +$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list); +$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list); + + + The function is similar to toStringC14N() but follows + the XML-EXC-C14N Specification (see http://www.w3.org/TR/xml-exc-c14n) + for exclusive canonization of XML. + + The arguments $with_comments, $xpath_expression, $xpath_context are as in toStringC14N(). + An ARRAY reference can be passed as the last argument $inclusive_prefix_list, + listing namespace prefixes that are to be handled in the manner described by the Canonical XML Recommendation (i.e. preserved in the output even if the namespace is not used). C.f. the spec for details. + + + + + + serialize + + + + $str = $doc->serialize($format); + + An alias for toString(). This function was name added to be more consistent + with libxml2. + + + + + serialize_c14n + + + An alias for toStringC14N(). + + + + serialize_exc_c14n + + + An alias for toStringEC14N(). + + + + + + + localname + + + + $localname = $node->localname; + + + Returns the local name of a tag. This is the part behind the colon. + + + + + + + prefix + + + + $nameprefix = $node->prefix; + + + Returns the prefix of a tag. This is the part before the colon. + + + + + + + namespaceURI + + + + $uri = $node->namespaceURI(); + + + returns the URI of the current namespace. + + + + + + + hasAttributes + + + + $boolean = $node->hasAttributes(); + + + returns 1 (TRUE) if the current node has any attributes set, otherwise 0 (FALSE) is returned. + + + + + + + attributes + + + + @attributelist = $node->attributes(); + + + This function returns all attributes and namespace declarations assigned to the given node. + + Because XML::LibXML does not implement namespace declarations and attributes the same way, it is required to test what kind of node is + handled while accessing the functions result. + + If this function is called in array context the attribute nodes are returned as an array. In scalar context, the function will return a + XML::LibXML::NamedNodeMap object. + + + + + + + lookupNamespaceURI + + + + $URI = $node->lookupNamespaceURI( $prefix ); + + + Find a namespace URI by its prefix starting at the current node. + + + + + + + lookupNamespacePrefix + + + + $prefix = $node->lookupNamespacePrefix( $URI ); + + + Find a namespace prefix by its URI starting at the current node. + + NOTE Only the namespace URIs are meant to be unique. The prefix is only document related. Also the document might + have more than a single prefix defined for a namespace. + + + + + + normalize + + + + $node->normalize; + + + This function normalizes adjacent text nodes. This function is not as strict as libxml2's xmlTextMerge() function, since it will + not free a node that is still referenced by the perl layer. + + + + + + + getNamespaces + + + + @nslist = $node->getNamespaces; + + + If a node has any namespaces defined, this function will return these namespaces. Note, that this will not return all namespaces that + are in scope, but only the ones declared explicitly for that node. + + Although getNamespaces is available for all nodes, it only makes sense if used with element nodes. + + + + + + + removeChildNodes + + + + $node->removeChildNodes(); + + + This function is not specified for any DOM level: It removes all childnodes from a node in a single step. Other than the libxml2 + function itself (xmlFreeNodeList), this function will not immediately remove the nodes from the memory. This saves one from getting memory + violations, if there are nodes still referred to from the Perl level. + + + + + baseURI () + + + $strURI = $node->baseURI(); + + + Searches for the base URL of the node. The method should work on both XML + and HTML documents even if base mechanisms for these are completely different. + It returns the base as defined in RFC 2396 sections + "5.1.1. Base URI within Document Content" + and + "5.1.2. Base URI from the Encapsulating Entity". + However it does not return the document base (5.1.3), use method URI + of XML::LibXML::Document for this. + + + + + setBaseURI ($strURI) + + + $node->setBaseURI($strURI); + + This method only does something useful for an element node + in an XML document. + It sets the xml:base attribute on the node to $strURI, which + effectively sets the base URI of the node to the same value. + + + Note: For HTML documents this behaves as if the document was XML + which may not be desired, since it does not effectively + set the base URI of the node. See RFC 2396 appendix D + for an example of how base URI can be specified in HTML. + + + + + + nodePath + + + + $node->nodePath(); + + + This function is not specified for any DOM level: It returns a canonical structure based XPath for a given node. + + + + + + + line_number + + + + $lineno = $node->line_number(); + + + This function returns the line number where the tag was found during parsing. If a node is added to the document the line number is 0. + Problems may occur, if a node from one document is passed to another one. + IMPORTANT: + Due to limitations in the libxml2 library line numbers greater than + 65535 will be returned as 65535. Please see + http://bugzilla.gnome.org/show_bug.cgi?id=325533 for more details. + + Note: line_number() is special to XML::LibXML and not part of the DOM specification. + + If the line_numbers flag of the parser was not activated before parsing, line_number() will always return 0. + + + + + + + + XML::LibXML Class for Element Nodes + + XML::LibXML::Element + + Synopsis + use XML::LibXML; +# Only methods specific to Element nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Methods + + The class inherits from . + The documentation for Inherited methods is not listed here. + + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + new + + + + $node = XML::LibXML::Element->new( $name ); + + + This function creates a new node unbound to any DOM. + + + + + + + setAttribute + + + + $node->setAttribute( $aname, $avalue ); + + + This method sets or replaces the node's attribute $aname to the value $avalue + + + + + + + setAttributeNS + + + + $node->setAttributeNS( $nsURI, $aname, $avalue ); + + Namespace-aware version of setAttribute, where + $nsURI is a namespace URI, + $aname is a qualified name, + and $avalue is the value. + The namespace URI may be null (empty or undefined) + in order to create an attribute which has no namespace. + + + The current implementation differs from DOM in the following aspects + + + If an attribute with the same local name and namespace URI already exists + on the element, but its prefix differs from the prefix of $aname, + then this function is supposed to change the prefix (regardless + of namespace declarations and possible collisions). + However, the current implementation does rather the opposite. + If a prefix is declared for the namespace URI in the scope + of the attribute, then the already declared prefix is used, + disregarding the prefix specified in $aname. + If no prefix is declared for the namespace, the function tries + to declare the prefix specified in $aname + and dies if the prefix is already taken by some other namespace. + + According to DOM Level 2 specification, this method can also be used to + create or modify special attributes used for declaring XML namespaces + (which belong to the namespace "http://www.w3.org/2000/xmlns/" and + have prefix or name "xmlns"). This should work since version 1.61, + but again the implementation differs from DOM specification in the following: + if a declaration of the same namespace prefix already exists + on the element, then changing its value via this method + automatically changes the namespace of all elements and attributes + in its scope. This is because in libxml2 the namespace URI of an element + is not static but is computed from a pointer to a namespace declaration attribute. + + + + + + + + getAttribute + + + + $avalue = $node->getAttribute( $aname ); + + + If $node has an attribute with the name $aname, the value of this attribute will get + returned. + + + + + + + getAttributeNS + + + + $avalue = $node->getAttributeNS( $nsURI, $aname ); + + + Retrieves an attribute value by local name and namespace URI. + + + + + + + getAttributeNode + + + + $attrnode = $node->getAttributeNode( $aname ); + + + Retrieve an attribute node by name. If no attribute with a given name exists, undef is returned. + + + + + + + getAttributeNodeNS + + + + $attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname ); + + + Retrieves an attribute node by local name and namespace URI. If no attribute with a given localname and namespace exists, undef is returned. + + + + + + + removeAttribute + + + + $node->removeAttribute( $aname ); + + + The method removes the attribute $aname from the node's attribute list, if the attribute can be found. + + + + + + + removeAttributeNS + + + + $node->removeAttributeNS( $nsURI, $aname ); + + + Namespace version of removeAttribute + + + + + + + hasAttribute + + + + $boolean = $node->hasAttribute( $aname ); + + + This function tests if the named attribute is set for the node. If the attribute is specified, TRUE (1) will be returned, otherwise the + return value is FALSE (0). + + + + + + + hasAttributeNS + + + + $boolean = $node->hasAttributeNS( $nsURI, $aname ); + + + namespace version of hasAttribute + + + + + + + getChildrenByTagName + + + + @nodes = $node->getChildrenByTagName($tagname); + + + The function gives direct access to all child elements of the current node with a given tagname, where + tagname is a qualified name, that is, in case of namespace usage it may consist of a prefix and local + name. This function makes things a lot easier if one needs + to handle big data sets. A special tagname '*' can be used to match any name. + + If this function is called in SCALAR context, it returns the number of elements found. + + + + + + + getChildrenByTagNameNS + + + + @nodes = $node->getChildrenByTagNameNS($nsURI,$tagname); + + + Namespace version of getChildrenByTagName. A special nsURI '*' matches any namespace URI, + in which case the function behaves just like getChildrenByLocalName. + + If this function is called in SCALAR context, it returns the number of elements found. + + + + + + + getChildrenByLocalName + + + + @nodes = $node->getChildrenByLocalName($localname); + + + The function gives direct access to all child elements of the current node with a given local name. It makes things a lot easier if one needs + to handle big data sets. A special localname '*' can be used to match any local name. + + If this function is called in SCALAR context, it returns the number of elements found. + + + + + + + getElementsByTagName + + + + @nodes = $node->getElementsByTagName($tagname); + + + This function is part of the spec. It + fetches all descendants of a node with a given tagname, + where tagname is a qualified name, + that is, in case of namespace usage it may consist of a prefix and + local name. + A special tagname '*' can be used to match any tag name. + + + In SCALAR context this function returns an XML::LibXML::NodeList object. + + + + + + + getElementsByTagNameNS + + + + @nodes = $node->getElementsByTagNameNS($nsURI,$localname); + + + Namespace version of getElementsByTagName as found in the DOM spec. + A special localname '*' can be used to match any local name + and nsURI '*' can be used to match any namespace URI. + + In SCALAR context this function returns an XML::LibXML::NodeList object. + + + + + + + getElementsByLocalName + + + + @nodes = $node->getElementsByLocalName($localname); + + + This function is not found in the DOM specification. It is a mix of getElementsByTagName and getElementsByTagNameNS. It will fetch all + tags matching the given local-name. This allows one to select tags with the same local name across namespace borders. + + In SCALAR context this function returns an XML::LibXML::NodeList object. + + + + + + + appendWellBalancedChunk + + + + $node->appendWellBalancedChunk( $chunk ); + + + Sometimes it is necessary to append a string coded XML Tree to a node. appendWellBalancedChunk will do the trick + for you. But this is only done if the String is well-balanced. + + Note that appendWellBalancedChunk() is only left for compatibility reasons. Implicitly it uses + + my $fragment = $parser->parse_balanced_chunk( $chunk ); + $node->appendChild( $fragment ); + + This form is more explicit and makes it easier to control the flow of a script. + + + + + + + appendText + + + + $node->appendText( $PCDATA ); + + + alias for appendTextNode(). + + + + + + + appendTextNode + + + + $node->appendTextNode( $PCDATA ); + + + This wrapper function lets you add a string directly to an element node. + + + + + + + appendTextChild + + + + $node->appendTextChild( $childname , $PCDATA ); + + + Somewhat similar with appendTextNode: It lets you set an Element, that contains only a text node + directly by specifying the name and the text content. + + + + setNamespace + + + + $node->setNamespace( $nsURI , $nsPrefix, $activate ); + + + setNamespace() allows one to apply a + namespace to an element. The function takes three + parameters: 1. the namespace URI, which is + required and the two optional values prefix, which + is the namespace prefix, as it should be used in + child elements or attributes as well as the + additional activate parameter. If prefix is not given, + undefined or empty, this function tries to create a + declaration of the default namespace. + + The activate parameter is most useful: If + this parameter is set to FALSE (0), a new namespace + declaration is simply added to the element + while the element's namespace itself is not + altered. Nevertheless, activate is set to TRUE (1) + on default. In this case the namespace + is used as the node's effective + namespace. This means the namespace prefix is + added to the node name and if there was a + namespace already active for the node, it will + be replaced (but its declaration is not removed from the document). + A new namespace declaration is only created if necessary + (that is, if the element is already in the scope + of a namespace declaration associating the prefix + with the namespace URI, then this declaration is reused). + + + The following example may clarify this: + + my $e1 = $doc->createElement("bar"); + $e1->setNamespace("http://foobar.org", "foo") + + results + + <foo:bar xmlns:foo="http://foobar.org"/> + + while + + my $e2 = $doc->createElement("bar"); + $e2->setNamespace("http://foobar.org", "foo",0) + + results only + + <bar xmlns:foo="http://foobar.org"/> + + By using $activate == 0 it is possible to + create multiple namespace declarations on a single + element. + The function fails if it is required to + create a declaration associating the prefix + with the namespace URI but the element already + carries a declaration with the same prefix but + different namespace URI. + + + + + setNamespaceDeclURI + + + + $node->setNamespaceDeclURI( $nsPrefix, $newURI ); + + EXPERIMENTAL IN 1.61 ! + This function manipulates + directly with an existing namespace + declaration on an element. It takes + two parameters: the prefix by which it + looks up the namespace declaration and + a new namespace URI which replaces its previous + value. + It returns 1 if the namespace declaration + was found and changed, 0 otherwise. + All elements and attributes (even those previously + unbound from the document) for which the + namespace declaration determines their namespace + belong to the new namespace after + the change. + + If the new URI is undef or empty, the nodes + have no namespace and no prefix after the change. + Namespace declarations + once nulled in this way do not + further appear in the serialized output + (but do remain in the document for internal integrity + of libxml2 data structures). + + This function is NOT part of any DOM API. + + + + setNamespaceDeclPrefix + + + $node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix ); + + EXPERIMENTAL IN 1.61 ! + This function manipulates + directly with an existing namespace + declaration on an element. It takes + two parameters: the old prefix by which it + looks up the namespace declaration and + a new prefix which is to replace the old one. + The function dies with an error + if the element is in the scope of + another declaration whose prefix equals + to the new prefix, or if the change should + result in a declaration with a non-empty prefix but + empty namespace URI. + Otherwise, it returns 1 if the namespace declaration + was found and changed and 0 if not found. + All elements and attributes (even those previously + unbound from the document) for which the + namespace declaration determines their namespace + change their prefix to the new value. + + If the new prefix is undef or empty, + the namespace declaration becomes + a declaration of a default namespace. + The corresponding nodes drop their namespace prefix + (but remain in the, now default, namespace). + In this case the function fails, if the containing element + is in the scope of another default namespace declaration. + + This function is NOT part of any DOM API. + + + + + + + Overloading + XML::LibXML::Element overloads hash dereferencing to + provide access to the element's attributes. For non-namespaced + attributes, the attribute name is the hash key, and the attribute + value is the hash value. For namespaced attributes, the hash key + is qualified with the namespace URI, using Clark notation. + Perl's "tied hash" feature is used, which means that the + hash gives you read-write access to the element's attributes. + For more information, see XML::LibXML::AttributeHash + + + + + XML::LibXML Class for Text Nodes + + XML::LibXML::Text + + Synopsis + use XML::LibXML; +# Only methods specific to Text nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + + Unlike the DOM specification, XML::LibXML implements the text node as the base class of all character data node. Therefore there exists no + CharacterData class. This allows one to apply methods of text nodes also to Comments and CDATA-sections. + + + Methods + + The class inherits from . + The documentation for Inherited methods is not listed here. + + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + + new + + + + $text = XML::LibXML::Text->new( $content ); + + + The constructor of the class. It creates an unbound text node. + + + + + + + data + + + + $nodedata = $text->data; + + + Although there exists the nodeValue attribute in the Node class, the DOM specification defines data as a separate + attribute. XML::LibXML implements these two attributes not as different attributes, but as aliases, such as + libxml2 does. Therefore + + $text->data; + + and + + $text->nodeValue; + + will have the same result and are not different entities. + + + + + + + setData($string) + + + + $text->setData( $text_content ); + + + This function sets or replaces text content to a node. The node has to be of the type "text", "cdata" or + "comment". + + + + + + + substringData($offset,$length) + + + + $text->substringData($offset, $length); + + + Extracts a range of data from the node. (DOM Spec) This function takes the two parameters $offset and $length and returns the + sub-string, if available. + + If the node contains no data or $offset refers to an non-existing string index, this function will return undef. + If $length is out of range substringData will return the data starting at $offset instead of causing an error. + + + + + + + appendData($string) + + + + $text->appendData( $somedata ); + + + Appends a string to the end of the existing data. If the current text node contains no data, this function has the same effect as + setData. + + + + + + + insertData($offset,$string) + + + + $text->insertData($offset, $string); + + + Inserts the parameter $string at the given $offset of the existing data of the node. This operation will not remove existing data, but + change the order of the existing data. + + The $offset has to be a positive value. If $offset is out of range, insertData will have the same behaviour as + appendData. + + + + + + + deleteData($offset, $length) + + + + $text->deleteData($offset, $length); + + + This method removes a chunk from the existing node data at the given offset. The $length parameter tells, how many characters should + be removed from the string. + + + + + + + deleteDataString($string, [$all]) + + + + $text->deleteDataString($remstring, $all); + + + This method removes a chunk from the existing node data. Since the DOM spec is quite unhandy if you already know which + string to remove from a text node, this method allows more perlish code :) + + The functions takes two parameters: $string and optional the $all flag. If $all is not set, + undef or 0, deleteDataString will remove only the first occurrence of + $string. If $all is TRUE deleteDataString will remove all occurrences of $string + from the node data. + + + + + + + replaceData($offset, $length, $string) + + + + $text->replaceData($offset, $length, $string); + + + The DOM style version to replace node data. + + + + + + + replaceDataString($oldstring, $newstring, [$all]) + + + + $text->replaceDataString($old, $new, $flag); + + + The more programmer friendly version of replaceData() :) + + Instead of giving offsets and length one can specify + the exact string ($oldstring) to + be replaced. Additionally the $all + flag allows one to replace all occurrences of + $oldstring. + + + + + + + + replaceDataRegEx( $search_cond, $replace_cond, $reflags ) + + + + $text->replaceDataRegEx( $search_cond, $replace_cond, $reflags ); + + + This method replaces the node's data by a + simple regular expression. + Optional, this function allows one to pass some flags + that will be added as flag to the replace + statement. + + NOTE: This is a shortcut for + + my $datastr = $node->getData(); + $datastr =~ s/somecond/replacement/g; # 'g' is just an example for any flag + $node->setData( $datastr ); + + This function can make things easier to read for simple replacements. For more complex variants it is recommended to use the code + snippet above. + + + + + + + + XML::LibXML Comment Class + + XML::LibXML::Comment + + Synopsis + use XML::LibXML; +# Only methods specific to Comment nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + + This class provides all functions of , but for comment nodes. This can be done, since only the output of the + node types is different, but not the data structure. :-) + + + Methods + + The class inherits from . + The documentation for Inherited methods is not listed here. + + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + + new + + + + $node = XML::LibXML::Comment->new( $content ); + + + The constructor is the only provided function for this package. It is required, because libxml2 treats text nodes + and comment nodes slightly differently. + + + + + + + + XML::LibXML Class for CDATA Sections + + XML::LibXML::CDATASection + + Synopsis + use XML::LibXML; +# Only methods specific to CDATA nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + This class provides all functions of , but for CDATA nodes. + + + Methods + + The class inherits from . + The documentation for Inherited methods is not listed here. + + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + + + new + + + + $node = XML::LibXML::CDATASection->new( $content ); + + + The constructor is the only provided function for this package. It is required, because libxml2 treats the + different text node types slightly differently. + + + + + + + + XML::LibXML Attribute Class + + XML::LibXML::Attr + + Synopsis + use XML::LibXML; +# Only methods specific to Attribute nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + + This is the interface to handle Attributes like ordinary nodes. The naming of the class relies on the W3C DOM documentation. + + + Methods + + The class inherits from . + The documentation for Inherited methods is not listed here. + + + Many functions listed here are + extensively documented in the DOM Level 3 specification. Please refer to + the specification for extensive documentation. + + + + new + + + + $attr = XML::LibXML::Attr->new($name [,$value]); + + + Class constructor. If you need to work with ISO encoded strings, you should always use the createAttribute + of . + + + + + getValue + + + + $string = $attr->getValue(); + + + Returns the value stored for the attribute. If undef is returned, the attribute has no value, which is different of being + not specified. + + + + + value + + + + $string = $attr->value; + + + Alias for getValue() + + + + + + + setValue + + + + $attr->setValue( $string ); + + + This is needed to set a new attribute value. If ISO encoded strings are passed as parameter, the node has to be bound to a document, + otherwise the encoding might be done incorrectly. + + + + + + + getOwnerElement + + + + $node = $attr->getOwnerElement(); + + + returns the node the attribute belongs to. If the attribute is not bound to a node, undef will be returned. Overwriting the underlying + implementation, the parentNode function will return undef, instead of the owner element. + + + + + + + setNamespace + + + + $attr->setNamespace($nsURI, $prefix); + + + This function tries to bound the attribute to a given namespace. + If $nsURI is undefined or empty, + the function discards any previous association of the attribute with a namespace. + If the namespace was not previously declared in the context of the + attribute, this function will fail. + In this case you may wish to call setNamespace() on the ownerElement. + If the namespace URI is non-empty and + declared in the context of the attribute, but only with a different + (non-empty) prefix, then the attribute is still bound to the namespace + but gets a different prefix than $prefix. + The function also fails if the prefix is empty but the namespace URI + is not (because unprefixed attributes should by definition belong to + no namespace). + This function returns 1 on success, 0 otherwise. + + + + + + + isId + + + $bool = $attr->isId; + + Determine whether an attribute is of type + ID. For documents with a DTD, this information + is only available if DTD loading/validation has been requested. + For HTML documents parsed with the HTML + parser ID detection is done + automatically. In XML documents, all "xml:id" + attributes are considered to be of type ID. + + + + + + serializeContent($docencoding) + + + + $string = $attr->serializeContent; + + + This function is not part of DOM API. It returns attribute content + in the form in which it serializes into XML, that is + with all meta-characters properly quoted and with raw + entity references (except for entities expanded during parse time). + Setting the optional $docencoding flag to 1 enforces document + encoding for the output string (which is then passed to Perl as a + byte string). Otherwise the string is passed to Perl as (UTF-8 encoded) + characters. + + + + + + + + + + XML::LibXML's DOM L2 Document Fragment Implementation + + XML::LibXML::DocumentFragment + + Synopsis + use XML::LibXML; + + + Description + + This class is a helper class as described in the DOM Level 2 Specification. It is implemented as a node without name. All adding, inserting or + replacing functions are aware of document fragments now. + + As well all unbound nodes (all nodes that do not belong to any document sub-tree) are implicit members of document fragments. + + + + + XML::LibXML Namespace Implementation + + XML::LibXML::Namespace + + Synopsis + use XML::LibXML; +# Only methods specific to Namespace nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + + Namespace nodes are returned by both $element->findnodes('namespace::foo') or by $node->getNamespaces(). + + The namespace node API is not part of any current DOM API, and so it is quite minimal. It should be noted that namespace nodes are + not a sub class of , however Namespace nodes act a lot like attribute nodes, and similarly named methods will + return what you would expect if you treated the namespace node as an attribute. Note that in order to fix several inconsistencies between the API and the documentation, the behavior of some functions have been changed in 1.64. + + + Methods + + + new + + + + my $ns = XML::LibXML::Namespace->new($nsURI); + + + Creates a new Namespace node. Note that this is not a 'node' as an attribute or an element node. Therefore you can't do + call all Functions. All functions available for this node are listed below. + + Optionally you can pass the prefix to the namespace constructor. If this second parameter is omitted you will create a so called + default namespace. Note, the newly created namespace is not bound to any document or node, therefore you should not expect it to be + available in an existing document. + + + + + declaredURI + + Returns the URI for this namespace. + + + + declaredPrefix + + Returns the prefix for this namespace. + + + + + nodeName + + + + print $ns->nodeName(); + + + Returns "xmlns:prefix", where prefix is the prefix for this namespace. + + + + name + + + + print $ns->name(); + + + Alias for nodeName() + + + + + getLocalName + + + + $localname = $ns->getLocalName(); + + + Returns the local name of this node as if it were an attribute, that is, the prefix associated with the namespace. + + + + + getData + + + + print $ns->getData(); + + + Returns the URI of the namespace, i.e. the value of this node as if it were an attribute. + + + + + getValue + + + + print $ns->getValue(); + + + Alias for getData() + + + + + value + + + + print $ns->value(); + + + Alias for getData() + + + + + getNamespaceURI + + + + $known_uri = $ns->getNamespaceURI(); + + + Returns the string "http://www.w3.org/2000/xmlns/" + + + + + getPrefix + + + + $known_prefix = $ns->getPrefix(); + + + Returns the string "xmlns" + + + + + unique_key + + + + $key = $ns->unique_key(); + + + This method returns a key guaranteed to be unique for this namespace, and to always be the same value for this namespace. Two namespace objects return the same key if and only if they have the same prefix and the same URI. The returned key value is useful as a key in hashes. + + + + + + + + XML::LibXML Processing Instructions + + XML::LibXML::PI + + Synopsis + use XML::LibXML; +# Only methods specific to Processing Instruction nodes are listed here, +# see the XML::LibXML::Node manpage for other methods + + + Description + + Processing instructions are implemented with XML::LibXML with read and write access. The PI data is the PI without the PI target (as specified in + XML 1.0 [17]) as a string. This string can be accessed with getData as implemented in . + + The write access is aware about the fact, that many processing instructions have attribute like data. Therefore setData() provides besides the DOM + spec conform Interface to pass a set of named parameter. So the code segment + + my $pi = $dom->createProcessingInstruction("abc"); +$pi->setData(foo=>'bar', foobar=>'foobar'); +$dom->appendChild( $pi ); + + will result the following PI in the DOM: + + <?abc foo="bar" foobar="foobar"?> + + Which is how it is specified in the DOM specification. This three step interface creates temporary a node in perl space. This can be avoided while + using the insertProcessingInstruction() method. Instead of the three calls described above, the call + + $dom->insertProcessingInstruction("abc",'foo="bar" foobar="foobar"'); + + will have the same result as above. + + 's implementation of setData() documented below differs a bit from the standard version as available in : + + + setData + + + + $pinode->setData( $data_string ); +$pinode->setData( name=>string_value [...] ); + + + This method allows one to change the content data of + a PI. Additionally to the interface specified for DOM + Level2, the method provides a named parameter + interface to set the data. This parameter list is + converted into a string before it is appended to the + PI. + + + + + + + + XML::LibXML DTD Handling + + XML::LibXML::Dtd + + Synopsis + use XML::LibXML; + + + Description + + This class holds a DTD. You may parse a DTD from either a string, or from an external SYSTEM identifier. + + No support is available as yet for parsing from a filehandle. + + XML::LibXML::Dtd is a sub-class of , so all the methods available to nodes (particularly toString()) are available to Dtd objects. + + + Methods + + + + + new + + + + $dtd = XML::LibXML::Dtd->new($public_id, $system_id); + + + Parse a DTD from the system identifier, and return a DTD object that you can pass to $doc->is_valid() or $doc->validate(). + + my $dtd = XML::LibXML::Dtd->new( + "SOME // Public / ID / 1.0", + "test.dtd" + ); + my $doc = XML::LibXML->new->parse_file("test.xml"); + $doc->validate($dtd); + + + + + parse_string + + + + $dtd = XML::LibXML::Dtd->parse_string($dtd_str); + + + The same as new() above, except you can parse a DTD from a string. Note that parsing from string may fail if the DTD contains external parametric-entity references with relative URLs. + + + + getName + + + + $publicId = $dtd->getName(); + + + Returns the name of DTD; i.e., the name immediately following the DOCTYPE keyword. + + + + publicId + + + + $publicId = $dtd->publicId(); + + + Returns the public identifier of the external subset. + + + + systemId + + + + $systemId = $dtd->systemId(); + + + Returns the system identifier of the external subset. + + + + + + + + XML::LibXML Class for Input Callbacks + + XML::LibXML::InputCallback + + Synopsis + use XML::LibXML; + + + + Synopsis + + my $input_callbacks = XML::LibXML::InputCallback->new(); +$input_callbacks->register_callbacks([ $match_cb1, $open_cb1, + $read_cb1, $close_cb1 ] ); +$input_callbacks->register_callbacks([ $match_cb2, $open_cb2, + $read_cb2, $close_cb2 ] ); +$input_callbacks->register_callbacks( [ $match_cb3, $open_cb3, + $read_cb3, $close_cb3 ] ); + +$parser->input_callbacks( $input_callbacks ); +$parser->parse_file( $some_xml_file ); + + + + Description + + You may get unexpected results if you are trying to load external documents during libxml2 parsing if the location of the resource is not a + HTTP, FTP or relative location but a absolute path for example. To get around this limitation, you may add your own input handler to open, read and + close particular types of locations or URI classes. Using this input callback handlers, you can handle your own custom URI schemes for example. + + The input callbacks are used whenever XML::LibXML has to get something other than externally parsed entities from somewhere. They are implemented + using a callback stack on the Perl layer in analogy to libxml2's native callback stack. + + The XML::LibXML::InputCallback class transparently registers the input callbacks for the libxml2's parser processes. + + + How does XML::LibXML::InputCallback work? + + The libxml2 library offers a callback implementation as global functions only. To work-around the troubles resulting in having only global + callbacks - for example, if the same global callback stack is manipulated by different applications running together in a single Apache + Web-server environment -, XML::LibXML::InputCallback comes with a object-oriented and a function-oriented part. + + Using the function-oriented part the global callback stack of libxml2 can be manipulated. Those functions can be used as interface to the + callbacks on the C- and XS Layer. At the object-oriented part, operations for working with the "pseudo-localized" callback stack are + implemented. Currently, you can register and de-register callbacks on the Perl layer and initialize them on a per parser basis. + + + Callback Groups + + The libxml2 input callbacks come in groups. One group contains a URI matcher (match), a data stream constructor (open), + a data stream reader (read), and a data stream destructor (close). The callbacks can be + manipulated on a per group basis only. + + + + The Parser Process + + The parser process works on an XML data stream, along which, links to other resources can be embedded. This can be links to external + DTDs or XIncludes for example. Those resources are identified by URIs. The callback implementation of libxml2 assumes that one callback + group can handle a certain amount of URIs and a certain URI scheme. Per default, callback handlers for file://*, + file:://*.gz, http://* and ftp://* are registered. + + Callback groups in the callback stack are processed from top to bottom, meaning that callback groups registered later will be + processed before the earlier registered ones. + + While parsing the data stream, the libxml2 parser checks if a registered callback group will handle a URI - if they will not, the URI + will be interpreted as file://URI. To handle a URI, the match callback will have to return + '1'. If that happens, the handling of the URI will be passed to that callback group. Next, the URI will be passed to the + open callback, which should return a reference to the data stream if it successfully opened the + file, '0' otherwise. If opening the stream was successful, the read callback will be called repeatedly until it + returns an empty string. After the read callback, the close callback will be called to close the stream. + + + + Organisation of callback groups in XML::LibXML::InputCallback + + Callback groups are implemented as a stack (Array), + each entry holds a reference to an array of the + callbacks. For the libxml2 library, the + XML::LibXML::InputCallback callback implementation + appears as one single callback group. The Perl + implementation however allows one to manage different + callback stacks on a per libxml2-parser basis. + + + + + Using XML::LibXML::InputCallback + + After object instantiation using the parameter-less constructor, you can register callback groups. + + my $input_callbacks = XML::LibXML::InputCallback->new(); +$input_callbacks->register_callbacks([ $match_cb1, $open_cb1, + $read_cb1, $close_cb1 ] ); +$input_callbacks->register_callbacks([ $match_cb2, $open_cb2, + $read_cb2, $close_cb2 ] ); +$input_callbacks->register_callbacks( [ $match_cb3, $open_cb3, + $read_cb3, $close_cb3 ] ); + +$parser->input_callbacks( $input_callbacks ); +$parser->parse_file( $some_xml_file ); + + + + What about the old callback system prior to XML::LibXML::InputCallback? + + In XML::LibXML versions prior to 1.59 - i.e. without the XML::LibXML::InputCallback module - you could define your callbacks either using + globally or locally. You still can do that using XML::LibXML::InputCallback, and in addition to that you can define the callbacks on a per + parser basis! + + If you use the old callback interface through global callbacks, XML::LibXML::InputCallback will treat them with a lower priority as the + ones registered using the new interface. The global callbacks will not override the callback groups registered using the new interface. Local + callbacks are attached to a specific parser instance, therefore they are treated with highest priority. If the match + callback of the callback group registered as local variable is identical to one of the callback groups registered using the new interface, that + callback group will be replaced. + + Users of the old callback implementation whose open callback returned a plain string, will have to adapt their code + to return a reference to that string after upgrading to version >= 1.59. The new callback system can only deal with the + open callback returning a reference! + + + + + Interface Description + + + Global Variables + + + + $_CUR_CB + + + Stores the current callback and can be used as shortcut to access the callback stack. + + + + + @_GLOBAL_CALLBACKS + + + Stores all callback groups for the current parser process. + + + + + @_CB_STACK + + + Stores the currently used callback group. Used to prevent parser errors when dealing with nested XML data. + + + + + + + Global Callbacks + + + + _callback_match + + + Implements the interface for the match callback at C-level and for the selection of the callback group + from the callbacks defined at the Perl-level. + + + + + _callback_open + + + Forwards the open callback from libxml2 to the corresponding callback function at the Perl-level. + + + + + _callback_read + + + Forwards the read request to the corresponding callback function at the Perl-level and returns the result to libxml2. + + + + + _callback_close + + + Forwards the close callback from libxml2 to the corresponding callback function at the Perl-level.. + + + + + + + Class methods + + + + new() + + + A simple constructor. + + + + + register_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ]) + + + The four callbacks have to be given as array reference in the above order match, + open, read, close! + + + + + unregister_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ]) + + + With no arguments given, unregister_callbacks() will delete the last registered callback group from the + stack. If four callbacks are passed as array reference, the callback group to unregister will be identified by the + match callback and deleted from the callback stack. Note that if several identical match + callbacks are defined in different callback groups, ALL of them will be deleted from the stack. + + + + + init_callbacks( $parser ) + + + Initializes the callback system for the provided parser before starting a parsing process. + + + + + cleanup_callbacks() + + + Resets global variables and the libxml2 callback stack. + + + + + lib_init_callbacks() + + + Used internally for callback registration at C-level. + + + + + lib_cleanup_callbacks() + + + Used internally for callback resetting at the C-level. + + + + + + + + + + Example callbacks + + The following example is a purely fictitious example that uses a MyScheme::Handler object that responds to methods similar to an IO::Handle. + + +# Define the four callback functions +sub match_uri { + my $uri = shift; + return $uri =~ /^myscheme:/; # trigger our callback group at a 'myscheme' URIs +} + +sub open_uri { + my $uri = shift; + my $handler = MyScheme::Handler->new($uri); + return $handler; +} + +# The returned $buffer will be parsed by the libxml2 parser +sub read_uri { + my $handler = shift; + my $length = shift; + my $buffer; + read($handler, $buffer, $length); + return $buffer; # $buffer will be an empty string '' if read() is done +} + +# Close the handle associated with the resource. +sub close_uri { + my $handler = shift; + close($handler); +} + +# Register them with a instance of XML::LibXML::InputCallback +my $input_callbacks = XML::LibXML::InputCallback->new(); +$input_callbacks->register_callbacks([ \&match_uri, \&open_uri, + \&read_uri, \&close_uri ] ); + +# Register the callback group at a parser instance +$parser->input_callbacks( $input_callbacks ); + +# $some_xml_file will be parsed using our callbacks +$parser->parse_file( $some_xml_file ); + + + + + + + + RelaxNG Schema Validation + + XML::LibXML::RelaxNG + + Synopsis + use XML::LibXML; +$doc = XML::LibXML->new->parse_file($url); + + + Description + + The XML::LibXML::RelaxNG class is a tiny frontend to libxml2's RelaxNG implementation. Currently it supports only schema parsing and document + validation. + + + Methods + + + + new + + + + $rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 ); +$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 ); +$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 ); + + + The constructor of XML::LibXML::RelaxNG needs to be called with list of parameters. At least location, string or DOM parameter is required to + specify source of schema. Optional parameter no_network set to 1 cause that parser would not access network and optional parameter recover + set 1 cause that parser would not call die() on errors. + + It is important, that each schema only have a single source. + + The location parameter allows one to parse a schema + from the filesystem or a (non-HTTPS) URL. + + The string parameter will parse the schema from the given XML string. + + The DOM parameter allows one to parse the schema from a pre-parsed . + + Note that the constructor will die() if the schema does not meed the constraints of the RelaxNG specification. + + + + + validate + + + + eval { $rngschema->validate( $doc ); }; + + + This function allows one to validate a (parsed) + document against the given RelaxNG schema. The argument + of this function should be an XML::LibXML::Document + object. If this function succeeds, it will return 0, + otherwise it will die() and report the errors found. + Because of this validate() should be always + evaluated. + + + + + + + + + XML Schema Validation + + XML::LibXML::Schema + + Synopsis + use XML::LibXML; +$doc = XML::LibXML->new->parse_file($url); + + + Description + + The XML::LibXML::Schema class is a tiny frontend to libxml2's XML Schema implementation. Currently it supports only schema parsing and + document validation. As of 2.6.32, libxml2 only supports decimal types up to 24 digits (the standard requires at least 18). + + + + Methods + + + new + + + + $xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 ); +$xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 ); + + + The constructor of XML::LibXML::Schema needs to be called with list of parameters. At least location or string parameter is required to + specify source of schema. Optional parameter no_network set to 1 cause that parser would not access network and optional parameter recover + set 1 cause that parser would not call die() on errors. + + It is important, that each schema only have a single source. + + The location parameter allows one to parse a schema + from the filesystem or a (non-HTTPS) URL. + + The string parameter will parse the schema from the given XML string. + + Note that the constructor will die() if the schema does not meed the constraints of the XML Schema specification. + + + + + validate + + + + eval { $xmlschema->validate( $doc ); }; + + + This function allows one to validate a (parsed) + document against the given XML Schema. The argument of + this function should be a object. If this + function succeeds, it will return 0, otherwise it will + die() and report the errors found. Because of this + validate() should be always evaluated. + + + + + + + + XPath Evaluation + XML::LibXML::XPathContext + + Description + + The XML::LibXML::XPathContext + class provides an almost complete + interface to libxml2's XPath implementation. + With XML::LibXML::XPathContext, it is possible to + evaluate XPath expressions in the context + of arbitrary node, context size, and context position, + with a user-defined namespace-prefix mapping, + custom XPath functions written in Perl, and + even a custom XPath variable resolver. + + + + Examples + + Namespaces +This example demonstrates registerNs() method. +It finds all paragraph nodes in an XHTML document. + my $xc = XML::LibXML::XPathContext->new($xhtml_doc); +$xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml'); +my @nodes = $xc->findnodes('//xhtml:p'); + + + Custom XPath functions +This example demonstrates registerFunction() method +by defining a function filtering nodes based on a Perl regular expression: + sub grep_nodes { + my ($nodelist,$regexp) = @_; + my $result = XML::LibXML::NodeList->new; + for my $node ($nodelist->get_nodelist()) { + $result->push($node) if $node->textContent =~ $regexp; + } + return $result; +}; + +my $xc = XML::LibXML::XPathContext->new($node); +$xc->registerFunction('grep_nodes', \&grep_nodes); +my @nodes = $xc->findnodes('//section[grep_nodes(para,"\bsearch(ing|es)?\b")]'); + + + Variables +This example demonstrates registerVarLookup() +method. We use XPath variables to recycle results of previous evaluations: + sub var_lookup { + my ($varname,$ns,$data)=@_; + return $data->{$varname}; +} + +my $areas = XML::LibXML->new->parse_file('areas.xml'); +my $empl = XML::LibXML->new->parse_file('employees.xml'); + +my $xc = XML::LibXML::XPathContext->new($empl); + +my %variables = ( + A => $xc->find('/employees/employee[@salary>10000]'), + B => $areas->find('/areas/area[district='Brooklyn']/street'), +); + +# get names of employees from $A working in an area listed in $B +$xc->registerVarLookupFunc(\&var_lookup, \%variables); +my @nodes = $xc->findnodes('$A[work_area/street = $B]/name'); + + + + + Methods + + + new + + my $xpc = XML::LibXML::XPathContext->new(); + Creates a new XML::LibXML::XPathContext object + without a context node. + my $xpc = XML::LibXML::XPathContext->new($node); + Creates a new XML::LibXML::XPathContext object with + the context node set to $node. + + + + registerNs + $xpc->registerNs($prefix, $namespace_uri) + Registers namespace $prefix to + $namespace_uri. + + + + unregisterNs + $xpc->unregisterNs($prefix) + Unregisters namespace $prefix. + + + + lookupNs + $uri = $xpc->lookupNs($prefix) + Returns namespace URI registered with + $prefix. If $prefix + is not registered to any namespace URI returns + undef. + + + + registerVarLookupFunc + $xpc->registerVarLookupFunc($callback, $data) + Registers variable lookup function + $callback. The registered function is + executed by the XPath engine each time an XPath variable + is evaluated. It takes three arguments: + $data, variable name, and variable + ns-URI and must return one value: a number or string or + any XML::LibXML:: object that can be a result + of findnodes: Boolean, Literal, Number, Node + (e.g. Document, Element, etc.), or NodeList. For + convenience, simple (non-blessed) array references + containing only objects can be + used instead of an XML::LibXML::NodeList. + + + + getVarLookupData + + $data = $xpc->getVarLookupData(); + + Returns the data that have been associated with a + variable lookup function during a previous call to + registerVarLookupFunc. + + + + getVarLookupFunc + + $callback = $xpc->getVarLookupFunc(); + + Returns the variable lookup function previously registered with + registerVarLookupFunc. + + + + unregisterVarLookupFunc + + $xpc->unregisterVarLookupFunc($name); + Unregisters variable lookup function and the associated lookup data. + + + + registerFunctionNS + $xpc->registerFunctionNS($name, $uri, $callback) + Registers an extension function + $name in $uri + namespace. $callback must be a CODE + reference. The arguments of the callback function are + either simple scalars or XML::LibXML::* objects + depending on the XPath argument types. The function is + responsible for checking the argument number and + types. Result of the callback code must be a single + value of the following types: a simple scalar + (number, string) or an arbitrary XML::LibXML::* + object that can be a result of findnodes: Boolean, + Literal, Number, Node (e.g. Document, Element, etc.), or + NodeList. For convenience, simple (non-blessed) array + references containing only + objects can be used instead of a + XML::LibXML::NodeList. + + + + unregisterFunctionNS + $xpc->unregisterFunctionNS($name, $uri) + + Unregisters extension function $name + in $uri namespace. Has the same + effect as passing undef as + $callback to + registerFunctionNS. + + + + registerFunction + $xpc->registerFunction($name, $callback) + Same as registerFunctionNS but + without a namespace. + + + + unregisterFunction + $xpc->unregisterFunction($name) + Same as unregisterFunctionNS but + without a namespace. + + + + findnodes + @nodes = $xpc->findnodes($xpath) + @nodes = $xpc->findnodes($xpath, $context_node ) + $nodelist = $xpc->findnodes($xpath, $context_node ) + Performs the xpath statement on the current node and + returns the result as an array. In scalar context, + returns an XML::LibXML::NodeList object. Optionally, a + node may be passed as a second argument to set the + context node for the query. + The xpath expression can be passed either as a string, or + as a XML::LibXML::XPathExpression object. + + + + + find + $object = $xpc->find($xpath ) + $object = $xpc->find($xpath, $context_node ) + Performs the xpath expression using the current node + as the context of the expression, and returns the result + depending on what type of result the XPath expression + had. For example, the XPath 1 * 3 + + 52 results in an XML::LibXML::Number object + being returned. Other expressions might return a + XML::LibXML::Boolean object, or a + XML::LibXML::Literal object (a string). Each of those + objects uses Perl's overload feature to ``do the right + thing'' in different contexts. Optionally, a node may be + passed as a second argument to set the context node for + the query. + The xpath expression can be passed either as a string, or + as a XML::LibXML::XPathExpression object. + + + + + findvalue + $value = $xpc->findvalue($xpath ) + $value = $xpc->findvalue($xpath, $context_node ) + Is exactly equivalent to: + $xpc->find( $xpath, $context_node )->to_literal; + That is, it returns the literal value of the + results. This enables you to ensure that you get a string + back from your search, allowing certain shortcuts. This + could be used as the equivalent of <xsl:value-of + select=``some_xpath''/>. Optionally, a node may be + passed in the second argument to set the context node for + the query. + The xpath expression can be passed either as a string, or + as a XML::LibXML::XPathExpression object. + + + + + exists + + + $bool = $xpc->exists( $xpath_expression, $context_node ); + + This method behaves like findnodes, except + that it only returns a boolean value (1 if the expression matches a node, 0 otherwise) + and may be faster than findnodes, because + the XPath evaluation may stop early on the first match (this is true for libxml2 >= 2.6.27). + For XPath expressions that do not return node-set, + the method returns true if the returned value is a non-zero number or a non-empty string. + + + + setContextNode + $xpc->setContextNode($node) + Set the current context node. + + + + getContextNode + + my $node = $xpc->getContextNode; + Get the current context node. + + + + setContextPosition + $xpc->setContextPosition($position) + + Set the current context position. By default, this + value is -1 (and evaluating XPath function + position() in the initial context + raises an XPath error), but can be set to any value up + to context size. This usually only serves to cheat the + XPath engine to return given position when + position() XPath function is + called. Setting this value to -1 restores the default + behavior. + + + + getContextPosition + + my $position = $xpc->getContextPosition; + Get the current context position. + + + + setContextSize + $xpc->setContextSize($size) + + Set the current context size. By default, this value is -1 (and + evaluating XPath function last() in + the initial context raises an XPath error), but can be + set to any non-negative value. This usually only serves + to cheat the XPath engine to return the given value when + last() XPath function is called. If + context size is set to 0, position is automatically also + set to 0. If context size is positive, position is + automatically set to 1. Setting context size to -1 + restores the default behavior. + + + + getContextSize + + my $size = $xpc->getContextSize; + Get the current context size. + + + + setContextNode + $xpc->setContextNode($node) + Set the current context node. + + + + + + Bugs And Caveats + + XML::LibXML::XPathContext objects + are reentrant, meaning that you can call + methods of an XML::LibXML::XPathContext even from XPath + extension functions registered with the same object or from a + variable lookup function. On the other hand, you should rather + avoid registering new extension functions, namespaces and a + variable lookup function from within extension functions and a + variable lookup function, unless you want to experience + untested behavior. + + + + Authors + Ilya Martynov and Petr Pajas, based on + XML::LibXML and XML::LibXSLT code by Matt Sergeant and + Christian Glahn. + + + Historical remark + Prior to XML::LibXML 1.61 this module was distributed separately + for maintenance reasons. + + + + + XML::LibXML::Reader - interface to libxml2 pull parser + XML::LibXML::Reader + + Synopsis + use XML::LibXML::Reader; + my $reader = XML::LibXML::Reader->new(location => "file.xml") + or die "cannot read file.xml\n"; +while ($reader->read) { + processNode($reader); +} + +sub processNode { + my $reader = shift; + printf "%d %d %s %d\n", ($reader->depth, + $reader->nodeType, + $reader->name, + $reader->isEmptyElement); +} + + or + + my $reader = XML::LibXML::Reader->new(location => "file.xml") + or die "cannot read file.xml\n"; + $reader->preservePattern('//table/tr'); + $reader->finish; + print $reader->document->toString(1); + + + + DESCRIPTION + This is a perl interface to libxml2's pull-parser implementation + xmlTextReader + http://xmlsoft.org/html/libxml-xmlreader.html. + This feature requires at least libxml2-2.6.21. + Pull-parsers (such as StAX in Java, or XmlReader in C#) use an iterator + approach to parse XML documents. They are easier to program than + event-based parser (SAX) and much more lightweight than + tree-based parser (DOM), which load the complete tree into + memory. + The Reader acts as a cursor going forward on the document + stream and stopping at each node on the way. At every point, + the DOM-like methods of the Reader object allow one to examine the + current node (name, namespace, attributes, etc.) + The user's code keeps control of the progress and simply + calls the read() function repeatedly to + progress to the next node in the document order. Other + functions provide means for skipping complete sub-trees, or + nodes until a specific element, etc. + At every time, only a very limited portion of the + document is kept in the memory, which makes the API more + memory-efficient than using DOM. However, it is also possible + to mix Reader with DOM. At every point the user may copy the + current node (optionally expanded into a complete sub-tree) + from the processed document to another DOM tree, or to + instruct the Reader to collect sub-document in form of a DOM + tree consisting of selected nodes. + Reader API also supports namespaces, xml:base, entity + handling, and DTD validation. Schema and RelaxNG validation + support will probably be added in some later revision of the + Perl interface. + The naming of methods compared to libxml2 and C# + XmlTextReader has been changed slightly to match the + conventions of XML::LibXML. Some functions have been changed + or added with respect to the C interface. + + + CONSTRUCTOR + Depending on the XML source, the Reader object can be created with either of: + + my $reader = XML::LibXML::Reader->new( location => "file.xml", ... ); + my $reader = XML::LibXML::Reader->new( string => $xml_string, ... ); + my $reader = XML::LibXML::Reader->new( IO => $file_handle, ... ); + my $reader = XML::LibXML::Reader->new( FD => fileno(STDIN), ... ); + my $reader = XML::LibXML::Reader->new( DOM => $dom, ... ); + + where ... are (optional) reader options described below in + or various parser options described in . + The constructor recognizes the following XML sources: + + Source specification + + + location + + Read XML from a local file or (non-HTTPS) URL. + + + + string + + Read XML from a string. + + + + IO + + Read XML a Perl IO filehandle. + + + + FD + + Read XML from a file descriptor (bypasses Perl I/O + layer, only applicable to filehandles for regular + files or pipes). Possibly faster than IO. + + + + DOM + + Use reader API to walk through a pre-parsed + . + + + + + + Reader options + + + encoding => $encoding + + override document encoding. + + + + RelaxNG => $rng_schema + + can be used to pass either a + object or a filename or (non-HTTPS) URL of a RelaxNG schema to the + constructor. The schema is then used to validate the + document as it is processed. + + + + Schema => $xsd_schema + + can be used to pass either a + object or a filename or (non-HTTPS) URL of a W3C XSD schema to the + constructor. The schema is then used to validate the + document as it is processed. + + + + ... + + the reader further supports various + parser options described in + (specifically those + labeled by /reader/). + + + + + + + + METHODS CONTROLLING PARSING PROGRESS + + + read () + + Moves the position to the next node in the stream, + exposing its properties. + Returns 1 if the node was read successfully, 0 if + there is no more nodes to read, or -1 in case of + error + + + + readAttributeValue () + + Parses an attribute value into one or more Text and + EntityReference nodes. + Returns 1 in case of success, 0 if the reader was not positioned on an attribute node or all the attribute values have been read, or -1 in case of error. + + + + readState () + + Gets the read state of the reader. Returns the state + value, or -1 in case of error. The module exports + constants for the Reader states, see STATES + below. + + + + depth () + + The depth of the node in the tree, starts at 0 for + the root node. + + + + next () + + Skip to the node following the current one in the + document order while avoiding the sub-tree if any. + Returns 1 if the node was read successfully, 0 if there + is no more nodes to read, or -1 in case of error. + + + + nextElement (localname?,nsURI?) + + Skip nodes following the current one in the document + order until a specific element is reached. The element's + name must be equal to a given localname if defined, and + its namespace must equal to a given nsURI if defined. + Either of the arguments can be undefined (or omitted, in + case of the latter or both). + Returns 1 if the element was found, 0 if there is no more nodes to read, or -1 in case of error. + + + + nextPatternMatch (compiled_pattern) + + Skip nodes following the current one in the document + order until an element matching a given + compiled pattern is reached. See + for information on + compiled patterns. See also the matchesPattern + method. + Returns 1 if the element was found, 0 if there is no more nodes to read, or -1 in case of error. + + + + skipSiblings () + + Skip all nodes on the same or lower level until the + first node on a higher level is reached. In particular, + if the current node occurs in an element, the reader + stops at the end tag of the parent element, otherwise it + stops at a node immediately following the parent + node. + Returns 1 if successful, 0 if end of the document is reached, or -1 in case of error. + + + + nextSibling () + + It skips to the node following the current one in + the document order while avoiding the sub-tree if + any. + Returns 1 if the node was read successfully, 0 if + there is no more nodes to read, or -1 in case of + error + + + + nextSiblingElement (name?,nsURI?) + + Like nextElement but only processes sibling elements + of the current node (moving forward using + nextSibling () rather than + read (), internally). + Returns 1 if the element was found, 0 if there is no + more sibling nodes, or -1 in case of error. + + + + finish () + + Skip all remaining nodes in the document, reaching end of the document. + Returns 1 if successful, 0 in case of error. + + + + close () + + This method releases any resources allocated by the + current instance and closes any underlying input. It + returns 0 on failure and 1 on success. This method is + automatically called by the destructor when the reader + is forgotten, therefore you do not have to call it + directly. + + + + + + METHODS EXTRACTING INFORMATION + + + name () + + Returns the qualified name of the current node, equal to (Prefix:)LocalName. + + + + nodeType () + + Returns the type of the current node. See NODE TYPES below. + + + + localName () + + Returns the local name of the node. + + + + prefix () + + Returns the prefix of the namespace associated with the node. + + + + namespaceURI () + + Returns the URI defining the namespace associated with the node. + + + + isEmptyElement () + + Check if the current node is empty, this is a bit + bizarre in the sense that <a/> will be considered + empty while <a></a> will not. + + + + hasValue () + + Returns true if the node can have a text value. + + + + value () + + Provides the text value of the node if present or undef if not available. + + + + readInnerXml () + + Reads the contents of the current node, including + child nodes and markup. Returns a string containing the + XML of the node's content, or undef if the current node + is neither an element nor attribute, or has no child + nodes. + + + + readOuterXml () + + Reads the contents of the current node, including + child nodes and markup. + Returns a string containing the XML of the node + including its content, or undef if the current node is + neither an element nor attribute. + + + + nodePath() + + Returns a canonical location path to the current element + from the root node to the current node. Namespaced + elements are matched by '*', because there is no way to declare + prefixes within XPath patterns. Unlike + XML::LibXML::Node::nodePath(), this function + does not provide sibling counts (i.e. instead of e.g. '/a/b[1]' and '/a/b[2]' + you get '/a/b' for both matches). + + + + + matchesPattern(compiled_pattern) + + Returns a true value if the current + node matches a compiled pattern. + See for information on + compiled patterns. See also the nextPatternMatch + method. + + + + + + METHODS EXTRACTING DOM NODES + + + document () + + Provides access to the document tree built by the + reader. This function can be used to collect the + preserved nodes (see preserveNode() + and preservePattern). + CAUTION: Never use this function to modify the tree + unless reading of the whole document is + completed! + + + + copyCurrentNode (deep) + + This function is similar a DOM function + copyNode(). It returns a copy of the + currently processed node as a corresponding DOM object. + Use deep = 1 to obtain the full sub-tree. + + + + preserveNode () + + This tells the XML Reader to preserve the current + node in the document tree. A document tree consisting of + the preserved nodes and their content can be obtained + using the method document() once + parsing is finished. + Returns the node or NULL in case of error. + + + + preservePattern (pattern,\%ns_map) + + This tells the XML Reader to preserve all nodes + matched by the pattern (which is a streaming XPath + subset). A document tree consisting of the preserved + nodes and their content can be obtained using the method + document() once parsing is + finished. + An optional second argument can be used to provide a + HASH reference mapping prefixes used by the XPath to + namespace URIs. + The XPath subset available with this function is + described at + http://www.w3.org/TR/xmlschema-1/#Selector + and matches the production + Path ::= ('.//')? ( Step '/' )* ( Step | '@' NameTest ) + Returns a positive number in case of success and -1 + in case of error + + + + + + METHODS PROCESSING ATTRIBUTES + + + attributeCount () + + Provides the number of attributes of the current + node. + + + + hasAttributes () + + Whether the node has attributes. + + + + getAttribute (name) + + Provides the value of the attribute with the + specified qualified name. + Returns a string containing the value of the + specified attribute, or undef in case of error. + + + + getAttributeNs (localName, namespaceURI) + + Provides the value of the specified + attribute. + Returns a string containing the value of the + specified attribute, or undef in case of error. + + + + getAttributeNo (no) + + Provides the value of the attribute with the + specified index relative to the containing + element. + Returns a string containing the value of the + specified attribute, or undef in case of error. + + + + isDefault () + + Returns true if the current attribute node was + generated from the default value defined in the + DTD. + + + + moveToAttribute (name) + + Moves the position to the attribute with the + specified local name and namespace URI. + Returns 1 in case of success, -1 in case of error, 0 + if not found + + + + moveToAttributeNo (no) + + Moves the position to the attribute with the + specified index relative to the containing + element. + Returns 1 in case of success, -1 in case of error, 0 + if not found + + + + moveToAttributeNs (localName,namespaceURI) + + Moves the position to the attribute with the + specified local name and namespace URI. + Returns 1 in case of success, -1 in case of error, 0 + if not found + + + + moveToFirstAttribute () + + Moves the position to the first attribute associated + with the current node. + Returns 1 in case of success, -1 in case of error, 0 + if not found + + + + moveToNextAttribute () + + Moves the position to the next attribute associated + with the current node. + Returns 1 in case of success, -1 in case of error, 0 + if not found + + + + moveToElement () + + Moves the position to the node that contains the + current attribute node. + Returns 1 in case of success, -1 in case of error, 0 + if not moved + + + + isNamespaceDecl () + + Determine whether the current node is a namespace + declaration rather than a regular attribute. + Returns 1 if the current node is a namespace + declaration, 0 if it is a regular attribute or other + type of node, or -1 in case of error. + + + + + + OTHER METHODS + + + lookupNamespace (prefix) + + Resolves a namespace prefix in the scope of the + current element. + Returns a string containing the namespace URI to + which the prefix maps or undef in case of error. + + + + encoding () + + Returns a string containing the encoding of the + document or undef in case of error. + + + + standalone () + + Determine the standalone status of the document + being read. Returns 1 if the document was declared to be + standalone, 0 if it was declared to be not standalone, + or -1 if the document did not specify its standalone + status or in case of error. + + + + xmlVersion () + + Determine the XML version of the document being + read. Returns a string containing the XML version of the + document or undef in case of error. + + + + baseURI () + + Returns the base URI of a given node. + + + + isValid () + + Retrieve the validity status from the parser. + Returns 1 if valid, 0 if no, and -1 in case of + error. + + + + xmlLang () + + The xml:lang scope within which the node + resides. + + + + lineNumber () + + Provide the line number of the current parsing + point. + + + + columnNumber () + + Provide the column number of the current parsing + point. + + + + byteConsumed () + + This function provides the current index of the + parser relative to the start of the current entity. This + function is computed in bytes from the beginning + starting at zero and finishing at the size in bytes of + the file if parsing a file. The function is of constant + cost if the input is UTF-8 but can be costly if run on + non-UTF-8 input. + + + + setParserProp (prop => value, ...) + + Change the parser processing behaviour by changing + some of its internal properties. The following + properties are available with this function: + ``load_ext_dtd'', ``complete_attributes'', + ``validation'', ``expand_entities''. + Since some of the properties can only be changed + before any read has been done, it is best to set the + parsing properties at the constructor. + Returns 0 if the call was successful, or -1 in case + of error + + + + getParserProp (prop) + + Get value of an parser internal property. The + following property names can be used: ``load_ext_dtd'', + ``complete_attributes'', ``validation'', + ``expand_entities''. + Returns the value, usually 0 or 1, or -1 in case of + error. + + + + + + DESTRUCTION + XML::LibXML takes care of the reader object destruction + when the last reference to the reader object goes out of + scope. The document tree is preserved, though, if either of + $reader->document or $reader->preserveNode was used and + references to the document tree exist. + + + NODE TYPES + The reader interface provides the following constants for + node types (the constant symbols are exported by default or if + tag :types is used). + XML_READER_TYPE_NONE => 0 +XML_READER_TYPE_ELEMENT => 1 +XML_READER_TYPE_ATTRIBUTE => 2 +XML_READER_TYPE_TEXT => 3 +XML_READER_TYPE_CDATA => 4 +XML_READER_TYPE_ENTITY_REFERENCE => 5 +XML_READER_TYPE_ENTITY => 6 +XML_READER_TYPE_PROCESSING_INSTRUCTION => 7 +XML_READER_TYPE_COMMENT => 8 +XML_READER_TYPE_DOCUMENT => 9 +XML_READER_TYPE_DOCUMENT_TYPE => 10 +XML_READER_TYPE_DOCUMENT_FRAGMENT => 11 +XML_READER_TYPE_NOTATION => 12 +XML_READER_TYPE_WHITESPACE => 13 +XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14 +XML_READER_TYPE_END_ELEMENT => 15 +XML_READER_TYPE_END_ENTITY => 16 +XML_READER_TYPE_XML_DECLARATION => 17 + + + + STATES + The following constants represent the values returned by + readState(). They are exported by default, + or if tag :states is used: + XML_READER_NONE => -1 +XML_READER_START => 0 +XML_READER_ELEMENT => 1 +XML_READER_END => 2 +XML_READER_EMPTY => 3 +XML_READER_BACKTRACK => 4 +XML_READER_DONE => 5 +XML_READER_ERROR => 6 + + + + SEE ALSO + for information about compiled patterns. + http://xmlsoft.org/html/libxml-xmlreader.html + http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html + + + ORIGINAL IMPLEMENTATION + Heiko Klein, <H.Klein@gmx.net<gt> and Petr Pajas + + + + XML::LibXML::XPathExpression - interface to libxml2 pre-compiled XPath expressions + XML::LibXML::XPathExpression + + Synopsis + use XML::LibXML; +my $compiled_xpath = XML::LibXML::XPathExpression->new('//foo[@bar="baz"][position()<4]'); + +# interface from XML::LibXML::Node + +my $result = $node->find($compiled_xpath); +my @nodes = $node->findnodes($compiled_xpath); +my $value = $node->findvalue($compiled_xpath); + +# interface from XML::LibXML::XPathContext + +my $result = $xpc->find($compiled_xpath,$node); +my @nodes = $xpc->findnodes($compiled_xpath,$node); +my $value = $xpc->findvalue($compiled_xpath,$node); + + + + Description + This is a perl interface to libxml2's pre-compiled XPath expressions. + Pre-compiling an XPath expression can give in some performance + benefit if the same XPath query is evaluated many times. + XML::LibXML::XPathExpression objects + can be passed to all find... + functions XML::LibXML + that expect an XPath expression. + + + + new() + + + $compiled = XML::LibXML::XPathExpression->new( xpath_string ); + + The constructor takes an XPath 1.0 expression as a string + and returns an object representing the pre-compiled + expressions (the actual data structure is internal to libxml2). + + + + + + + + XML::LibXML::Pattern - interface to libxml2 XPath patterns + XML::LibXML::Pattern + + Synopsis + use XML::LibXML; +my $pattern = XML::LibXML::Pattern->new('/x:html/x:body//x:div', { 'x' => 'http://www.w3.org/1999/xhtml' }); +# test a match on an XML::LibXML::Node $node + +if ($pattern->matchesNode($node)) { ... } + +# or on an XML::LibXML::Reader + +if ($reader->matchesPattern($pattern)) { ... } + +# or skip reading all nodes that do not match + +print $reader->nodePath while $reader->nextPatternMatch($pattern); + + + + Description + This is a perl interface to libxml2's pattern matching support + http://xmlsoft.org/html/libxml-pattern.html. + This feature requires recent versions of libxml2. + Patterns are a small subset of XPath language, which is limited + to (disjunctions of) location paths involving the child and descendant axes in abbreviated form + as described by the extended BNF given below: + + Selector ::= Path ( '|' Path )* +Path ::= ('.//' | '//' | '/' )? Step ( '/' Step )* +Step ::= '.' | NameTest +NameTest ::= QName | '*' | NCName ':' '*' + For readability, whitespace may be used in selector XPath expressions even though not explicitly allowed by the grammar: whitespace may be freely added within patterns before or after any token, where + token ::= '.' | '/' | '//' | '|' | NameTest + Note that no predicates or attribute tests are allowed. + Patterns are particularly useful for stream parsing provided via the XML::LibXML::Reader interface. + + + new() + + + $pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } ); + + The constructor of a pattern takes a pattern expression (as described + by the BNF grammar above) and an optional HASH reference mapping + prefixes to namespace URIs. The method returns a compiled pattern object. + + + Note that if the document + has a default namespace, it must still be given an prefix in order + to be matched (as demanded by the XPath 1.0 specification). For example, + to match an element <a xmlns="http://foo.bar"</a>, one + should use a pattern like this: + + $pattern = XML::LibXML::Pattern->new( 'foo:a', { foo => 'http://foo.bar' }); + + + + matchesNode($node) + + + $bool = $pattern->matchesNode($node); + + Given an XML::LibXML::Node object, returns a true value if + the node is matched by the compiled pattern expression. + + + + + + SEE ALSO + for other methods involving compiled patterns. + + + + XML::LibXML::RegExp - interface to libxml2 regular expressions + XML::LibXML::RegExp + + Synopsis + use XML::LibXML; +my $compiled_re = XML::LibXML::RegExp->new('[0-9]{5}(-[0-9]{4})?'); +if ($compiled_re->isDeterministic()) { ... } +if ($compiled_re->matches($string)) { ... } + + + + Description + This is a perl interface to libxml2's implementation of regular expressions, which are used e.g. for validation of XML Schema simple types (pattern facet). + + + new() + + + $compiled_re = XML::LibXML::RegExp->new( $regexp_str ); + + The constructor takes a string containing a regular expression + and returns a compiled regexp object. + + + + + matches($string) + + + $bool = $compiled_re->matches($string); + + Given a string value, returns a true value if + the value is matched by the compiled regular expression. + + + + isDeterministic() + + + $bool = $compiled_re->isDeterministic(); + + Returns a true value if the regular expression is deterministic; returns false otherwise. (See the definition of determinism in the XML spec) + + + + + + + A map for named nodes + + XML::LibXML::NamedNodeMap + + Synopsis + use XML::LibXML; +my $map = XML::LibXML::NamedNodeMap->new(@nodes); + +my $nodes_list = $map->nodes(); + +my $node_with_index_2 = $map->item(2); + + + + Description + + XML::LibXML::NamedNodeMap maps nodes' names to nodes. + + + Methods + + + + length + + + + my $length = $map->length; + + + Returns the number of nodes in the map. + + + + + nodes + + + + my $nodes_ref = $node->nodes() + + + Returns a reference to the list of nodes. + + + + + item + + + + my $node_2 = $map->item(2); + + + Returns the node with the index of the argument + (starting from 0) + + + + + getNamedItem + + + + my $node = $map->getNamedItem('phone_number'); + + + Returns the node with the name. + + + + + + setNamedItem + + + + $map->setNamedItem($new_node) + + + Sets the node with the same name as + $new_node to + $new_node. + + + + + removeNamedItem + + + + $map->removeNamedItem($name) + + + Remove the item with the name + $name. + + + + + getNamedItemNS + + + Not implemented yet.. + + + + setNamedItemNS + + + Not implemented yet.. + + + + removeNamedItemNS + + + Not implemented yet.. + + + + + + + Structured Errors + XML::LibXML::Error + + Synopsis + + eval { ... }; + if (ref($@)) { + # handle a structured error (XML::LibXML::Error object) + } elsif ($@) { + # error, but not an XML::LibXML::Error object + } else { + # no error + } + + + + Description + The + XML::LibXML::Error class is a tiny frontend to + libxml2's structured error support. If + XML::LibXML is compiled with structured error support, all errors + reported by libxml2 are transformed to XML::LibXML::Error + objects. These objects automatically serialize to the + corresponding error messages when printed or used in a string + operation, but as objects, can also be used to get a detailed and + structured information about the error that occurred. + + Unlike most other XML::LibXML objects, XML::LibXML::Error + doesn't wrap an underlying libxml2 + structure directly, but rather transforms it to a blessed Perl + hash reference containing the individual fields of the + structured error information as hash key-value pairs. Individual + items (fields) of a structured error can either be + obtained directly as $@->{field}, or using autoloaded + methods such as $@->field() (where field is the field + name). XML::LibXML::Error objects have the following fields: + domain, code, level, file, line, nodename, message, str1, str2, + str3, num1, num2, and _prev (some of them may be undefined). + + + + $XML::LibXML::Error::WARNINGS + + + $XML::LibXML::Error::WARNINGS=1; + + Traditionally, XML::LibXML was suppressing parser + warnings by setting libxml2's global variable + xmlGetWarningsDefaultValue to 0. Since + 1.70 we do not change libxml2's global + variables anymore; for backward compatibility, + XML::LibXML suppresses warnings. + This variable can be set to 1 + to enable reporting of these warnings via + Perl warn + and to 2 to report hem via die. + + + + + as_string + + + $message = $@->as_string(); + + This function serializes an XML::LibXML::Error + object to a string containing the full error message + close to the message produced by libxml2 default error + handlers and tools like xmllint. This method is also used + to overload "" operator on XML::LibXML::Error, so it is + automatically called whenever XML::LibXML::Error object + is treated as a string (e.g. in print $@). + + + + + dump + + + print $@->dump(); + + This function serializes an XML::LibXML::Error to a + string displaying all fields of the error structure + individually on separate lines of the form 'name' => 'value'. + + + + + domain + + + $error_domain = $@->domain(); + + Returns string containing information about what part + of the library raised the error. Can be one of: + "parser", "tree", "namespace", "validity", "HTML parser", + "memory", "output", "I/O", "ftp", "http", + "XInclude", "XPath", "xpointer", "regexp", "Schemas + datatype", + "Schemas parser", "Schemas validity", + "Relax-NG parser", "Relax-NG validity", "Catalog", + "C14N", "XSLT", "validity". + + + + + code + + + $error_code = $@->code(); + + Returns the actual libxml2 error code. + The XML::LibXML::ErrNo module defines + constants for individual error codes. Currently + libxml2 uses over 480 different error codes. + + + + + message + + + $error_message = $@->message(); + + Returns a human-readable informative error + message. + + + + level + + + $error_level = $@->level(); + + Returns an integer value describing how consequent is + the error. XML::LibXML::Error defines the following + constants: + + + + XML_ERR_NONE = 0 + + + XML_ERR_WARNING = 1 : A simple warning. + + + XML_ERR_ERROR = 2 : A recoverable error. + + + XML_ERR_FATAL = 3 : A fatal error. + + + + + + file + + + $filename = $@->file(); + + Returns the filename of the file being processed while + the error occurred. + + + + + line + + + $line = $@->line(); + + The line number, if available. + + + + nodename + + + $nodename = $@->nodename(); + + Name of the node where error occurred, if available. + When this field is non-empty, libxml2 actually returned a + physical pointer to the specified node. Due to memory + management issues, it is very difficult to implement a + way to expose the pointer to the Perl level as a + XML::LibXML::Node. For this reason, XML::LibXML::Error + currently only exposes the name the node. + + + + + str1 + + + $error_str1 = $@->str1(); + + Error specific. Extra string information. + + + + str2 + + + $error_str2 = $@->str2(); + + Error specific. Extra string information. + + + + str3 + + + $error_str3 = $@->str3(); + + Error specific. Extra string information. + + + + num1 + + + $error_num1 = $@->num1(); + + Error specific. Extra numeric information. + + + + num2 + + + $error_num2 = $@->num2(); + + In recent libxml2 versions, this + value contains a column number of the error or 0 if N/A. + + + + context + + + $string = $@->context(); + + For parsing errors, this field contains + about 80 characters of the XML near the place + where the error occurred. The field + $@->column() + contains the corresponding offset. + Where N/A, the field is undefined. + + + + + column + + + $offset = $@->column(); + + See $@->column() above. + + + + + _prev + + + $previous_error = $@->_prev(); + + This field can possibly hold a reference to another + XML::LibXML::Error object representing an error which + occurred just before this error. + + + + + + + Structured Errors + XML::LibXML::ErrNo + + Description + This module is based on xmlerror.h libxml2 C header file. + It defines symbolic constants for all libxml2 error codes. + Currently libxml2 uses over 480 different error codes. + See also XML::LibXML::Error. + + + + + Constants and Character Encoding Routines + XML::LibXML::Common + + Synopsis + use XML::LibXML::Common; + + + Description + + XML::LibXML::Common defines constants for all node types + and provides interface to libxml2 charset conversion + functions. + + Since XML::LibXML use their own node type definitions, + one may want to use XML::LibXML::Common in its compatibility + mode: + + + Exporter TAGS + use XML::LibXML::Common qw(:libxml); + :libxml tag will use the XML::LibXML Compatibility mode, which defines the + old 'XML_' node-type definitions. + use XML::LibXML::Common qw(:gdome); + :gdome tag will use the XML::GDOME Compatibility mode, which defines the + old 'GDOME_' node-type definitions. + use XML::LibXML::Common qw(:w3c); + This uses the nodetype definition names as specified for DOM. + use XML::LibXML::Common qw(:encoding); + + This tag can be used to export only the charset encoding functions of XML::LibXML::Common. + + + + Exports + + By default the W3 definitions as defined in the DOM specifications and + the encoding functions are exported by XML::LibXML::Common. + + + + Encoding functions + + To encode or decode a string to or from UTF-8, XML::LibXML::Common exports + two functions, which provide an interface to the encoding support in libxml2. + Which encodings are supported by these functions depends + on how libxml2 was compiled. UTF-16 is + always supported and on most installations, ISO encodings are + supported as well. + + + This interface was useful for older versions of Perl. + Since Perl >= 5.8 provides similar functions via the Encode module, + it is probably a good idea to use those instead. + + + + encodeToUTF8 + + + $encodedstring = encodeToUTF8( $name_of_encoding, $sting_to_encode ); + + The function will convert a byte string from the specified encoding to an UTF-8 encoded character string. + + + + decodeToUTF8 + + + $decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode ); + + + This function converts an UTF-8 encoded character string to a specified + encoding. Note that the conversion can raise an error if the + given string contains characters that cannot be represented in the target encoding. + + + + + Both these functions report their errors on the standard + error. If an error occurs the function will croak(). To catch + the error information it is required to call the encoding + function from within an eval block in order to prevent the + entire script from being stopped on encoding error. + + + A note on history + + Before XML::LibXML 1.70, this class was available as a + separate CPAN distribution, intended to provide functionality + shared between XML::LibXML, XML::GDOME, and possibly other + modules. Since there seems to be no progress in this + direction, we decided to merge XML::LibXML::Common 0.13 and + XML::LibXML 1.70 to one CPAN distribution. + + The merge also naturally eliminates a practical and + urgent problem experienced by many XML::LibXML users on certain + platforms, namely mysterious misbehavior of XML::LibXML + occurring if the installed (often pre-packaged) version of + XML::LibXML::Common was compiled against an older version of + libxml2 than XML::LibXML. + + + + + + + diff --git a/dom.c b/dom.c new file mode 100644 index 0000000..10eb33d --- /dev/null +++ b/dom.c @@ -0,0 +1,1325 @@ +/* $Id$ + * + * This is free software, you may use it and distribute it under the same terms as + * Perl itself. + * + * Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +*/ + +#include "dom.h" +#include "perl-libxml-mm.h" + +/* #define warn(string) fprintf(stderr, string) */ + +#ifdef XS_WARNINGS +#define xs_warn(string) warn("%s",string) +#else +#define xs_warn(string) +#endif + +void +domClearPSVIInList(xmlNodePtr list); + +void +domClearPSVI(xmlNodePtr tree) { + xmlAttrPtr prop; + + if (tree == NULL) + return; + if (tree->type == XML_ELEMENT_NODE) { + tree->psvi = NULL; + prop = tree->properties; + while (prop != NULL) { + if (tree->type == XML_ATTRIBUTE_NODE) + ((xmlAttrPtr) prop)->psvi = NULL; + domClearPSVIInList(prop->children); + prop = prop->next; + } + } else if (tree->type == XML_DOCUMENT_NODE) { + ((xmlDocPtr) tree)->psvi = NULL; + } + if (tree->children != NULL) + domClearPSVIInList(tree->children); +} + +void +domClearPSVIInList(xmlNodePtr list) { + xmlNodePtr cur; + + if (list == NULL) + return; + cur = list; + while (cur != NULL) { + domClearPSVI(cur); + cur = cur->next; + } +} + +/** + * Name: domReconcileNs + * Synopsis: void domReconcileNs( xmlNodePtr tree ); + * @tree: the tree to reconcile + * + * Reconciles namespacing on a tree by removing declarations + * of element and attribute namespaces that are already + * declared in the scope of the corresponding node. + **/ + +void +domAddNsDef(xmlNodePtr tree, xmlNsPtr ns) +{ + xmlNsPtr i = tree->nsDef; + while(i != NULL && i != ns) + i = i->next; + if( i == NULL ) + { + ns->next = tree->nsDef; + tree->nsDef = ns; + } +} + +char +domRemoveNsDef(xmlNodePtr tree, xmlNsPtr ns) +{ + xmlNsPtr i = tree->nsDef; + + if( ns == tree->nsDef ) + { + tree->nsDef = tree->nsDef->next; + ns->next = NULL; + return(1); + } + while( i != NULL ) + { + if( i->next == ns ) + { + i->next = ns->next; + ns->next = NULL; + return(1); + } + i = i->next; + } + return(0); +} + +/* ns->next must be NULL, or bad things could happen */ +xmlNsPtr +_domAddNsChain(xmlNsPtr c, xmlNsPtr ns) +{ + if( c == NULL ) + return(ns); + else + { + xmlNsPtr i = c; + while(i != NULL && i != ns) + i = i->next; + if(i == NULL) + { + ns->next = c; + return(ns); + } + } + return(c); +} + +/* We need to be smarter with attributes, because the declaration is on the parent element */ +void +_domReconcileNsAttr(xmlAttrPtr attr, xmlNsPtr * unused) +{ + xmlNodePtr tree = attr->parent; + if (tree == NULL) + return; + if( attr->ns != NULL ) + { + xmlNsPtr ns; + if ((attr->ns->prefix != NULL) && + (xmlStrEqual(attr->ns->prefix, BAD_CAST "xml"))) { + /* prefix 'xml' has no visible declaration */ + ns = xmlSearchNsByHref(tree->doc, tree, XML_XML_NAMESPACE); + attr->ns = ns; + return; + } else { + ns = xmlSearchNs( tree->doc, tree->parent, attr->ns->prefix ); + } + if( ns != NULL && ns->href != NULL && attr->ns->href != NULL && + xmlStrcmp(ns->href,attr->ns->href) == 0 ) + { + /* Remove the declaration from the element */ + if( domRemoveNsDef(tree, attr->ns) ) + /* Queue up this namespace for freeing */ + *unused = _domAddNsChain(*unused, attr->ns); + + /* Replace the namespace with the one found */ + attr->ns = ns; + } + else + { + /* If the declaration is here, we don't need to do anything */ + if( domRemoveNsDef(tree, attr->ns) ) + domAddNsDef(tree, attr->ns); + else + { + /* Replace/Add the namespace declaration on the element */ + attr->ns = xmlCopyNamespace(attr->ns); + if (attr->ns) { + domAddNsDef(tree, attr->ns); + } + } + } + } +} + +void +_domReconcileNs(xmlNodePtr tree, xmlNsPtr * unused) +{ + if( tree->ns != NULL + && ((tree->type == XML_ELEMENT_NODE) + || (tree->type == XML_ATTRIBUTE_NODE))) + { + xmlNsPtr ns = xmlSearchNs( tree->doc, tree->parent, tree->ns->prefix ); + if( ns != NULL && ns->href != NULL && tree->ns->href != NULL && + xmlStrcmp(ns->href,tree->ns->href) == 0 ) + { + /* Remove the declaration (if present) */ + if( domRemoveNsDef(tree, tree->ns) ) + /* Queue the namespace for freeing */ + *unused = _domAddNsChain(*unused, tree->ns); + + /* Replace the namespace with the one found */ + tree->ns = ns; + } + else + { + /* If the declaration is here, we don't need to do anything */ + if( domRemoveNsDef(tree, tree->ns) ) { + domAddNsDef(tree, tree->ns); + } + else + { + /* Restart the namespace at this point */ + tree->ns = xmlCopyNamespace(tree->ns); + domAddNsDef(tree, tree->ns); + } + } + } + /* Fix attribute namespacing */ + if( tree->type == XML_ELEMENT_NODE ) + { + xmlElementPtr ele = (xmlElementPtr) tree; + /* attributes is set to xmlAttributePtr, + but is an xmlAttrPtr??? */ + xmlAttrPtr attr = (xmlAttrPtr) ele->attributes; + while( attr != NULL ) + { + _domReconcileNsAttr(attr, unused); + attr = attr->next; + } + } + { + /* Recurse through all child nodes */ + xmlNodePtr child = tree->children; + while( child != NULL ) + { + _domReconcileNs(child, unused); + child = child->next; + } + } +} + +void +domReconcileNs(xmlNodePtr tree) +{ + xmlNsPtr unused = NULL; + _domReconcileNs(tree, &unused); + if( unused != NULL ) + xmlFreeNsList(unused); +} + +/** + * NAME domParseChar + * TYPE function + * SYNOPSIS + * int utf8char = domParseChar( curchar, &len ); + * + * The current char value, if using UTF-8 this may actually span + * multiple bytes in the given string. This function parses an utf8 + * character from a string into a UTF8 character (an integer). It uses + * a slightly modified version of libxml2's character parser. libxml2 + * itself does not provide any function to parse characters dircetly + * from a string and test if they are valid utf8 characters. + * + * XML::LibXML uses this function rather than perls native UTF8 + * support for two reasons: + * 1) perls UTF8 handling functions often lead to encoding errors, + * which partly comes, that they are badly documented. + * 2) not all perl versions XML::LibXML intends to run with have native + * UTF8 support. + * + * domParseChar() allows to use the very same code with all versions + * of perl :) + * + * Returns the current char value and its length + * + * NOTE: If the character passed to this function is not a UTF + * character, the return value will be 0 and the length of the + * character is -1! + */ +int +domParseChar( xmlChar *cur, int *len ) +{ + unsigned char c; + unsigned int val; + + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + + if ( cur == NULL || *cur == 0 ) { + *len = 0; + return(0); + } + + c = *cur; + if ( c & 0x80 ) { + if ((c & 0xe0) == 0xe0) { + if ((c & 0xf0) == 0xf0) { + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } else { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + if ( !IS_CHAR(val) ) { + *len = -1; + return(0); + } + return(val); + } + else { + /* 1-byte code */ + *len = 1; + return((int)c); + } +} + +/** + * Name: domReadWellBalancedString + * Synopsis: xmlNodePtr domReadWellBalancedString( xmlDocPtr doc, xmlChar *string ) + * @doc: the document, the string should belong to + * @string: the string to parse + * + * this function is pretty neat, since you can read in well balanced + * strings and get a list of nodes, which can be added to any other node. + * (sure - this should return a doucment_fragment, but still it doesn't) + * + * the code is pretty heavy i think, but deep in my heard i believe it's + * worth it :) (e.g. if you like to read a chunk of well-balanced code + * from a databasefield) + * + * in 99% the cases i believe it is faster than to create the dom by hand, + * and skip the parsing job which has to be done here. + * + * the repair flag will not be recognized with the current libxml2 + **/ +xmlNodePtr +domReadWellBalancedString( xmlDocPtr doc, xmlChar* block, int repair ) { + int retCode = -1; + xmlNodePtr nodes = NULL; + + if ( block ) { + /* read and encode the chunk */ + retCode = xmlParseBalancedChunkMemory( doc, + NULL, + NULL, + 0, + block, + &nodes ); + +/* retCode = xmlParseBalancedChunkMemoryRecover( doc, */ +/* NULL, */ +/* NULL, */ +/* 0, */ +/* block, */ +/* &nodes, */ +/* repair ); */ + + /* error handling */ + if ( retCode != 0 && repair == 0 ) { + /* if the code was not well balanced, we will not return + * a bad node list, but we have to free the nodes */ + xmlFreeNodeList( nodes ); + nodes = NULL; + } + else { + xmlSetListDoc(nodes,doc); + } + } + + return nodes; +} + +/** + * internal helper: insert node to nodelist + * synopsis: xmlNodePtr insert_node_to_nodelist( leader, insertnode, followup ); + * while leader and followup are already list nodes. both may be NULL + * if leader is null the parents children will be reset + * if followup is null the parent last will be reset. + * leader and followup has to be followups in the nodelist!!! + * the function returns the node inserted. if a fragment was inserted, + * the first node of the list will returned + * + * i ran into a misconception here. there should be a normalization function + * for the DOM, so sequences of text nodes can get replaced by a single + * text node. as i see DOM Level 1 does not allow text node sequences, while + * Level 2 and 3 do. + **/ +int +domAddNodeToList(xmlNodePtr cur, xmlNodePtr leader, xmlNodePtr followup) +{ + xmlNodePtr c1 = NULL, c2 = NULL, p = NULL; + if ( cur ) { + c1 = c2 = cur; + if( leader ) { + p = leader->parent; + } + else if( followup ) { + p = followup->parent; + } + else { + return 0; /* can't insert */ + } + + if ( cur->type == XML_DOCUMENT_FRAG_NODE ) { + c1 = cur->children; + while ( c1 ){ + c1->parent = p; + c1 = c1->next; + } + c1 = cur->children; + c2 = cur->last; + cur->last = cur->children = NULL; + } + else { + cur->parent = p; + } + + if (c1 && c2 && c1!=leader) { + if ( leader ) { + leader->next = c1; + c1->prev = leader; + } + else if ( p ) { + p->children = c1; + } + + if ( followup ) { + followup->prev = c2; + c2->next = followup; + } + else if ( p ) { + p->last = c2; + } + } + return 1; + } + return 0; +} + +/** + * domIsParent tests, if testnode is parent of the reference + * node. this test is very important to avoid circular constructs in + * trees. if the ref is a parent of the cur node the + * function returns 1 (TRUE), otherwise 0 (FALSE). + **/ +int +domIsParent( xmlNodePtr cur, xmlNodePtr refNode ) { + xmlNodePtr helper = NULL; + + if ( cur == NULL || refNode == NULL) return 0; + if (refNode==cur) return 1; + if ( cur->doc != refNode->doc + || refNode->children == NULL + || cur->parent == (xmlNodePtr)cur->doc + || cur->parent == NULL ) { + return 0; + } + + if( refNode->type == XML_DOCUMENT_NODE ) { + return 1; + } + + helper= cur; + while ( helper && (xmlDocPtr) helper != cur->doc ) { + if( helper == refNode ) { + return 1; + } + helper = helper->parent; + } + + return 0; +} + +int +domTestHierarchy(xmlNodePtr cur, xmlNodePtr refNode) +{ + if ( !refNode || !cur ) { + return 0; + } + if (cur->type == XML_ATTRIBUTE_NODE) { + switch ( refNode->type ){ + case XML_TEXT_NODE: + case XML_ENTITY_REF_NODE: + return 1; + break; + default: + return 0; + break; + } + } + + switch ( refNode->type ){ + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: + return 0; + break; + default: + break; + } + + if ( domIsParent( cur, refNode ) ) { + return 0; + } + + return 1; +} + +int +domTestDocument(xmlNodePtr cur, xmlNodePtr refNode) +{ + if ( cur->type == XML_DOCUMENT_NODE ) { + switch ( refNode->type ) { + case XML_ATTRIBUTE_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_NODE: + case XML_ENTITY_REF_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_NAMESPACE_DECL: + return 0; + break; + default: + break; + } + } + return 1; +} + +void +domUnlinkNode( xmlNodePtr node ) { + if ( node == NULL + || ( node->prev == NULL + && node->next == NULL + && node->parent == NULL ) ) { + return; + } + + if (node->type == XML_DTD_NODE) { + /* This clears the doc->intSubset pointer. */ + xmlUnlinkNode(node); + return; + } + + if ( node->prev != NULL ) { + node->prev->next = node->next; + } + + if ( node->next != NULL ) { + node->next->prev = node->prev; + } + + if ( node->parent != NULL ) { + if ( node == node->parent->last ) { + node->parent->last = node->prev; + } + + if ( node == node->parent->children ) { + node->parent->children = node->next; + } + } + + node->prev = NULL; + node->next = NULL; + node->parent = NULL; +} + +xmlNodePtr +domImportNode( xmlDocPtr doc, xmlNodePtr node, int move, int reconcileNS ) { + xmlNodePtr return_node = node; + + if ( move ) { + return_node = node; + domUnlinkNode( node ); + } + else { + if ( node->type == XML_DTD_NODE ) { + return_node = (xmlNodePtr) xmlCopyDtd((xmlDtdPtr) node); + } + else { + return_node = xmlDocCopyNode( node, doc, 1 ); + } + } + + + /* tell all children about the new boss */ + if ( node && node->doc != doc ) { + /* if the source document contained psvi, mark the current document as psvi tainted */ + if (PmmIsPSVITainted(node->doc)) + PmmInvalidatePSVI(doc); + xmlSetTreeDoc(return_node, doc); + } + + if ( reconcileNS && doc && return_node + && return_node->type != XML_ENTITY_REF_NODE ) { + domReconcileNs(return_node); + } + + return return_node; +} + +/** + * Name: domName + * Synopsis: string = domName( node ); + * + * domName returns the full name for the current node. + * If the node belongs to a namespace it returns the prefix and + * the local name. otherwise only the local name is returned. + **/ +xmlChar* +domName(xmlNodePtr node) { + const xmlChar *prefix = NULL; + const xmlChar *name = NULL; + xmlChar *qname = NULL; + + if ( node == NULL ) { + return NULL; + } + + switch ( node->type ) { + case XML_XINCLUDE_START : + case XML_XINCLUDE_END : + case XML_ENTITY_REF_NODE : + case XML_ENTITY_NODE : + case XML_DTD_NODE : + case XML_ENTITY_DECL : + case XML_DOCUMENT_TYPE_NODE : + case XML_PI_NODE : + case XML_NOTATION_NODE : + case XML_NAMESPACE_DECL : + name = node->name; + break; + + case XML_COMMENT_NODE : + name = (const xmlChar *) "#comment"; + break; + + case XML_CDATA_SECTION_NODE : + name = (const xmlChar *) "#cdata-section"; + break; + + case XML_TEXT_NODE : + name = (const xmlChar *) "#text"; + break; + + + case XML_DOCUMENT_NODE : + case XML_HTML_DOCUMENT_NODE : + case XML_DOCB_DOCUMENT_NODE : + name = (const xmlChar *) "#document"; + break; + + case XML_DOCUMENT_FRAG_NODE : + name = (const xmlChar *) "#document-fragment"; + break; + + case XML_ELEMENT_NODE : + case XML_ATTRIBUTE_NODE : + if ( node->ns != NULL ) { + prefix = node->ns->prefix; + } + name = node->name; + break; + + case XML_ELEMENT_DECL : + prefix = ((xmlElementPtr) node)->prefix; + name = node->name; + break; + + case XML_ATTRIBUTE_DECL : + prefix = ((xmlAttributePtr) node)->prefix; + name = node->name; + break; + } + + if ( prefix != NULL ) { + qname = xmlStrdup( prefix ); + qname = xmlStrcat( qname , (const xmlChar *) ":" ); + qname = xmlStrcat( qname , name ); + } + else { + qname = xmlStrdup( name ); + } + + return qname; +} + +/** + * Name: domAppendChild + * Synopsis: xmlNodePtr domAppendChild( xmlNodePtr par, xmlNodePtr newCld ); + * @par: the node to append to + * @newCld: the node to append + * + * Returns newCld on success otherwise NULL + * The function will unbind newCld first if nesseccary. As well the + * function will fail, if par or newCld is a Attribute Node OR if newCld + * is a parent of par. + * + * If newCld belongs to a different DOM the node will be imported + * implicit before it gets appended. + **/ +xmlNodePtr +domAppendChild( xmlNodePtr self, + xmlNodePtr newChild ){ + xmlNodePtr fragment = NULL; + if ( self == NULL ) { + return newChild; + } + + if ( !(domTestHierarchy(self, newChild) + && domTestDocument(self, newChild))){ + croak("appendChild: HIERARCHY_REQUEST_ERR\n"); + return NULL; + } + + if ( newChild->doc == self->doc ){ + domUnlinkNode( newChild ); + } + else { + xs_warn("WRONG_DOCUMENT_ERR - non conform implementation\n"); + /* xmlGenericError(xmlGenericErrorContext,"WRONG_DOCUMENT_ERR\n"); */ + newChild = domImportNode( self->doc, newChild, 1, 0 ); + } + + if ( self->children != NULL ) { + if (newChild->type == XML_DOCUMENT_FRAG_NODE ) + fragment = newChild->children; + domAddNodeToList( newChild, self->last, NULL ); + } + else if (newChild->type == XML_DOCUMENT_FRAG_NODE ) { + xmlNodePtr c1 = NULL; + self->children = newChild->children; + fragment = newChild->children; + c1 = fragment; + while ( c1 ){ + c1->parent = self; + c1 = c1->next; + } + self->last = newChild->last; + newChild->last = newChild->children = NULL; + } + else { + self->children = newChild; + self->last = newChild; + newChild->parent= self; + } + + if ( fragment ) { + /* we must reconcile all nodes in the fragment */ + newChild = fragment; /* return the first node in the fragment */ + while ( fragment ) { + domReconcileNs(fragment); + fragment = fragment->next; + } + } + else if ( newChild->type != XML_ENTITY_REF_NODE ) { + domReconcileNs(newChild); + } + + return newChild; +} + +xmlNodePtr +domRemoveChild( xmlNodePtr self, xmlNodePtr old ) { + if ( self == NULL || old == NULL ) { + return NULL; + } + if ( old->type == XML_ATTRIBUTE_NODE + || old->type == XML_NAMESPACE_DECL ) { + return NULL; + } + if ( self != old->parent ) { + /* not a child! */ + return NULL; + } + + domUnlinkNode( old ); + if ( old->type == XML_ELEMENT_NODE ) { + domReconcileNs( old ); + } + + return old ; +} + +xmlNodePtr +domReplaceChild( xmlNodePtr self, xmlNodePtr new, xmlNodePtr old ) { + xmlNodePtr fragment = NULL; + xmlNodePtr fragment_next = NULL; + if ( self== NULL ) + return NULL; + + if ( new == old ) + return NULL; + + if ( new == NULL ) { + /* level2 sais nothing about this case :( */ + return domRemoveChild( self, old ); + } + + if ( old == NULL ) { + domAppendChild( self, new ); + return old; + } + + if ( !(domTestHierarchy(self, new) + && domTestDocument(self, new))){ + croak("replaceChild: HIERARCHY_REQUEST_ERR\n"); + return NULL; + } + + if ( new->doc == self->doc ) { + domUnlinkNode( new ); + } + else { + /* WRONG_DOCUMENT_ERR - non conform implementation */ + new = domImportNode( self->doc, new, 1, 1 ); + } + + if( old == self->children && old == self->last ) { + domRemoveChild( self, old ); + domAppendChild( self, new ); + } + else if ( new->type == XML_DOCUMENT_FRAG_NODE + && new->children == NULL ) { + /* want to replace with an empty fragment, then remove ... */ + fragment = new->children; + fragment_next = old->next; + domRemoveChild( self, old ); + } + else { + domAddNodeToList(new, old->prev, old->next ); + old->parent = old->next = old->prev = NULL; + } + if ( fragment ) { + while ( fragment && fragment != fragment_next ) { + domReconcileNs(fragment); + fragment = fragment->next; + } + } else if ( new->type != XML_ENTITY_REF_NODE ) { + domReconcileNs(new); + } + + return old; +} + + +xmlNodePtr +domInsertBefore( xmlNodePtr self, + xmlNodePtr newChild, + xmlNodePtr refChild ){ + xmlNodePtr fragment = NULL; + if ( refChild == newChild ) { + return newChild; + } + + if ( self == NULL || newChild == NULL ) { + return NULL; + } + + if ( refChild != NULL ) { + if ( refChild->parent != self + || ( newChild->type == XML_DOCUMENT_FRAG_NODE + && newChild->children == NULL ) ) { + /* NOT_FOUND_ERR */ + xmlGenericError(xmlGenericErrorContext,"NOT_FOUND_ERR\n"); + return NULL; + } + } + + if ( self->children == NULL ) { + return domAppendChild( self, newChild ); + } + + if ( !(domTestHierarchy( self, newChild ) + && domTestDocument( self, newChild ))) { + croak("insertBefore/insertAfter: HIERARCHY_REQUEST_ERR\n"); + return NULL; + } + + if ( self->doc == newChild->doc ){ + domUnlinkNode( newChild ); + } + else { + newChild = domImportNode( self->doc, newChild, 1, 0 ); + } + + if ( newChild->type == XML_DOCUMENT_FRAG_NODE ) { + fragment = newChild->children; + } + if ( refChild == NULL ) { + domAddNodeToList(newChild, self->last, NULL); + } + else { + domAddNodeToList(newChild, refChild->prev, refChild); + } + + if ( fragment ) { + newChild = fragment; /* return the first node in the fragment */ + while ( fragment && fragment != refChild ) { + domReconcileNs(fragment); + fragment = fragment->next; + } + } else if ( newChild->type != XML_ENTITY_REF_NODE ) { + domReconcileNs(newChild); + } + + return newChild; +} + +/* + * this function does not exist in the spec although it's useful + */ +xmlNodePtr +domInsertAfter( xmlNodePtr self, + xmlNodePtr newChild, + xmlNodePtr refChild ){ + if ( refChild == NULL ) { + return domInsertBefore( self, newChild, NULL ); + } + return domInsertBefore( self, newChild, refChild->next ); +} + +xmlNodePtr +domReplaceNode( xmlNodePtr oldNode, xmlNodePtr newNode ) { + xmlNodePtr prev = NULL, next = NULL, par = NULL, fragment = NULL; + + if ( oldNode == NULL + || newNode == NULL ) { + /* NOT_FOUND_ERROR */ + return NULL; + } + + if ( oldNode->type == XML_ATTRIBUTE_NODE + || newNode->type == XML_ATTRIBUTE_NODE + || newNode->type == XML_DOCUMENT_NODE + || domIsParent( newNode, oldNode ) ) { + /* HIERARCHY_REQUEST_ERR + * wrong node type + * new node is parent of itself + */ + croak("replaceNode: HIERARCHY_REQUEST_ERR\n"); + return NULL; + } + + par = oldNode->parent; + prev = oldNode->prev; + next = oldNode->next; + + if ( oldNode->_private == NULL ) { + xmlUnlinkNode( oldNode ); + } + else { + domUnlinkNode( oldNode ); + } + + if ( newNode->type == XML_DOCUMENT_FRAG_NODE ) { + fragment = newNode->children; + } + if( prev == NULL && next == NULL ) { + /* oldNode was the only child */ + domAppendChild( par , newNode ); + } + else { + domAddNodeToList( newNode, prev, next ); + } + + if ( fragment ) { + while ( fragment && fragment != next ) { + domReconcileNs(fragment); + fragment = fragment->next; + } + } else if ( newNode->type != XML_ENTITY_REF_NODE ) { + domReconcileNs(newNode); + } + + return oldNode; +} + +xmlChar* +domGetNodeValue( xmlNodePtr n ) { + xmlChar * retval = NULL; + if( n != NULL ) { + switch ( n->type ) { + case XML_ATTRIBUTE_NODE: + case XML_ENTITY_DECL: + case XML_TEXT_NODE: + case XML_COMMENT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_ENTITY_REF_NODE: + break; + default: + return retval; + break; + } + if ( n->type != XML_ENTITY_DECL ) { + retval = xmlXPathCastNodeToString(n); + } + else { + if ( n->content != NULL ) { + xs_warn(" dublicate content\n" ); + retval = xmlStrdup(n->content); + } + else if ( n->children != NULL ) { + xmlNodePtr cnode = n->children; + xs_warn(" use child content\n" ); + /* ok then toString in this case ... */ + while (cnode) { + xmlBufferPtr buffer = xmlBufferCreate(); + /* buffer = xmlBufferCreate(); */ + xmlNodeDump( buffer, n->doc, cnode, 0, 0 ); + if ( buffer->content != NULL ) { + xs_warn( "add item" ); + if ( retval != NULL ) { + retval = xmlStrcat( retval, buffer->content ); + } + else { + retval = xmlStrdup( buffer->content ); + } + } + xmlBufferFree( buffer ); + cnode = cnode->next; + } + } + } + } + + return retval; +} + +void +domSetNodeValue( xmlNodePtr n , xmlChar* val ){ + if ( n == NULL ) + return; + if ( val == NULL ){ + val = (xmlChar *) ""; + } + + if( n->type == XML_ATTRIBUTE_NODE ){ + /* can't use xmlNodeSetContent - for Attrs it parses entities */ + if ( n->children != NULL ) { + n->last = NULL; + xmlFreeNodeList( n->children ); + } + n->children = xmlNewText( val ); + n->children->parent = n; + n->children->doc = n->doc; + n->last = n->children; + } + else { + xmlNodeSetContent( n, val ); + } +} + + +xmlNodeSetPtr +domGetElementsByTagName( xmlNodePtr n, xmlChar* name ){ + xmlNodeSetPtr rv = NULL; + xmlNodePtr cld = NULL; + + if ( n != NULL && name != NULL ) { + cld = n->children; + while ( cld != NULL ) { + if ( xmlStrcmp( name, cld->name ) == 0 ){ + if ( rv == NULL ) { + rv = xmlXPathNodeSetCreate( cld ) ; + } + else { + xmlXPathNodeSetAdd( rv, cld ); + } + } + cld = cld->next; + } + } + + return rv; +} + + +xmlNodeSetPtr +domGetElementsByTagNameNS( xmlNodePtr n, xmlChar* nsURI, xmlChar* name ){ + xmlNodeSetPtr rv = NULL; + + if ( nsURI == NULL ) { + return domGetElementsByTagName( n, name ); + } + + if ( n != NULL && name != NULL ) { + xmlNodePtr cld = n->children; + while ( cld != NULL ) { + if ( xmlStrcmp( name, cld->name ) == 0 + && cld->ns != NULL + && xmlStrcmp( nsURI, cld->ns->href ) == 0 ){ + if ( rv == NULL ) { + rv = xmlXPathNodeSetCreate( cld ) ; + } + else { + xmlXPathNodeSetAdd( rv, cld ); + } + } + cld = cld->next; + } + } + + return rv; +} + +xmlNsPtr +domNewNs ( xmlNodePtr elem , xmlChar *prefix, xmlChar *href ) { + xmlNsPtr ns = NULL; + + if (elem != NULL) { + ns = xmlSearchNs( elem->doc, elem, prefix ); + } + /* prefix is not in use */ + if (ns == NULL) { + ns = xmlNewNs( elem , href , prefix ); + } else { + /* prefix is in use; if it has same URI, let it go, otherwise it's + an error */ + if (!xmlStrEqual(href, ns->href)) { + ns = NULL; + } + } + return ns; +} + +xmlAttrPtr +domGetAttrNode(xmlNodePtr node, const xmlChar *qname) { + xmlChar * prefix = NULL; + xmlChar * localname = NULL; + xmlAttrPtr ret = NULL; + xmlNsPtr ns = NULL; + + if ( qname == NULL || node == NULL ) + return NULL; + + /* first try qname without namespace */ + ret = xmlHasNsProp(node, qname, NULL); + if ( ret == NULL ) { + localname = xmlSplitQName2(qname, &prefix); + if ( localname != NULL ) { + ns = xmlSearchNs( node->doc, node, prefix ); + if ( ns != NULL ) { + /* then try localname with the namespace bound to prefix */ + ret = xmlHasNsProp( node, localname, ns->href ); + } + if ( prefix != NULL) { + xmlFree( prefix ); + } + xmlFree( localname ); + } + } + if (ret && ret->type != XML_ATTRIBUTE_NODE) { + return NULL; /* we don't want fixed attribute decls */ + } + else { + return ret; + } +} + +xmlAttrPtr +domSetAttributeNode( xmlNodePtr node, xmlAttrPtr attr ) { + if ( node == NULL || attr == NULL ) { + return attr; + } + if ( attr != NULL && attr->type != XML_ATTRIBUTE_NODE ) + return NULL; + if ( node == attr->parent ) { + return attr; /* attribute is already part of the node */ + } + if ( attr->doc != node->doc ){ + attr = (xmlAttrPtr) domImportNode( node->doc, (xmlNodePtr) attr, 1, 1 ); + } + else { + xmlUnlinkNode( (xmlNodePtr) attr ); + } + + /* stolen from libxml2 */ + if ( attr != NULL ) { + if (node->properties == NULL) { + node->properties = attr; + } else { + xmlAttrPtr prev = node->properties; + + while (prev->next != NULL) prev = prev->next; + prev->next = attr; + attr->prev = prev; + } + } + + return attr; +} + +void +domAttrSerializeContent(xmlBufferPtr buffer, xmlAttrPtr attr) +{ + xmlNodePtr children; + + children = attr->children; + while (children != NULL) { + switch (children->type) { + case XML_TEXT_NODE: + xmlAttrSerializeTxtContent(buffer, attr->doc, + attr, children->content); + break; + case XML_ENTITY_REF_NODE: + xmlBufferAdd(buffer, BAD_CAST "&", 1); + xmlBufferAdd(buffer, children->name, + xmlStrlen(children->name)); + xmlBufferAdd(buffer, BAD_CAST ";", 1); + break; + default: + /* should not happen unless we have a badly built tree */ + break; + } + children = children->next; + } +} + + +int +domNodeNormalize( xmlNodePtr node ); + +int +domNodeNormalizeList( xmlNodePtr nodelist ) +{ + while ( nodelist ){ + if ( domNodeNormalize( nodelist ) == 0 ) + return(0); + nodelist = nodelist->next; + } + return(1); +} + +int +domNodeNormalize( xmlNodePtr node ) +{ + xmlNodePtr next = NULL; + + if ( node == NULL ) + return(0); + + switch ( node->type ) { + case XML_TEXT_NODE: + while ( node->next + && node->next->type == XML_TEXT_NODE ) { + next = node->next; + xmlNodeAddContent(node, next->content); + xmlUnlinkNode( next ); + + /** + * keep only nodes that are referred by perl (or GDOME) + */ + if ( !next->_private ) + xmlFreeNode( next ); + } + break; + case XML_ELEMENT_NODE: + domNodeNormalizeList( (xmlNodePtr) node->properties ); + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: + return( domNodeNormalizeList( node->children ) ); + break; + default: + break; + } + return(1); +} + +int +domRemoveNsRefs(xmlNodePtr tree, xmlNsPtr ns) { + xmlAttrPtr attr; + xmlNodePtr node = tree; + + if ((node == NULL) || (node->type != XML_ELEMENT_NODE)) return(0); + while (node != NULL) { + if (node->ns == ns) + node->ns = NULL; /* remove namespace reference */ + attr = node->properties; + while (attr != NULL) { + if (attr->ns == ns) + attr->ns = NULL; /* remove namespace reference */ + attr = attr->next; + } + /* + * Browse the full subtree, deep first + */ + if (node->children != NULL && node->type != XML_ENTITY_REF_NODE) { + /* deep first */ + node = node->children; + } else if ((node != tree) && (node->next != NULL)) { + /* then siblings */ + node = node->next; + } else if (node != tree) { + /* go up to parents->next if needed */ + while (node != tree) { + if (node->parent != NULL) + node = node->parent; + if ((node != tree) && (node->next != NULL)) { + node = node->next; + break; + } + if (node->parent == NULL) { + node = NULL; + break; + } + } + /* exit condition */ + if (node == tree) + node = NULL; + } else + break; + } + return(1); +} + diff --git a/dom.h b/dom.h new file mode 100644 index 0000000..29cf2b5 --- /dev/null +++ b/dom.h @@ -0,0 +1,281 @@ +/* dom.h + * $Id$ + * Author: Christian Glahn (2001) + * + * This header file provides some definitions for wrapper functions. + * These functions hide most of libxml2 code, and should make the + * code in the XS file more readable . + * + * The Functions are sorted in four parts: + * part 0 ..... general wrapper functions which do not belong + * to any of the other parts and not specified in DOM. + * part A ..... wrapper functions for general nodeaccess + * part B ..... document wrapper + * part C ..... element wrapper + * + * I did not implement any Text, CDATASection or comment wrapper functions, + * since it is pretty straightforeward to access these nodes. + */ + +#ifndef __LIBXML_DOM_H__ +#define __LIBXML_DOM_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "ppport.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +} +#endif + +/** + * part 0: + * + * unsortet. + **/ + +void +domReconcileNs(xmlNodePtr tree); + +/** + * NAME domParseChar + * TYPE function + * SYNOPSIS + * int utf8char = domParseChar( curchar, &len ); + * + * The current char value, if using UTF-8 this may actually span + * multiple bytes in the given string. This function parses an utf8 + * character from a string into a UTF8 character (an integer). It uses + * a slightly modified version of libxml2's character parser. libxml2 + * itself does not provide any function to parse characters dircetly + * from a string and test if they are valid utf8 characters. + * + * XML::LibXML uses this function rather than perls native UTF8 + * support for two reasons: + * 1) perls UTF8 handling functions often lead to encoding errors, + * which partly comes, that they are badly documented. + * 2) not all perl versions XML::LibXML intends to run with have native + * UTF8 support. + * + * domParseChar() allows to use the very same code with all versions + * of perl :) + * + * Returns the current char value and its length + * + * NOTE: If the character passed to this function is not a UTF + * character, the return value will be 0 and the length of the + * character is -1! + */ +int +domParseChar( xmlChar *characters, int *len ); + +xmlNodePtr +domReadWellBalancedString( xmlDocPtr doc, xmlChar* string, int repair ); + +/** + * NAME domIsParent + * TYPE function + * + * tests if a node is an ancestor of another node + * + * SYNOPSIS + * if ( domIsParent(cur, ref) ) ... + * + * this function is very useful to resolve if an operation would cause + * circular references. + * + * the function returns 1 if the ref node is a parent of the cur node. + */ +int +domIsParent( xmlNodePtr cur, xmlNodePtr ref ); + +/** + * NAME domTestHierarchy + * TYPE function + * + * tests the general hierarchy error + * + * SYNOPSIS + * if ( domTestHierarchy(cur, ref) ) ... + * + * this function tests the general hierarchy error. + * it tests if the ref node would cause any hierarchical error for + * cur node. the function evaluates domIsParent() internally. + * + * the function will retrun 1 if there is no hierarchical error found. + * otherwise it returns 0. + */ +int +domTestHierarchy( xmlNodePtr cur, xmlNodePtr ref ); + +/** +* NAME domTestDocument +* TYPE function +* SYNOPSIS +* if ( domTestDocument(cur, ref) )... +* +* this function extends the domTestHierarchy() function. It tests if the +* cur node is a document and if so, it will check if the ref node can be +* inserted. (e.g. Attribute or Element nodes must not be appended to a +* document node) +*/ +int +domTestDocument( xmlNodePtr cur, xmlNodePtr ref ); + +/** +* NAME domAddNodeToList +* TYPE function +* SYNOPSIS +* domAddNodeToList( cur, prevNode, nextNode ) +* +* This function inserts a node between the two nodes prevNode +* and nextNode. prevNode and nextNode MUST be adjacent nodes, +* otherwise the function leads into undefined states. +* Either prevNode or nextNode can be NULL to mark, that the +* node has to be inserted to the beginning or the end of the +* nodelist. in such case the given reference node has to be +* first or the last node in the list. +* +* if prevNode is the same node as cur node (or in case of a +* Fragment its first child) only the parent information will +* get updated. +* +* The function behaves different to libxml2's list functions. +* The function is aware about document fragments. +* the function does not perform any text node normalization! +* +* NOTE: this function does not perform any highlevel +* errorhandling. use this function with caution, since it can +* lead into undefined states. +* +* the function will return 1 if the cur node is appended to +* the list. otherwise the function returns 0. +*/ +int +domAddNodeToList( xmlNodePtr cur, xmlNodePtr prev, xmlNodePtr next ); + +/** + * part A: + * + * class Node + **/ + +/* A.1 DOM specified section */ + +xmlChar * +domName( xmlNodePtr node ); + +void +domSetName( xmlNodePtr node, xmlChar* name ); + +xmlNodePtr +domAppendChild( xmlNodePtr self, + xmlNodePtr newChild ); +xmlNodePtr +domReplaceChild( xmlNodePtr self, + xmlNodePtr newChlid, + xmlNodePtr oldChild ); +xmlNodePtr +domRemoveChild( xmlNodePtr self, + xmlNodePtr Child ); +xmlNodePtr +domInsertBefore( xmlNodePtr self, + xmlNodePtr newChild, + xmlNodePtr refChild ); + +xmlNodePtr +domInsertAfter( xmlNodePtr self, + xmlNodePtr newChild, + xmlNodePtr refChild ); + +/* A.3 extra functionality not specified in DOM L1/2*/ +xmlChar* +domGetNodeValue( xmlNodePtr self ); + +void +domSetNodeValue( xmlNodePtr self, xmlChar* value ); + +xmlNodePtr +domReplaceNode( xmlNodePtr old, xmlNodePtr new ); + +/** + * part B: + * + * class Document + **/ + +/** + * NAME domImportNode + * TYPE function + * SYNOPSIS + * node = domImportNode( document, node, move, reconcileNS); + * + * the function will import a node to the given document. it will work safe + * with namespaces and subtrees. + * + * if move is set to 1, then the node will be entirely removed from its + * original document. if move is set to 0, the node will be copied with the + * deep option. + * + * if reconcileNS is 1, namespaces are reconciled. + * + * the function will return the imported node on success. otherwise NULL + * is returned + */ +xmlNodePtr +domImportNode( xmlDocPtr document, xmlNodePtr node, int move, int reconcileNS ); + +/** + * part C: + * + * class Element + **/ + +xmlNodeSetPtr +domGetElementsByTagName( xmlNodePtr self, xmlChar* name ); + +xmlNodeSetPtr +domGetElementsByTagNameNS( xmlNodePtr self, xmlChar* nsURI, xmlChar* name ); + +xmlNsPtr +domNewNs ( xmlNodePtr elem , xmlChar *prefix, xmlChar *href ); + +xmlAttrPtr +domGetAttrNode(xmlNodePtr node, const xmlChar *qname); + +xmlAttrPtr +domSetAttributeNode( xmlNodePtr node , xmlAttrPtr attr ); + +int +domNodeNormalize( xmlNodePtr node ); + +int +domNodeNormalizeList( xmlNodePtr nodelist ); + +int +domRemoveNsRefs(xmlNodePtr tree, xmlNsPtr ns); + +void +domAttrSerializeContent(xmlBufferPtr buffer, xmlAttrPtr attr); + +void +domClearPSVI(xmlNodePtr tree); + +#endif diff --git a/example/JBR-ALLENtrees.htm b/example/JBR-ALLENtrees.htm new file mode 100644 index 0000000..98de95d --- /dev/null +++ b/example/JBR-ALLENtrees.htm @@ -0,0 +1,601 @@ + + + + + + ALLEN Descendancies + + + + + +
+ +
+

The ALLEN Patrilineage Descendancies

+ +
+ + +
+

Posted Patrilineage Pedigrees & Descendancies

+
+

+The following ancestral ALLEN descendancies have been contributed by researchers of this ALLEN patrilineage, and are generally substantiated with +evidence. Each descendancy begins with the earliest known male ancestor of a particular sublineange and continues down to the tested male descendant. +Since this DNA patrilineage project is focused on tested or testable male ALLENs, these descendancy trees have been pruned not only of daughters, but +also of male lines which are known to have gone extinct or “daughtered out”. However, in some instances complete reconstructed families of +the first generation or two will be included because of their broad-based genealogical interest; in such cases males known or presumed to have died +without children will be flagged “no known issue”, or “(NKI)”.

+ +

+The information provided for each male ALLEN should be sufficient in most cases to uniquely identify him in the USCensus and other readily available +sources. These data comprise (insofar as is known): date and place of birth, date and place of death, the name(s) of his wife (or wives) and the +date and place of marriage. Indefinite dates are always qualified as either approximate (“abt”, “bef”, “aft”, or +“by”) or merely estimated (“say”). Approximated dates imply supporting evidence which merely fails of complete accuracy, while +“say” dates are guesstimates based on typical patterns of the time, place, and social group. In some places, I have adjusted dates provided +by the sources to conform to these conventions, taking into consideration the accompanying evidence.

+ +

+The yDNA-tested male descendants are flagged below with their Project #s and the “handle” of the Principal Researcher, e.g.  +Donald-05, Camilla-06).

+ +

+Inferences about the placement of the distinctive yDNA mutations of project members have been interwoven with their descendancies, below, in red; please note, however, the careful qualifications where they appear. Most inferences drawn from DNA evidence +are probabilistic in nature and one needs to keep an open mind about alternative interpretations, just as one does with the genealogy itself.

+ + +

invisible writing

+

Early ALLENs—Possible Project Ancestors

+

invisible writing

+

1-William of York County, Virginia, died about 1677

+

(SourcesHT ALLEN)

+

1--William Allen (- bef 12Nov1677 YorkCoVA)  m. Judith

+

|--2-Hudson Allen [no known descendants]

+

|--2-William Allen [supposed to be father of William of New Kent … Albemarle Cos, below]

+

invisible writing

+ +

Known ALLEN Project Sublineages

+ +

invisible writing

+

1-Robert Allen of New Kent, Hanover, Goochland, and Henrico Counties, VA, born say 1680

+

(Sources: This Robert descendancy is the main focus of MILLER, +and except for the final generations connecting to the present, has been derived from MILLER by John B. Robb. + It should be noted, however, that Melba Allen of Mississippi has an entirely different reconstruction of the top of this Allen tree.   Donna Bailey, who is a descendant of this line through Robert->Robert->Josiah of South Carolina, has furnished additional +material for her more recent ancestors.

+ +

1-Robert Allen (say 1680 - abt 1756 HenricoCoVA)

+

The mutation DYS447- probably appears early downstream of the patriarch Robert.

+

|--m1. say 1708 Elizabeth [WALKER?]

+

|--2-Robert Allen (say 1709 NewKentCoVA - abt 1783 CaswellCoNC)

+

|   |--m1. Elizabeth [YOUNG??]

+

|   |--3-Young Allen (by 1732 - 1774 WakeCoNC)

+

|   |   |--m. Martha [COLEMAN?]

+

|   |   |   MILLER adds sons Robert & Drury citing Young’s will +but that document lists only the sons below;

+

|   |   |   the bogus Drury was supposedly the progenitor of one of the IredellCoNC Allen families.

+

|   |   |--4-William Allen (say 1757 - after 1791)—no known issue

+

|   |   |--4-Coleman Allen (say 1760 - [GA?])—Colemans & other descendants turn up in GA

+

|   |   |--4-Robin Allen (say 1763 -)—no known issue

+

|   |   |--4-Young Allen (say 1766 -)—no known issue

+

|   |   |--4-Josiah Allen (say 1766 - 1823 AnsonCoNC)

+

|   |   |   |--m. 1794 Nancy WHITE, GranvilleCoNC—Sons William, Joshua, Young, & Joseph

+

|   |   |--4-David Allen (abt 1771 -)—removed to AnsonCoNC; no known issue

+

+ |   |--3-Josiah Allen (abt 1733 - 1781 EdgefieldCoSC)

+

|   |   |--m1. say 1757 _?_

+

|   |   |--4-James Allen (abt 1758 - 1826 MontgomeryCoAL)

+

|   |   |   |--m1. _?_

+

|   |   |   |--5-Lemuel Young Allen (abt 1797 EdgefieldCoSC - abt 1851 RuskCoTX)

+

|   |   |   |--m2. Rebecca HUDSON

+

|   |   |   |--5- ...who had sons James Jefferson, Alsey, Madison, and Josiah (all minors in 1826)

+

|   |   |--4-Robert Allen (abt 1760 - 1829 MontgomeryCoAL)

+

|   |   |--m. Nancy HAHM, in SC—Sons Dean, Wade Hampton, James, Robert, George

+

|   |   |--4-Josiah Allen (abt 1762 - 1796 EdgefieldCoSC)

+

|   |   |--m. Jennet—Sons William Winn, and Benjamin

+

|   |   |--4-Young Allen (abt 1764 - abt 1834 EdgefieldCoSC)

+

|   |   |--m. Susanna RICHARDSON— Son Aaron

+

|   |   |--4-Drury Allen (1777 EdgefieldCoSC - btw 26Apr1856 and 4Feb1857 ClarkeCoAL)

+

|   |   |--m1. abt 1799 [Margaret WAITE], EdgefieldCoSC

+

|   |   |   |--5-Josiah Allen 1800 EdgefieldCoSC - 1891 JasperCoMS)

+

|   |   |   |   |--m. abt 1833 Elizabeth HOWELL, RankinCoMS

+

|   |   |   |--5-Henry Allen abt 1801 EdgefieldCoSC - 1891 JasperCoMS)

+

|   |   |   |   |--m2. Sarah LATHAM

+

|   |   |   |--5-Drury Allen 1806 EdgefieldCoSC - 1908 ErathCoTX)

+

|   |   |   |   |--m1. 1826 Elizabeth BUCKELEW, ClarkeCoAL

+

|   |   |   |   |--m2. 1841 Elizabeth (_?_) WOOD, ClarkeCoAL

+

|   |   |   |   |--6-George Washington Allen (1851 ClarkeCoAL - 1943 ErathCoTX)

+

|   |   |   |   |   |--m. 1869 Sarah Elizabeth JORDAN, ClarkeCoAL

+

|   |   |   |   |   |--7-Henry Hampton Allen (1874 ClarkeCoAL - 1964 HoodCoTX)

+

|   |   |   |   |   |   |--m. 1892 Ida Leona Batts

+

|   |   |   |   |   |   |--8-Boyd Earl Allen (1910 ErathCoTX - 1990 ErathCoTX)

+

|   |   |   |   |   |   |   |--9-Danny Ray Allen     + *** Donna-08 ***     (has the DYS447- mutation)

+

|   |   |--m2. 1821 Nanch/Fannie WEBB, ClarkeCoAL

+

|   |--3-Robert Allen (abt 1735 - 1801 RichmondCoGA)

+

|   |        Robert settled initially with brother Josiah in old ColletonCoSC (now extinct)

+

|   |   |--m. abt 1760 Elizabeth WEST—Sons West, Jesse, Robert, Drury, Young, William

+

|   |        The wide gap between 3-Robert & 3-Drury suggests that there may have been 2 fathers Robert;

+

|   |        there is solid evidence at least that 3-Drury was the son of 2-Robert who died in CaswellCoNC..

+

|   |--3-Drury Allen (1749 LunenburgCoVA - 1826 PikeCoGA)

+

|   |   |--m. Elizabeth YARBOROUGH

+

|   |   |--4-Josiah Allen (abt 1769 [ButeCoNC?] - abt 1816 GreeneCoGA)

+

|   |   |   |--m. 1791 Elizabeth BROWNING—Sons Robert A., Young Drury, and Pleasant Josiah

+

|   |   |--4-Clement Allen (say 1773 - bef 1823 PikeCoGA

+

|   |   |   |--m. Nancy McKISSICK—Sons Josiah, Young Stokes, John Cunningham, Drury, and James.

+

|   |   |--4-Stokes Allen (say 1775 - by 1831 PikeCoGA

+

|   |   |   |--m. Susanna (GRAVES) FOUSHEE—Sons Clement Young, Thomas G., Stephen W., Young Drewry

+

|   |--m2. by 1765 Hannah (EDWARDS) HUDSON

+

|--2-Benjamin Allen (twin) (1711 NewKentCoVA - aft 1766 [HenricoCoVA?])—no known issue

+

|--2-Joseph Allen (twin) (1711 NewKentCoVA - 1771 CharlotteCoVA)

+

|   |--m. Sarah HUNT

+

|   |--3-Memucan Hunt Allen (1753 - abt 1833 AndersonCoKY)—sons Joseph and Charles

+

|   |--3-Memucan had brothers Benjamin, Robert, and Charles; the latter came with him to KY.

+

|--2-Drury Allen (1714 NewKentCoVA - abt 1803 PersonCoNC)

+

|   |--m. Sarah TINSLEY

+

|   |--3-David Allen (1746 LunenburgCoVA - 1828 PersonCoNC)

+

|   |   |--m. Catherine LANIER—sons Thomas, Drury, William

+

|--2-Richard Allen (say 1718 NewKentCoVA - aft 1772 [Henrico/HanoverCoVA])

+

|   |--m. _?_ (no record of marriage); probably had son Drury who sold his HanoverCo land

+

|--2-William Allen (say 1721 NewKentCoVA - abt 1785 GranvilleCoNC)—married twice but left no sons

+

|--m2. aft 1734 Sarah [WINSTON??]

+

invisible writing

+ +

invisible writing

+

1-William Allen of Mecklenburg County, VA, born say 1725

+

(SourcesDr. Bob Allen; + additions by John B. Robb based on MILLER, + CARPENTER/McKEE,
and the USCensus)

+

1-William Allen (say 1725 VA - 1789 MecklenburgCoVA)

+

|--m1. say 1750, _?_ WRAY

+

|--2-Turner Allen (abt 1751 - [LunenburgCoVA?])  m. Sally NESBITT

+

|--2-Drury Allen (abt 1753 - 1823 AnsonCoNC)

+

|   |--m. abt 1772, Eleanor (“Nelly”) JARROTT

+

|   |--3-Julius Allen (say 1773 - 1845 AnsonCoNC)

+

|   |   |--m. _?_ SMITH

+

|   |   |--4-Henderson Allen (1814 AnsonsCoNC - 1897 GA)---m. 1842 Sarah C. HAMMOND

+

|   |--3-Thomas Allen (abt 1775 - 1858 FayetteCoTN

+

|   |   |--m1. _?_ SMITH

+

|   |   |--4-Jeremiah Allen (say 1808 AnsonCoNC - aft 1858)

+

|   |   |--4-Miles Allen (abt 1811 AnsonCoNC - abt 1843 [FayetteCoTN?])

+

|   |   |   |--m. abt 1832 Eliza Ann MORRIS

+

|   |   |   |--5-William Thomas Allen (1834 TN - abt 1859)

+

|   |   |   |   |--m. abt 1857 Rosanna HYATT

+

|   |   |   |   |--6-William Addison Allen (1858 AnsonCoNC - 1928 AnsonCoNC)

+

|   |   |   |   |   |--m. 1878 Louisa Charlotte TOMLINSON

+

|   |   |   |   |   |--7-William Addison Allen (1891 AnsonCoNC - 1955 AnsonCoNC)

+

|   |   |   |   |   |   |--8-Clinton Thomas Allen     + *** Lynda-09 ***    (no mutations)

+

+ |   |   |--4-Edmond Allen (abt 1816 NC -)

+

|   |   |   |--m. Rebecca SMITH b.AL)

+

|   |   |--m2. Nancy

+

|   |--3-David Allen (abt 1780 VA - abt 1850 MarshallCoMS)

+

|   |   |--m1. Mary PARKER; m2. Elizabeth TURNER

+

|   |   |-?4-Claiborne Allen (abt 1815 NC - aft 1850 [MarshallCoMS?])

+

|   |--3-Benjamin Allen (abt 1781 NC - 1878 SumterCoAL)

+

|   |   |--m. abt 1812 Huldah PARKER

+

|   |   |--4-Richmond Allen (1813 NC - 1889 SumterCoAL)---m. 1834 Sarah R. ALLEN

+

|   |   |--4-Wiley Allen (1815 NC - 1891 SumterCoAL)---m. Elvira Ann Tiras PONDS

+

|   |   |        (removed to AL about 1840)

+

|   |   |--4-William Allen (abt 1817 NC - aft 1850 [ChoctawCoAL?])---m. Frances L.

+

|   |   |- 4-Enoch Allen (abt 1824 NC - 1896 Sumter/ChoctawCoAL?])---m. Jane S.

+

|   |   |- 4-Hampton Allen (1829 NC - 1903)---m. Susan Elizabeth BOWERS

+

|   |   |- 4-Robert Allen (abt 1834 NC - 1903)---m. Susan Elizabeth BOWERS

+

|   |   |- 4-Alexander Allen, a twin (abt 1837 NC - 1903)---m. Susan Elizabeth BOWERS

+

|   |   |- 4-Jeremiah Allen, a twin (abt 1837 NC -)---m. Molly SIMPSON (NKI)

+

|   |   |--4-other children: Jules, Dock (NKI), Norriss (NKI), Mary Ann, Harriet, Thomas (NKI)

+

|   |--3-William Allen ( -)

+

|   |--3-John Allen (abt 1792- 1857 AnsonCoNC)

+

The mutation CDYb- first appears with John or one of his descendants.

+

|   |   |--m. abt 1812 Mary (“Polly”) ALLEN (a 1st cousin)

+

|   |   |--4-Drury Allen (abt 1825 NC - bef 1880 [AnsonCoNC?])

+

|   |   |   |--m. Catharine Rowena BAUCOM

+

|   |   |   |--5-William Henry Allen (abt 1851 AnsonCoNC - 1924 StanlyCoNC)

+

|   |   |   |   |--m. 1873 Ellen CURLEE

+

|   |   |   |   |--6-Robert Sidney Allen (1876 AnsonCoNC - 1953 LeeCoSC)

+

|   |   |   |   |   |--m. 1903 Lilla Emile CRUMP

+

|   |   |   |   |   |--7-Flake Shellum Allen (1918 AnsonCoNC - 1963 AnsonCoNC)

+

|   |   |   |   |   |   |--8-Robert Shepherd Allen     + *** Dr.Bob-04 *** +    (has the CDYb- mutation)

+

|--2-William Allen (abt 1757 -)

+

|--2-John Allen (abt 1759 -)  m. 1783, Nancy MORGAN

+

|--2-Darling Allen (abt 1760 - 1802 AnsonCoNC)

+

|   |--m. 1793, Judith NANCE (Sons: Robert Nance b.1792, Darling)

+

|--2-Young Allen (abt 1764 - [LunenburgCoVA?])  m. Sarah POOLE

+

|--2-Pleasant Allen (abt 1766 -)  m. 1787, Rebecca WATSON

+

|--2-Meredith Allen (say 1768 - 1829 HenryCoVA)  m. 1788, Nancy COOPER

+

|--2-Gray Allen (abt 1770 -)  m. 1791, Molly NANCE (Son: Gray)

+

|--m2. say 1775, Ann

+

|--2-Robert Allen (abt 1776 - [HenryCoVA?])  m. Celia MULLINS

+

|--2-Joseph Smith Allen (1779 MecklenburgCoVA - StCharlesCoMO)

+

|   |--m1. 1807, Sarah WADE (Son William)

+

|   |--m2. abt 1811, Rachel MAY (Sons: William M., Robert L., John Pines, Joseph J.)

+

|--2-Pines Allen (abt 1782 MecklenburgCoVA - MO)

+

|   |--m1. 1807 HenryCoVA, Charlotte BAILEY (Sons: Robert B., Joseph J., John Parks, Charles C.

+

|   |--m2. 1821 Nancy HUGHES (Sons: Pines Henderson b.1825 NC, William M. b.1832 MO

+

-

+ +

invisible writing

+

1-Reynold Allen of Granville, Johnston, Wake, and Iredell Cos NC, born say 1723

+

(SourcesMILLER + (mostly based on contributions by Sara C. Allen))

+ +

1-Reynold Allen (say 1723 - abt 1808 [IredellCoNC?])

+

|--m. say 1751, Mary

+

|--2-Young Allen (say 1756 NC - 1834 WakeCoNC)

+

|   |--m. say 1779 Phebe PULLEN

+

|   |--3-James Akin Allen (abt 1782 WakeCoNC - 1862 WakeCoNC)

+

|   |   |--m1. Mary WYNNE

+

|   |   |--4-William Gaston Allen (1810 NC - 1858)   m. 1839 Martha B. SHIPP, WakeCoNC

+

|   |   |--4-Henry Young Allen (1824 NC -)  1850 Susan Ann OVERBY, GranvilleCoNC

+

|   |   |--4-Solomon J. Allen (abt 1831 - abt 1901)—no known issue

+

|   |   |--m2. 1849 Priscilla JACKSON

+

|   |--3-John Allen (abt 1784 NC - [aft 1860 CarrollCoTN??])  m. 1808 Sarah HARRISON WakeCoNC

+

|   |   |--4-Moses Harrison Allen (1808-1886 NC)   m. 1827 Lucy Williams Rhodes, WakeCoNC

+

|   |   |--4-Henry Anderson Allen (1814 - aft 1880 [WakeCoNC?]).

+

|   |   |   |--m. 1837, Sarah Elizabeth ROGERS, WakeCoNC

+

|   |   |--4-Wyatt Marion Allen (1824 WakeCoNC - 1863 WakeCoNC)

+

|   |   |   |--m. 1855 Martha Ann BAILEY, WakeCoNC

+

|   |--3-Henry Allen (- abt 1841 WakeCoNC)  m. 1831 Aley Sharpe Allen

+

|   |   |--4-James H. Allen (1815 NC -)  m. 1838, Elizabeth C. SPAIN

+

|   |--3-Miles Allen (- bef 1839 GibsonCoTN)

+

|   |   |--m. 1810 Jinsey ("Jane") BLEDSOE

+

|   |   |--4-Reynold Allen (1791 NC - abt 1879 WakeCoNC)

+

|   |   |   |--m1. 1824 Betsy Ann HARRISON

+

|   |   |   |--5-William Anderson Allen (1825 NC - 1884)  m. 1849 Maria G. HICKS

+

|   |   |   |--m2. 1836 Jane H. CANNON

+

|   |   |   |--5-Charles Nickolas Allen (abt 1837 -)  m. 1862 Caroline V. JOHNS WakeCoNC

+

|   |   |   |--5-James Bascumb Allen (abt 1851 -)

+

|   |--3-Young W. Allen (abt 1796 NC - aft 1880 [CarrollCoTN?]   m. Ann ROGERS

+

|--2-Reynold Allen (say 1758 - abt 1812 IredellCoNC)

+

|   |--3-William Moore Allen (abt 1784 NC - abt 1851 IredellCoNC)

+

|   |   |--4-Hinchea Allen  m. 1835 Catherine LITTLE, LincolnCoNC

+

|   |   |--4-David Allen

+

|   |   |--4-Burrell Allen  m. 1832 Rhoda L. HOKE, LincolnCoNC

+

|   |   |--4-John Allen

+

|   |   |--4-William Lee Allen

+

|   |   |--4-Augustus Allen

+

|   |--3-David Allen (say 1786 -)

+

|   |--3-Darling Allen (1788 IredellCoNC - 1867 WilkesCoNC)  m. 1809 Susan WALLIS, WilkesCoNC

+

|   |   |--4-(1841 - 1843 IredellCoNC)—no known issue

+

|   |   |--4-William Wallace Allen (1827 IredellCoNC - abt 1863)  m. Agnes MOORE

+

|   |   |--4-John Allen

+

|   |   |--4-Benton Carlton Allen (abt 1828 IredellCoNC -)

+

|   |--3-John Haden Allen ()

+

-

+ + +

invisible writing

+

1-William Allen of New Kent, Goochland, and Albemarle Counties, Virginia, born say 1691

+

(SourcesBill A. Allen, Donald + Allen, WICKER. Project members Bill A., and Donald have researched their lines extensively and published +books on them which credibly link their TN ancestors to NC and VA respectively, and both books are full of anecdotal and background information which +brings their genealogy to life: see Bill-A_ALLEN, and Donald_ALLEN)

+ +

1-William Allen (say 1691 - 1752 AlbemarleCoVA)

+

|--m1. abt 1712 Hannah [WATSON?]

+

|--2-Samuel Allen (1713 -)

+

|   |--m. Martha ARCHER

+

|--m2. 1720 Mary (HUNT) MINGE

+

|--2-William Hunt Allen (1724 -1806 BuckinghamCoVA)  m. Elizabeth

+

+ |--2-John Allen (1726 - 1754 AlbemarleCoVA)

+

|   |--m. abt 1748 Betheniah Thomas NEVIL

+

|   |--3-Samuel Allen (1747 - 1800 AmherstCoVA)

+

|   |   |--m. 1771 Hannah JOPLING

+

|   |   |--4-George Allen (1773 [AmherstCoVA?] - 1835 WhiteCo, later DekalbCoTN)

+

|   |   |   |--m. 1798 Phebe WALKER, VA

+

|   |   |   |--5-John W[alker?] Allen (abt 1800 [AmherstCoVA?] - aft 1870 [DeKalbCoTN?])

+

|   |   |   |   |--m. abt 1823 Lucy W. FLOWERS

+

|   |   |   |   |--6-George Allen (abt 1832 TN -)

+

|   |   |   |   |--6-William Allen (abt 1834 TN -)

+

|   |   |   |   |--6-John M. Allen (1843 SmithCoTN - 1902 WiseCoTN)

+

|   |   |   |   |   |--m. 1876 Sarah Ann PELTON TarrantCoTX

+

|   |   |   |   |   |--7-O'Guster ("Gus") Allen (1877 TarrantCoTN - 1959 OklahomaCityOK)

+

|   |   |   |   |   |   |--m. 1895 Frances Rosine JACKSON JackCoTX

+

|   |   |   |   |   |   |--8-John Elmer Allen (1907 OklahomaTerr - 1989 OklahomaCityOK)

+

|   |   |   |   |   |   |   |--9-Donald Lee Allen     + *** Donald-05 ***

+

|   |   |   |--5-John W. had possible brothers Jesse, Samuel, William D, and George H

+

|   |   |--4-Jesse Allen (1777 [AmherstCoVA?] - 1857 DekalbCoTN)

+

|   |   |   |--m. Nancy WALKER va

+

|   |   |--4-Samuel Hunt Allen (abt 1785 AmherstCoVA? - abt 1831)

+

|   |   |   |--m. by 1810 Polly WALKER VA

+

|   |   |--4-John Allen (abt 1786 AmherstCoVA? - 1854)

+

|   |--3-Jesse Allen (abt 1748 - abt 1781 BuckinghamCoVA)

+

|--2-Valentine Allen (1730 - 1797 RockinghamCoNC)  m. Nancy Ann ARNOLD

+

The mutation DYS449+ first appears with Valentine or one of his descendants.

+

|   |--m. 1753 Nancy Ann ARNOLD CumberlandCoVA

+

|   |--3-William Hunt Allen (by 1755 - abt 1822 BedfordCoTN)

+

|   |   |--m. 1777 Agatha SCALES RockinghamCoNC

+

|   |   |--4-George Hunt Allen (1780 RockinghamCoNC - 1874 MarshallCoTN)

+

|   |   |   |--m. 1804 Mary OGILVIE DavidsonCoTN

+

|   |   |   |--5-Grant Iverson Allen (1805 WilliamsonCoTN - 1891 MarshallCoTN)

+

|   |   |   |   |--m. 1830 Nancy Elizabeth ALLEN WilliamsonCoTN

+

|   |   |   |   | +--6-Thomas Alexander Allen (1837 MarshallCoTN - 1917 MarshallCoTN)

+

|   |   |   |   |   |    +---m. 1867 Mary Fredonia Jane Ewing WilliamsonCoTN

+

|   |   |   |   |   | +--7-William Harris Allen (1876 MarshallCoTN - 1942 MarshallCoTN)

+

|   |   |   |   |   |   |    +---m. 1909 Ruth Hunter WilliamsonCoTN

+

|   |   |   |   |   |   | +--8-Thomas Hunter Allen (1912 MarshallCoTN - 1972 MarshallCoTN)

+

|   |   |   |   |   |   |   | +--9-William Alfred Allen    *** Bill-A-03 *** +    (has the DYS449+ mutation)

+

|   |--3-George Allen (abt 1774 - 1854 MarshallCoTN)  m. Annie Eliza PATRICK

+

|--2-George Hunt Allen (1734 - 1778)  m. Mary BALLARD

+

|--2-Phillip Allen (1740 - 1763)

+

-

+ +

invisible writing

+

1-James T Allen of Bastrop, and Wise County, TX, born 1832

+

(SourcesBill Bernard Allen, + Fletcher Thomason)

+

The mutation DYS449+ occurs either downstream or within a few generations upstream of James T. Allen.

+

1-James T. Allen (1832 TN - 1900 WiseCoTX)

+

|--m. say 1755, Sarah ("Sallie") C. MILLER

+

|--2-Henry Clay Allen (1857 EllisCoTX - 1946 PhoenixAZ)

+

|   |--m1. abt 1878 Edna Eliza TIMMONS

+

|   |--3-William Louis Allen (1879 WiseCoTX - 1966 MercerCoIL)

+

|   |   |--m. abt 1910 Maude SWARTOUT

+

|   |   |--4-James Elwood Allen (1927 MercerCoIL - 2008 RockIslandCoIL)

+

|   |   |   |--m. Violet Irene GEORGE

+

|   |   |   |--5-William Bernard Allen (1949 -)

+

|   |   |   |   | +--6-William Bernard Allen (1971)    *** Bill-B-01 *** +    (has the DYS449+ mutation)

+

|   |--m2. 1881 Molly Eugenia Brazziel (Children: Tom, James, Etta Anna)

+

|--2-Frank Marion Allen (1866 - )

+

|--m1. abt 1895 Loucille LYNCH

+

|   |--3-Tincy boy Kenneth Allen (1912 CottleCoTX - 1992 WiseCoTX)

+

|   |   |--m. Dovie Elizabeth BARTON

+

|   |   |--4-Lee Roy Allen (1942 TarrantCoTX -) *** Fletcher-12 ***

+

invisible writing

+

1-Charles Henry Francis Marion Allen of Georgia, and Cherokee County, Alabama, born abt 1826

+

(Sources: Winston Allen, and John B. Robb)

+

The mutation DYS447- occurs either downstream or within a few generations upstream of Charles Henry Allen.

+ +

1-Charles Henry Francis Marion Allen (abt 1825 GA - bef 1900 [AL?])

+

|--m. abt 1846 Francis [TOWERS?]

+

|--2-James T. Allen (abt 1849 AL -)

+

|--2-A[sa] Benjamin R. Allen (1861 AL - bef 1910 [LamarCoTX?])

+

|   |--m. abt 1884 Martha Lou CHISENHALL

+

|   |--3-James Lawrence Allen (1891 LamarCoTX -)

+

|   |   |--m. Essie Velma RATLIFF

+

|   |   |--4-Winston Allen     + *** Winston-14 *** + (has the DYS447- mutation) +

+ +

invisible writing

+

1-Joseph Allen of Elbert County, Georgia, born abt 1760

+

(SourcesLouDean Mayes)

+

The mutation DYS447- occurs upstream of the patriarch, Joseph, of this sublineage.

+

The mutations DYS570- & 464- occur probably downstream of Joseph.

+ +

1-Joseph Allen (say 1760 GA - 1833 ElbertCoGA)

+

|--m. bef 1790 Agnes PATTERSON

+

|--2-James Allen (1808 ElbertCoGA - 1880 HartCoGA)

+

|   |--m. 1837 Mary A. HAYNES, ElbertCoGA

+

|   |--3-James Monroe Allen (1846 ElbertCoGA - 1921 AndersonCoSC)

+

|   |   |--m. 1867 Mary Elizabeth Frances SANDERS, HartCoGA

+

|   |   |--4-James Henry Edward Allen (1869 HartCoGA - 1943 HartCoGA)

+

|   |   |--m. 1910 Mary Lula OWENS, HartCoGA

+

|   |   |   |--5-Ira Wilson Allen 1914 HartCoGA - 1976 HartCoGA)

+

|   |   |   |   |--6-Ira Wilson Allen, Jr     + *** LouDean-07 ***     + (has the DYS447- mutation, also 570- and 464-)

+

invisible writing

+

1-Russell Allen of Franklin County, AL, born abt 1828 in GA

+

(SourcesOwen Allen)

+

The mutation CDYb- occurs either downstream or within a few generations upstream of Russell.

+

1-Russell Allen (abt 1828 GA - bef 1880 [FranklinCoAL?])

+

|--m. abt 1849 Millie

+

|--2-Peter Allen (abt 1857 - bef 1920)

+

|   |--m. abt 1880, Savannah

+

|   |--3-Morrison Allen (1881 AL - 1976)

+

|   |   |--m. Lula Elizabeth WHITE

+

|   |   |--4-Owen Tillman Allen (1909 ColbertCoAL - 1958)

+

|   |   |   |--5-Owen Allen    *** Owen-10 *** +    (has the CDYb- mutation)

+

-

+ +

invisible writing

+

1-James Allen of Gonzales County, TX, born 1814 in NC

+

(SourcesCamilla Mitchell)

+

1-James Allen (1814 NC - 1868 GonzalesCoTX)

+

|--m1. 1835 Camilla Catherine Tores LILLY [MontgomeryCoNC?])

+

|--2-James Robberson Allen (1841 - 1842)

+

|--2-Robbert Alexander Allen (1843 - 1863 MadisonParishLA)—no known issue

+

|--m2. 1852 Mary Ann Key, MaconCoAL

+

|--2-Henry Clarence Allen (1852 AL -)

+

|--2-Augustus Key Allen (1858 -)

+

|--2-Marion Jackson Allen (1861 -)

+

|--2-Thomas Jefferson Allen (1865 GonzalesCoTX - 1926 SanAntonioTX)

+

|   |--m. 1898 Lillie Louisa BEACH, ElPasoTX

+

|   |--3-Marion Thomas Allen (1905 DimmitCoTX - 1986)

+

|   |   |--4-William Douglas Allen    *** Camilla-06 *** +    (no mutations—is the project RPH)

+

-

+ +

invisible writing

+

1-Robert N. Nesbitt of Hunt County, TX, born 1831 in TN

+

(SourcesGeoff Nesbitt)

+

1-Robert N. Nesbitt (1831 TN - aft 1910)

+

|--m. abt 1854 Martha E. SHEPARD

+

|--2-William Allen Nesbitt (1860 TX - 1937)

+

|   |--m. 1862 Caddie VANNERSON, HopkinsCoTX

+

|   |--3-Robert Allen Nesbitt (1889 TX - 1934)

+

|   |   |--m. Robbie RIKE

+

|   |   |--4-Robert Allen Nesbitt, Jr. (1914 AdaOK - 1988 GalvestonTX)

+

|   |   |   |--5-Geoffrey Robert Nesbitt +    *** Geoff-11 *** +    (no mutations)

+

-

+ +

invisible writing

+

1-Frank Harris of Marion County, Alabama, born say 1850

+

(SourcesSandra Hunt)

+

The mutation CDYB+ occurs either downstream or within a few generations upstream of Frank Harris.

+

1-Frank Harris (say 1850 AL - 1900 MarionCoAL)

+

|--m. abt 1875 Lucy M. [PATE?]

+

|--2-William Earl Harris (1883 AL - 1972)

+

|   |--m1. abt 1903 Neety PALMER

+

|   |--3-Willie Dalton Harris (abt 1904 MarionCoAL - 1992)

+

|   |   |--4-Hugh E. Harris +    *** Sandra-13 *** +    (has the CDYb+ mutation)

+

|   |--m2. abt 1909 Artie TICE

+ +

invisible writing

+

1-Gideon A. Allen of Twiggs Co GA, and Bossier Parish Louisiana, born 1804 in NC

+

(SourcesCary Allen, whose web page has more on his Allen line)

+ +

The mutation 464- occurs either downstream or within a few generations upstream of Gideon.

+

1-Gideon A. Allen (1804 NC - 1875 BossierParLA)

+

|--m. 1828 Mary Ann HORN, TwiggsCoGA

+

|--2-John G. Allen (1835 TwiggsCoGA - 1906 BossierParLA)

+

|   |--m. 1854 Emily L. SPURLIN, BossierParLA

+

|   |--3-John Claud Allen (1869 BossierParLA - 1927 BossierParLA)

+

|   |   |--m. 1896 Mary Louwellen ALLEN

+

|   |   |--4-Claud Tarkington Allen (1900 BossierParLA - 1990 GreenvilleCoSC)

+

|   |   |   |--m. 1921 Iona Olive ARNOLD, CaddoParLA

+

|   |   |   |--5-John Clyde Allen (1931 BossierParLA - 2003)

+

|   |   |   |   |--6-Cary Neal Allen +    *** Cary-02 *** +    (has a 464- mutation, to 11-13-14-15)

+ +

+
+
+ + +
+

-

+ + +
+ +
Navigating from here
+

+The menu buttons at top right takes you to other pages on this site, while the nav panel above targets other points on +thispage, or brings up other resources. If you find yourself lost, the browser BACK button will take you back to where you +were (some people also have a convenient BACK button on their mouse, right under their thumb). Or hitting the Home key of +your keyboard will take you back to the top of this page where you are now.

+ + +
+
+ + + + +
+ + + + + + + + + + + diff --git a/example/article.xml b/example/article.xml new file mode 100644 index 0000000..ddb825e --- /dev/null +++ b/example/article.xml @@ -0,0 +1,9 @@ +
+Something here +12345 +2001-04-01 +XML.com +
Foo
+Here's some leading text +And here is the rest... +
diff --git a/example/article_bad.xml b/example/article_bad.xml new file mode 100644 index 0000000..469ac86 --- /dev/null +++ b/example/article_bad.xml @@ -0,0 +1,8 @@ +
+Something here +12345 +XML.com +
Foo
+Here's some leading text +And here is the rest... +
diff --git a/example/article_external_bad.xml b/example/article_external_bad.xml new file mode 100644 index 0000000..fc8715e --- /dev/null +++ b/example/article_external_bad.xml @@ -0,0 +1,10 @@ + +
+Something here +12345 +2001-04-01 +XML.com +
Foo
+Here's some leading text +And here is the rest... +
diff --git a/example/article_internal.xml b/example/article_internal.xml new file mode 100644 index 0000000..72e3354 --- /dev/null +++ b/example/article_internal.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + +]> +
+Something here +12345 +2001-04-01 +XML.com +
Foo
+Here's some leading text +And here is the rest... +
diff --git a/example/article_internal_bad.xml b/example/article_internal_bad.xml new file mode 100644 index 0000000..cde96fd --- /dev/null +++ b/example/article_internal_bad.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + +]> +
+Something here +12345 +2001-04-01 +XML.com +
Foo
+Here's some leading text +And here is the rest... +
diff --git a/example/bad.dtd b/example/bad.dtd new file mode 100644 index 0000000..dfa3219 --- /dev/null +++ b/example/bad.dtd @@ -0,0 +1 @@ + diff --git a/example/bad.xml b/example/bad.xml new file mode 100644 index 0000000..ca77223 --- /dev/null +++ b/example/bad.xml @@ -0,0 +1,3 @@ + + + diff --git a/example/catalog.xml b/example/catalog.xml new file mode 100644 index 0000000..4268ced --- /dev/null +++ b/example/catalog.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/example/cb_example.pl b/example/cb_example.pl new file mode 100644 index 0000000..a11d46b --- /dev/null +++ b/example/cb_example.pl @@ -0,0 +1,59 @@ +use strict; +use warnings; + +use XML::LibXML; +use IO::File; + +# first instanciate the parser +my $parser = XML::LibXML->new(); + +# initialize the callbacks +$parser->match_callback( \&match_uri ); +$parser->read_callback( \&read_uri ); +$parser->open_callback( \&open_uri ); +$parser->close_callback( \&close_uri ); + +# include XIncludes on the fly +$parser->expand_xinclude( 1 ); + +# parse the file "text.xml" in the current directory +my $dom = $parser->parse_file("test.xml"); + +print $dom->toString() , "\n"; + +# the callbacks follow +# these callbacks are used for both the original parse AND the XInclude +sub match_uri { + my $uri = shift; + return $uri !~ /:\/\// ? 1 : 0; # we handle only files +} + +sub open_uri { + my $uri = shift; + + my $handler = new IO::File; + if ( not $handler->open( "<$uri" ) ){ + $handler = 0; + } + + return $handler; +} + +sub read_uri { + my $handler = shift; + my $length = shift; + my $buffer = undef; + if ( $handler ) { + $handler->read( $buffer, $length ); + } + return $buffer; +} + +sub close_uri { + my $handler = shift; + if ( $handler ) { + $handler->close(); + } + return 1; +} + diff --git a/example/complex/complex.dtd b/example/complex/complex.dtd new file mode 100644 index 0000000..b9be794 --- /dev/null +++ b/example/complex/complex.dtd @@ -0,0 +1,2 @@ + +%f; diff --git a/example/complex/complex.xml b/example/complex/complex.xml new file mode 100644 index 0000000..1ce5a3e --- /dev/null +++ b/example/complex/complex.xml @@ -0,0 +1,6 @@ + +%e; +]> + + diff --git a/example/complex/complex2.xml b/example/complex/complex2.xml new file mode 100644 index 0000000..1fdcb5b --- /dev/null +++ b/example/complex/complex2.xml @@ -0,0 +1,3 @@ + +&foo; + diff --git a/example/complex/dtd/f.dtd b/example/complex/dtd/f.dtd new file mode 100644 index 0000000..a0c01fd --- /dev/null +++ b/example/complex/dtd/f.dtd @@ -0,0 +1,3 @@ + + +%g; diff --git a/example/complex/dtd/g.dtd b/example/complex/dtd/g.dtd new file mode 100644 index 0000000..f811fce --- /dev/null +++ b/example/complex/dtd/g.dtd @@ -0,0 +1,2 @@ + + diff --git a/example/create-sample-html-document.pl b/example/create-sample-html-document.pl new file mode 100644 index 0000000..312328d --- /dev/null +++ b/example/create-sample-html-document.pl @@ -0,0 +1,167 @@ +#!/usr/bin/perl + +=head1 ABOUT + +This is a sample program to generate an HTML document using XML::LibXML's +DOM routines. It was written to resolve +L . Thanks to Dan Jacobson. + +=cut + +use strict; +use warnings; + +use XML::LibXML; + +{ + my $doc = XML::LibXML->createDocument; + + # A small Domain-Specific-Language for generating DOM: + my $_text = sub { + my ($content) = @_; + + return $doc->createTextNode($content); + }; + + # Short for element. + my $_el = sub { + my $name = shift; + my $param = shift; + my $attrs = {}; + if (ref($param) eq 'HASH') + { + $attrs = $param; + $param = shift; + } + my $childs = $param; + + my $elem = $doc->createElementNS("", $name); + + while (my ($k, $v) = each %$attrs) + { + $elem->setAttribute($k, $v); + } + + foreach my $child (@$childs) + { + $elem->appendChild($child); + } + + return $elem; + }; + + my $html = $_el->( + 'html', + [ + $_el->( + 'head', + [ + $_el->( + 'title', + [ + $_text->("Sample HTML document as generated by XML::LibXML"), + ], + ), + $_el->( + 'meta', + { 'http-equiv' => 'Content-Type', + 'content' => 'text/html; charset=utf-8' + }, + [], + ), + ], + ), + $_el->( + 'body', + [ + $_el->( + 'p', + [ + $_text->("Introducing a link - "), + $_el->( + 'a', + { 'href' => 'http://www.wikipedia.org/', }, + [ + $_text->("Link to Wikipedia"), + ], + ), + $_text->(". We hope you enjoyed it."), + ], + ), + $_el->( + 'p', + [ + $_el->( + 'img', + { 'src' => 'http://example.com/non-exist.png', + 'alt' => 'non-existing image', + }, + [], + ), + ], + ), + $_el->( + 'ol', + [ + $_el->( + 'li', + [ + $_el->( + 'p', + [ + $_text->("First item."), + ], + ), + ], + ), + $_el->( + 'li', + [ + $_el->( + 'p', + [ + $_text->("Second item."), + ], + ), + ], + ), + ], + ), + ], + ), + ], + ); + $doc->setDocumentElement( $html ); + + print $doc->toStringHTML(); +} + +=head1 COPYRIGHT & LICENSE + +Copyright 2016 by Shlomi Fish + +This program is distributed under the MIT (X11) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/example/dromeds.xml b/example/dromeds.xml new file mode 100644 index 0000000..1e4e70e --- /dev/null +++ b/example/dromeds.xml @@ -0,0 +1,15 @@ + + + + 1 or 2 + Cranky + + + 1 (sort of) + Aloof + + + (see Llama) + Friendly + + diff --git a/example/dtd.xml b/example/dtd.xml new file mode 100644 index 0000000..ab9302e --- /dev/null +++ b/example/dtd.xml @@ -0,0 +1,5 @@ + + +]> +This is a valid document &foo; ! diff --git a/example/enc2_latin2.html b/example/enc2_latin2.html new file mode 100644 index 0000000..fdce135 --- /dev/null +++ b/example/enc2_latin2.html @@ -0,0 +1,5 @@ + + +

ì¹èø

+ + diff --git a/example/enc_latin2.html b/example/enc_latin2.html new file mode 100644 index 0000000..0c0c284 --- /dev/null +++ b/example/enc_latin2.html @@ -0,0 +1,8 @@ + + + + + +

ì¹èø

+ + diff --git a/example/ext_ent.dtd b/example/ext_ent.dtd new file mode 100644 index 0000000..d96dcac --- /dev/null +++ b/example/ext_ent.dtd @@ -0,0 +1,2 @@ + + diff --git a/example/ns.xml b/example/ns.xml new file mode 100644 index 0000000..50ba609 --- /dev/null +++ b/example/ns.xml @@ -0,0 +1,4 @@ + + Camelid + 4 + diff --git a/example/test.dtd b/example/test.dtd new file mode 100644 index 0000000..8f7a1b1 --- /dev/null +++ b/example/test.dtd @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/example/test.html b/example/test.html new file mode 100644 index 0000000..294d2b7 --- /dev/null +++ b/example/test.html @@ -0,0 +1,182 @@ + + +XML::LibXML::Document - DOM Document Class + + + + + + + + + + +
+

+

NAME

+

+XML::LibXML::Document - DOM Document Class + +

+


+

SYNOPSIS

+

+

  use XML::LibXML::Document;
+
+

+

  $dom = XML::LibXML::Document->new( $version, $encoding );
+  $dom = XML::LibXML::Document->createDocument( $version, $encoding );
+  $strEncoding = $doc->getEncoding();
+  $strVersion = $doc->getVersion();
+  $docstring = $dom->toString([$format]);
+  $bool = $dom->is_valid();
+  $root = $dom->getDocumentElement($name, $namespace );
+  $dom->setDocumentElement( $root );
+  $element = $dom->createElement( $nodename );
+  $element = $dom->createElementNS( $namespaceURI, $qname );
+  $text = $dom->createTextNode( $content_text );
+  $comment = $dom->createComment( $comment_text );
+  $attrnode = $doc->createAttribute($name [,$value]);
+  $attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] );
+  $cdata = $dom->create( $cdata_content );
+  $document->importNode( $node [, $move] );
+
+

+


+

DESCRIPTION

+

+The Document Class is the result of a parsing process. But sometimes it is +necessary to create a Document from scratch. The DOM Document Class +provides functions that are conform to the DOM Core naming style. It +inherits all functions from XML::LibXML::Node as specified in DOM Level2. This enables to access the nodes beside the +root element on document level - a DTD for example. The support for these nodes is limited at the moment, so I +would recommend, not to use node functions on documents. It is suggested that one should always create a node not bound to any +document. There is no need of really including the node to the document, +but once the node is bound to a document, it is quite safe that all strings +have the correct encoding. If an unbound textnode with an iso encoded +string is created (e.g. with $CLASS->new()), the toString function may not return the expected result. This seems like a limitation +as long UTF8 encoding is assured. If iso encoded strings come into play it +is much safer to use the node creation functions of XML::LibXML::Document. + +

+


+

Methods

+
+
new
+

+alias for createDocument() + +

createDocument
+

+The constructor for the document class. As Parameter it takes the version +string and (optionally) the ecoding string. Simply calling createDocument will create the document: + +

+

+  <?xml version="your version" encoding="your encoding"?>
+
+

+Both parameter are optional. The default value for $version is 1.0 , of course. If the $encoding parameter is not set, the encoding will be left unset, which means UTF8 is +implied (and set). The call of createDocument without any parameter will result the following code: + +

+

+  <?xml version="1.0"?>
+
+
getEncoding
+

+returns the encoding string of the document + +

getVersion
+

+returns the version string of the document + +

toString
+

+toString is a deparsing function, so the DOM Tree can be translated into a string, +ready for output. The optional $format parameter sets the indenting of the output. This parameter is expected to +be an integer value, that specifies the number of linebreaks for each node. For more +information about the formatted output check the documentation of xmlDocDumpFormatMemory in libxml2/tree.h . + +

is_valid
+

+Returns either TRUE or FALSE depending on the DOM Tree is a valid Document +or not. + +

getDocumentElement
+

+Returns the root element of the Document. A document can have just one root +element to contain the documents data. + +

setDocumentElement
+

+This function enables you to set the root element for a document. The +function supports the import of a node from a different document tree. + +

createElement
+

+This function creates a new Element Node bound to the DOM with the name $nodename . + +

createElementNS
+

+This function creates a new Element Node bound to the DOM with the name $nodename and placed in the given namespace. + +

createTextNode
+

+As an equivalent of createElement , but it creates a Text Node bound to the DOM. + +

createComment
+

+As an equivalent of createElement , but it creates a Comment Node bound to the DOM. + +

createAttribute
+

+Creates a new Attribute node. This function is rather useless at the +moment, since there is no setAttributeNode function defined in XML::LibXML::Element , yet. + +

createAttributeNS
+

+Creates an Attribute bound to a namespace. + +

createCDATASection
+

+Similar to createTextNode and createComment, this function creates a +CDataSection bound to the current DOM. + +

importNode
+

+If a node is not part of a document, it can be imported to another +document. As specified in DOM Level 2 Specification the Node will not be +altered or removed from its original document by default. ( $node-cloneNode(1)> will get called implicitly). Sometimes it is +necessary to move a node between documents. In such a case the node will not be copied, but +removed from the original document. + +

+

+


+

SEE ALSO

+

+XML::LibXML, XML::LibXML::Element, XML::LibXML::Text, XML::LibXML::Attr, +XML::LibXML::Comment + +

+


+

VERSION

+

+0.90_a + + + + diff --git a/example/test.xhtml b/example/test.xhtml new file mode 100644 index 0000000..8088ae3 --- /dev/null +++ b/example/test.xhtml @@ -0,0 +1,109 @@ + +XML::LibXML::Document - DOM Document Class


NAME

+XML::LibXML::Document - DOM Document Class + +


SYNOPSIS

  use XML::LibXML::Document;
+

  $dom = XML::LibXML::Document->new( $version, $encoding );
+  $dom = XML::LibXML::Document->createDocument( $version, $encoding );
+  $strEncoding = $doc->getEncoding();
+  $strVersion = $doc->getVersion();
+  $docstring = $dom->toString([$format]);
+  $bool = $dom->is_valid();
+  $root = $dom->getDocumentElement($name, $namespace );
+  $dom->setDocumentElement( $root );
+  $element = $dom->createElement( $nodename );
+  $element = $dom->createElementNS( $namespaceURI, $qname );
+  $text = $dom->createTextNode( $content_text );
+  $comment = $dom->createComment( $comment_text );
+  $attrnode = $doc->createAttribute($name [,$value]);
+  $attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] );
+  $cdata = $dom->create( $cdata_content );
+  $document->importNode( $node [, $move] );
+


DESCRIPTION

+The Document Class is the result of a parsing process. But sometimes it is +necessary to create a Document from scratch. The DOM Document Class +provides functions that are conform to the DOM Core naming style. It +inherits all functions from XML::LibXML::Node as specified in DOM Level2. This enables to access the nodes beside the +root element on document level - a DTD for example. The support for these nodes is limited at the moment, so I +would recommend, not to use node functions on documents. It is suggested that one should always create a node not bound to any +document. There is no need of really including the node to the document, +but once the node is bound to a document, it is quite safe that all strings +have the correct encoding. If an unbound textnode with an iso encoded +string is created (e.g. with $CLASS->new()), the toString function may not return the expected result. This seems like a limitation +as long UTF8 encoding is assured. If iso encoded strings come into play it +is much safer to use the node creation functions of XML::LibXML::Document. + +


Methods

new

+alias for createDocument()

createDocument

+The constructor for the document class. As Parameter it takes the version +string and (optionally) the ecoding string. Simply calling createDocument will create the document: + +

+  <?xml version="your version" encoding="your encoding"?>
+

+Both parameter are optional. The default value for $version is 1.0 , of course. If the $encoding parameter is not set, the encoding will be left unset, which means UTF8 is +implied (and set). The call of createDocument without any parameter will result the following code: + +

+  <?xml version="1.0"?>
+
getEncoding

+returns the encoding string of the document + +

getVersion

+returns the version string of the document + +

toString

toString is a deparsing function, so the DOM Tree can be translated into a string, +ready for output. The optional $format parameter sets the indenting of the output. This parameter is expected to +be an integer value, that specifies the number of linebreaks for each node. For more +information about the formatted output check the documentation of xmlDocDumpFormatMemory in libxml2/tree.h . + +

is_valid

+Returns either TRUE or FALSE depending on the DOM Tree is a valid Document +or not. + +

getDocumentElement

+Returns the root element of the Document. A document can have just one root +element to contain the documents data. + +

setDocumentElement

+This function enables you to set the root element for a document. The +function supports the import of a node from a different document tree. + +

createElement

+This function creates a new Element Node bound to the DOM with the name $nodename . + +

createElementNS

+This function creates a new Element Node bound to the DOM with the name $nodename and placed in the given namespace. + +

createTextNode

+As an equivalent of createElement , but it creates a Text Node bound to the DOM. + +

createComment

+As an equivalent of createElement , but it creates a Comment Node bound to the DOM. + +

createAttribute

+Creates a new Attribute node. This function is rather useless at the +moment, since there is no setAttributeNode function defined in XML::LibXML::Element , yet. + +

createAttributeNS

+Creates an Attribute bound to a namespace. + +

createCDATASection

+Similar to createTextNode and createComment, this function creates a +CDataSection bound to the current DOM. + +

importNode

+If a node is not part of a document, it can be imported to another +document. As specified in DOM Level 2 Specification the Node will not be +altered or removed from its original document by default. ( $node-cloneNode(1)> will get called implicitly). Sometimes it is +necessary to move a node between documents. In such a case the node will not be copied, but +removed from the original document. + +


SEE ALSO

+XML::LibXML, XML::LibXML::Element, XML::LibXML::Text, XML::LibXML::Attr, +XML::LibXML::Comment + +


VERSION

+0.90_a + +

diff --git a/example/test.xml b/example/test.xml new file mode 100644 index 0000000..788aa4e --- /dev/null +++ b/example/test.xml @@ -0,0 +1,6 @@ + + +test + + + diff --git a/example/test2.xml b/example/test2.xml new file mode 100644 index 0000000..13ca0df --- /dev/null +++ b/example/test2.xml @@ -0,0 +1 @@ +.. \ No newline at end of file diff --git a/example/test3.xml b/example/test3.xml new file mode 100644 index 0000000..6ddbb01 --- /dev/null +++ b/example/test3.xml @@ -0,0 +1,3 @@ + + + diff --git a/example/test4.xml b/example/test4.xml new file mode 100644 index 0000000..1e94b10 --- /dev/null +++ b/example/test4.xml @@ -0,0 +1,6 @@ + + +test 4 + + + diff --git a/example/thedieline.rss b/example/thedieline.rss new file mode 100644 index 0000000..ea1cd14 --- /dev/null +++ b/example/thedieline.rss @@ -0,0 +1,29 @@ + + + TheDieline.com: Package Design + + + + tag:typepad.com,2003:weblog-611821 + 2011-06-15T11:03:00-07:00 + The World's #1 Package Design Website + TypePad + + Ginja d' Óbidos + + + tag:typepad.com,2003:post-6a00d8345250f069e20133f1a9824b970b + 2010-06-25T10:00:00-07:00 + 2010-06-24T22:16:07-07:00 + + The Dieline + + + + + +<font color="#000000" ><p><Img align="left" border="0" height="1" width="1" style="border:0;float:left;margin:0;" vspace="0" hspace="0" src="http://feeds.feedblitz.com/~/i/15124587/1ir2jk/thedieline"><div xmlns="http://www.w3.org/1999/xhtml"><p><a href="http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://www.thedieline.com/.a/6a00d8345250f069e20133f1a97caa970b-popup" onclick="window.open( this.href, &#39;_blank&#39;, &#39;width=640,height=480,scrollbars=no,resizable=no,toolbar=no,directories=no,location=no,menubar=no,status=no,left=0,top=0&#39; ); return false" style="display: inline;"><img alt="1" class="asset asset-image at-xid-6a00d8345250f069e20133f1a97caa970b " src="http://www.thedieline.com/.a/6a00d8345250f069e20133f1a97caa970b-550wi" style="width: 540px; " ></a> <br>Lisbon based <a href="http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://"></a><a href="http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://www.ntgj.org/" target="_blank">NT.GJ</a> designed this cherry liqueur concept which features actual cherries within...</div><p><a href="http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline">CLICK HERE to read the rest of the post...</a> <!-- _!fbztxtlnk!_ http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline -->&raquo;</p></font><p><div style="clear:both;"><em>(Want to see more packaging? Visit <a href="http://www.TheDieline.com">TheDieline.com</a>!)</em><p></div></p> +<div style="clear:both;"><a title="Tweet with Bit.ly" href="http://bit.ly/?v=3&ref=feedblitz&u=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&t=Ginja+d%26%2339%3b+Óbidos"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/bitly.jpg"></a> <a title="Add to Delicious" href="http://delicious.com/post?url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&title=Ginja+d%26%2339%3b+Óbidos"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/delicious.jpg"></a> <a title="Digg This" href="http://digg.com/submit?phase=2&url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&title=Ginja+d%26%2339%3b+Óbidos"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/digg.jpg"></a> <a title="Add to FaceBook" href="http://facebook.com/share.php?u=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&t=Ginja+d%26%2339%3b+Óbidos"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/facebook.jpg"></a> <a title="Add to Google Bookmarks" href="http://google.com/bookmarks/mark?op=edit&bkmk=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&title=Ginja+d%26%2339%3b+Óbidos"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/google.jpg"></a> <a title="Stumble This" href="http://stumbleupon.com/submit?url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&title=Ginja+d%26%2339%3b+Óbidos"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/stumble.jpg"></a> <a title="Tweet This" href="http://twitter.com/home?status=Ginja+d%26%2339%3b+Óbidos+http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html"><img height="16" border="0" src="http://assets.feedblitz.com/images/icons/twitter.jpg"></a> <a title="Subscribe by RSS" href="http://feeds.feedblitz.com/thedieline"><img height="16" border="0" src="http://assets.feedblitz.com/images/rss.gif"></a> <a title="View Comments" href="http://www.thedieline.com/blog/2010/06/ginja-d-%C3%B3bidos.html"><img height=16 border=0 src="http://assets.feedblitz.com/images/comment.png"></a> </div> + + + diff --git a/example/utf-16-1.html b/example/utf-16-1.html new file mode 100644 index 0000000..a3216c4 Binary files /dev/null and b/example/utf-16-1.html differ diff --git a/example/utf-16-2.html b/example/utf-16-2.html new file mode 100644 index 0000000..996ba3a Binary files /dev/null and b/example/utf-16-2.html differ diff --git a/example/utf-16-2.xml b/example/utf-16-2.xml new file mode 100644 index 0000000..e72de46 Binary files /dev/null and b/example/utf-16-2.xml differ diff --git a/example/xmllibxmldocs.pl b/example/xmllibxmldocs.pl new file mode 100644 index 0000000..0b10fde --- /dev/null +++ b/example/xmllibxmldocs.pl @@ -0,0 +1,551 @@ +#!/usr/bin/perl -w + +use strict; + +use XML::LibXML; +use IO::File; + +# ------------------------------------------------------------------------- # +# (c) 2003 christian p. glahn +# ------------------------------------------------------------------------- # + +# ------------------------------------------------------------------------- # +# This is an example how to use the DOM interface of XML::LibXML The +# script reads a XML File with a module specification. If the module +# contains several classes, the script fetches them and stores the +# data into different POD Files. +# +# Note this is just an example, to demonstrate how XML::LibXML works. +# The code works for the XML::LibXML documentation, but may not work +# for any other docbook file. +# +# If you are interested what the results are, check the README and the POD +# files shipped with XML::LibXML. +# ------------------------------------------------------------------------- # + +# ------------------------------------------------------------------------- # +# SYNOPSIS: +# xmllibxmldocs.pl $dokbook_file $targetdir +# +my $srcfile = shift @ARGV; +my $targetdir = shift @ARGV; + +unless ( $targetdir =~ /\/$/ ) { + $targetdir .= "/"; +} + +# ------------------------------------------------------------------------- # +# +# ------------------------------------------------------------------------- # +# init the parser +my $parser = XML::LibXML->new(); +$parser->load_ext_dtd(0); +$parser->keep_blanks(0); +# ------------------------------------------------------------------------- # +# +# ------------------------------------------------------------------------- # +# load the document into memory. +my $doc = $parser->parse_file( $srcfile ); +# ------------------------------------------------------------------------- # +# +# ------------------------------------------------------------------------- # +# good implementations would use XSLT to convert a docbook to any other +# text format. Since the module does not presume libxslt installed, we +# have to do the dirty job. +my $ch = ChapterHandler->new($targetdir); + +# ------------------------------------------------------------------------- # +# init the common parts in all pods +my ( $bookinfo ) = $doc->findnodes( "//bookinfo" ); +$ch->set_general_info( $bookinfo ); +# ------------------------------------------------------------------------- # + +# ------------------------------------------------------------------------- # +# then process each chapter of the XML::LibXML book +my @chapters = $doc->findnodes( "//chapter" ); +foreach my $chap ( @chapters ) { + $ch->handle( $chap ); +} +# ------------------------------------------------------------------------- # +# ------------------------------------------------------------------------- # + +# ------------------------------------------------------------------------- # +# the class to process our dokbook file +# ------------------------------------------------------------------------- # +package ChapterHandler; +use XML::LibXML; + +# ------------------------------------------------------------------------- # +# the constructor +# ------------------------------------------------------------------------- # +sub new{ + my $class = shift; + my $dir = shift; + my $self = bless {directory => $dir}, $class; + + return $self; +} +# ------------------------------------------------------------------------- # + +# ------------------------------------------------------------------------- # +# set_general_info +# ------------------------------------------------------------------------- # +# processes the bookinfo tag of XML::LibXML to extract common information such +# as version or copyright information +sub set_general_info { + my $self = shift; + my $infonode = shift; + return unless defined $infonode; + + my $infostr = "=head1 AUTHORS\n\n"; + my @authors = $infonode->findnodes( "authorgroup/author" ); + foreach my $author ( @authors ) { + my ( $node_fn ) = $author->getChildrenByTagName( "firstname" ); + my ( $node_sn ) = $author->getChildrenByTagName( "surname" ); + if ( defined $node_fn ) { + $infostr .= $node_fn->string_value(); + } + if ( defined $node_sn ) { + $infostr .= " ". $node_sn->string_value(); + } + if ( defined $author->nextSibling() ) { + $infostr .= ", \n"; + } + else { + $infostr .= "\n\n"; + } + } + + my ( $version ) = $infonode->findnodes( "edition" ); + if ( defined $version ) { + $infostr .= "\n=head1 VERSION\n\n" . $version->string_value() . "\n\n"; + } + + my @copyright = $infonode->findnodes( "copyright" ); + if ( @copyright ) { + $infostr .= "=head1 COPYRIGHT\n\n"; + foreach my $copyright (@copyright) { + my $node_y = $copyright->getChildrenByTagName( "year" ); + my $node_h = $copyright->getChildrenByTagName( "holder" ); + if ( defined $node_y ) { + $infostr .= $node_y->string_value() . ", "; + } + if ( defined $node_h ) { + $infostr .= $node_h->string_value(); + } + $infostr .= ".\n\n"; + } + $infostr .= "=cut\n"; + + $infostr .= "\n\n".<<'EOF'; +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + +EOF + } + + $self->{infoblock} = $infostr; +} + +# ------------------------------------------------------------------------- # +# handle +# ------------------------------------------------------------------------- # +# This function opens the output file and decides how the chapter is +# processed +sub handle { + my $self = shift; + my $chapter = shift; + + my ( $abbr ) = $chapter->findnodes( "titleabbrev" ); + if ( defined $abbr ) { + # create a new file. + my $filename = $abbr->string_value(); + $filename =~ s/^\s*|\s*$//g; + my $dir = $self->{directory}; + + $filename =~ s/XML\:\:LibXML//g; + $filename =~ s/^-|^\:\://g; # remove the first colon or minus. + $filename =~ s/\:\:/\//g; # transform remaining colons to paths. + # the previous statement should work for existing modules. This could be + # dangerous for nested modules, which do not exist at the time of writing + # this code. + + unless ( length $filename ) { + $dir = ""; + $filename = "LibXML"; + } + + if ( $filename ne "README" and $filename ne "LICENSE" ) { + $filename .= ".pod"; + } + else { + $dir = ""; + } + + $self->{OFILE} = IO::File->new(); + $self->{OFILE}->open(">".$dir.$filename); + + if ( $abbr->string_value() eq "README" + or $abbr->string_value() eq "LICENSE" ) { + + # Text only chapters in the documentation + $self->dump_text( $chapter ); + } + else { + # print header + # print synopsis + # process the information itself + # dump the info block + $self->dump_pod( $chapter ); + $self->{OFILE}->print( $self->{infoblock} ); + } + # close the file + $self->{OFILE}->close(); + + # Strip trailing space. + my $text = _slurp($dir.$filename); + $text =~ s/[ \t]+$//gms; + + open my $out, '>', $dir.$filename + or die "Cannot open $dir$filename for writing."; + print {$out} $text; + close ($out); + + } +} + +sub _slurp +{ + my $filename = shift; + + open my $in, '<', $filename + or die "Cannot open '$filename' for slurping - $!"; + + local $/; + my $contents = <$in>; + + close($in); + + return $contents; +} + +# ------------------------------------------------------------------------- # +# dump_text +# ------------------------------------------------------------------------- # +# convert the chapter into a textfile, such as README. +sub dump_text { + my $self = shift; + my $chap = shift; + + if ( $chap->nodeName() eq "chapter" ) { + my ( $title ) = $chap->getChildrenByTagName( "title" ); + my $str = $title->string_value(); + my $len = length $str; + $self->{OFILE}->print( uc($str) . "\n" ); + $self->{OFILE}->print( "=" x $len ); + $self->{OFILE}->print( "\n\n" ); + } + + foreach my $node ( $chap->childNodes() ) { + if ( $node->nodeName() eq "para" ) { + # we split at the last whitespace before 80 chars + my $string = $node->string_value(); + my $os = ""; + my @words = split /\s+/, $string; + foreach my $word ( @words ) { + if ( (length( $os ) + length( $word ) + 1) < 80 ) { + if ( length $os ) { $os .= " "; } + $os .= $word; + } + else { + $self->{OFILE}->print( $os . "\n" ); + $os = $word; + } + } + $self->{OFILE}->print( $os ); + $self->{OFILE}->print( "\n\n" ); + } + elsif ( $node->nodeName() eq "sect1" ) { + my ( $title ) = $node->getChildrenByTagName( "title" ); + my $str = $title->string_value(); + my $len = length $str; + + $self->{OFILE}->print( "\n" . uc($str) . "\n" ); + $self->{OFILE}->print( "=" x $len ); + $self->{OFILE}->print( "\n\n" ); + $self->dump_text( $node ); + } + elsif ( $node->nodeName() eq "sect2" ) { + my ( $title ) = $node->getChildrenByTagName( "title" ); + my $str = $title->string_value(); + my $len = length $str; + + $self->{OFILE}->print( "\n" . $str . "\n" ); + $self->{OFILE}->print( "=" x $len ); + $self->{OFILE}->print( "\n\n" ); + $self->dump_text( $node ); + } + elsif ( $node->nodeName() eq "itemizedlist" ) { + my @items = $node->findnodes( "listitem" ); + my $sp= " "; + foreach my $item ( @items ) { + $self->{OFILE}->print( "$sp o " ); + my $str = $item->string_value(); + $str =~ s/^\s*|\s*$//g; + $self->{OFILE}->print( $str ); + $self->{OFILE}->print( "\n" ); + } + $self->{OFILE}->print( "\n" ); + } + elsif ( $node->nodeName() eq "orderedlist" ) { + my @items = $node->findnodes( "listitem" ); + my $i = 0; + my $sp= " "; + foreach my $item ( @items ) { + $i++; + $self->{OFILE}->print( "$sp $i " ); + my $str = $item->string_value(); + $str =~ s/^\s*|\s*$//g; + $self->{OFILE}->print( $str ); + $self->{OFILE}->print( "\n" ); + } + $self->{OFILE}->print( "\n" ); + } + elsif ( $node->nodeName() eq "programlisting" ) { + my $str = $node->string_value(); + $str =~ s/\n/\n> /g; + $self->{OFILE}->print( "> ". $str ); + $self->{OFILE}->print( "\n\n" ); + } + } +} + +# ------------------------------------------------------------------------- # +# dump_pod +# ------------------------------------------------------------------------- # +# This method is used to create the real POD files for XML::LibXML. It is not +# too sophisticated, but it already does quite a good job. +sub dump_pod { + my $self = shift; + my $chap = shift; + + if ( $chap->nodeName() eq "chapter" ) { + my ( $title ) = $chap->getChildrenByTagName( "title" ); + my ( $ttlabbr ) = $chap->getChildrenByTagName( "titleabbrev" ); + my $str = $ttlabbr->string_value() . " - ".$title->string_value(); + $str=~s/^\s+|\s+$//g; + $self->{OFILE}->print( "=head1 NAME\n\n$str\n" ); + my ($synopsis) = $chap->findnodes( "sect1[title='Synopsis']" ); + my @funcs = $chap->findnodes( ".//funcsynopsis" ); + if ($synopsis or scalar @funcs) { + $self->{OFILE}->print( "\n=head1 SYNOPSIS\n\n" ) + } + if ($synopsis) { + $self->dump_pod( $synopsis ); + } + if ( scalar @funcs ) { + foreach my $s ( @funcs ) { + $self->dump_pod( $s ); + } + # $self->{OFILE}->print( "\n\n=head1 DESCRIPTION\n\n" ); + } + } + + foreach my $node ( $chap->childNodes() ) { + if ( $node->nodeType == XML_TEXT_NODE || + $node->nodeType == XML_CDATA_SECTION_NODE ) { + # we split at the last whitespace before 80 chars + my $prev_inline = + ($node->previousSibling and + $node->previousSibling->nodeName !~ + /^(?:itemizedlist|orderedlist|variablelist|programlisting|funcsynopsis)/) + ? 1 : 0; + my $str = $node->data(); + $str=~s/(^|\n)[ \t]+($|\n)/$1$2/g; + if ($str=~/\S/) { + my $string = $str; + my $space_before = ($string =~ s/^\s+//g) ? $prev_inline : 0; + my $space_after = ($string =~ s/\s+$//g) ? 1 : 0; + $self->{OFILE}->print( " " ) if $space_before; + my $os = ""; + my @words = split /\s+/, $string; + foreach my $word ( @words ) { + if ( (length( $os ) + length( $word ) + 1) < 80 ) { + if ( length $os ) { $os .= " "; } + $os .= $word; + } + else { + $self->{OFILE}->print( $os . "\n" ); + $os = $word; + } + } + $os.=" " if $space_after; + $self->{OFILE}->print( $os ); + } + } elsif ( $node->nodeName() eq "para" ) { + $self->dump_pod( $node ); + $self->{OFILE}->print( "\n\n" ); + } elsif ( $node->nodeName() eq "sect1" ) { + my ( $title ) = $node->getChildrenByTagName( "title" ); + my $str = $title->string_value(); + unless ($chap->nodeName eq "chapter" and $str eq 'Synopsis') { + $self->{OFILE}->print( "\n=head1 " . uc($str) ); + $self->{OFILE}->print( "\n\n" ); + $self->dump_pod( $node ); + } + } + elsif ( $node->nodeName() eq "sect2" ) { + my ( $title ) = $node->getChildrenByTagName( "title" ); + my $str = $title->string_value(); + my $len = length $str; + + $self->{OFILE}->print( "\n=head2 " . $str . "\n\n" ); + + $self->dump_pod( $node ); + } + elsif ( $node->nodeName() eq "sect3" ) { + my ( $title ) = $node->getChildrenByTagName( "title" ); + my $str = $title->string_value(); + my $len = length $str; + + $self->{OFILE}->print( "\n=head3 " . $str . "\n\n" ); + + $self->dump_pod( $node ); + } + elsif ( $node->nodeName() eq "itemizedlist" ) { + my @items = $node->findnodes( "listitem" ); + $self->{OFILE}->print( "\n=over 4\n\n" ); + foreach my $item ( @items ) { + $self->{OFILE}->print( "=item *\n\n" ); + $self->dump_pod( $item ); + $self->{OFILE}->print( "\n\n" ); + } + $self->{OFILE}->print( "=back\n\n" ); + } + elsif ( $node->nodeName() eq "orderedlist" ) { + my @items = $node->findnodes( "listitem" ); + my $i = 0; + $self->{OFILE}->print( "\n=over 4\n\n" ); + + foreach my $item ( @items ) { + $i++; + $self->{OFILE}->print( "=item $i.\n\n" ); + $self->dump_pod($item); + $self->{OFILE}->print( "\n\n" ); + } + $self->{OFILE}->print( "=back\n\n" ); + } + elsif ( $node->nodeName() eq "variablelist" ) { + $self->{OFILE}->print( "=over 4\n\n" ); + my @nodes = $node->findnodes( "varlistentry" ); + $self->dump_pod( $node ); + $self->{OFILE}->print( "\n=back\n\n" ); + } + elsif ( $node->nodeName() eq "varlistentry" ) { + my ( $term ) = $node->findnodes( "term" ); + $self->{OFILE}->print( "=item " ); + if ( defined $term ) { + $self->dump_pod( $term ); + } + $self->{OFILE}->print( "\n\n" ); + my @nodes =$node->findnodes( "listitem" ); + foreach my $it ( @nodes ) { + $self->dump_pod( $it ); + } + $self->{OFILE}->print( "\n" ); + } + elsif ( $node->nodeName() eq "programlisting" ) { + my $str = $node->string_value(); + $str =~ s/^\s+|\s+$//g; + $str =~ s/\n/\n /g; + $str=~s/(^|\n)[ \t]+($|\n)/$1$2/g; + $self->{OFILE}->print( "\n\n" ); + $self->{OFILE}->print( " ". $str ); + $self->{OFILE}->print( "\n\n" ); + } + elsif ( $node->nodeName() eq "funcsynopsis") { + if (($node->getAttribute('role')||'') ne 'synopsis') { + $self->dump_pod($node); + $self->{OFILE}->print( "\n" ); + } + } + elsif( $node->nodeName() eq "funcsynopsisinfo" ) { + my $str = $node->string_value() ; + $str =~ s/\n/\n /g; + $self->{OFILE}->print( " $str\n" ); + } elsif( $node->nodeName() eq "title" or + $node->nodeName() eq "titleabbrev" + ) { + # IGNORE + } elsif( $node->nodeName() eq "emphasis" ) { + my $str = $node->string_value() ; + $str =~ s/\n/ /g; + $str = pod_escape($str); + $self->{OFILE}->print( "I<<<<<< $str >>>>>>" ); + } elsif( $node->nodeName() eq "function" or + $node->nodeName() eq "email" or + $node->nodeName() eq "literal" + ) { + my $str = $node->string_value() ; + $str =~ s/\n/ /g; + $str = pod_escape($str); + $self->{OFILE}->print( "C<<<<<< $str >>>>>>" ); + } elsif( $node->nodeName() eq "ulink" ) { + my $str = $node->string_value() ; + my $url = $node->getAttribute('url'); + $str =~ s/\n/ /g; + if ($str eq $url) { + $self->{OFILE}->print( "L<<<<<< $url >>>>>>" ); + } else { + $self->{OFILE}->print( "$str (L<<<<<< $url >>>>>>)" ); + } + } elsif( $node->nodeName() eq "xref" ) { + my $linkend = $node->getAttribute('linkend'); + my ($target) = $node->findnodes(qq(//*[\@id="$linkend"]/titleabbrev)); + ($target) = $node->findnodes(qq(//*[\@id="$linkend"]/title)) unless $target; + if ($target) { + my $str = $target->string_value() ; + $str =~ s/\n/ /g; + $str = pod_escape($str); + $self->{OFILE}->print( "L<<<<<< $str >>>>>>" ); + } else { + warn "WARNING: Didn't find any section with id='$linkend'\n"; + $self->{OFILE}->print( "$linkend" ); + } + } elsif( $node->nodeName() eq "olink" ) { + my $str = pod_escape($node->string_value()); + my $url = $node->getAttribute('targetdoc'); + if (!defined $url) { + warn $node->toString(1),"\n"; + } + $str =~ s/\n/ /g; + if ($str eq $url) { + $self->{OFILE}->print( "L<<<<<< $url >>>>>>" ); + } else { + $self->{OFILE}->print( "$str (L<<<<<< $url >>>>>>)" ); + } + } else { + print STDERR "Ignoring ",$node->nodeName(),"\n"; + $self->dump_pod($node); + } + } +} + +sub pod_escape { + my ($str) = @_; + + my %escapes = ( + '>' => 'gt', + '<' => 'lt', + ); + + my $re = join('|', keys %escapes); + + $str =~ s/($re)/E<$escapes{$1}>/g; + + return $str; +} + +1; diff --git a/example/xmlns/badguy.xml b/example/xmlns/badguy.xml new file mode 100644 index 0000000..e8d6f62 --- /dev/null +++ b/example/xmlns/badguy.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/example/xmlns/goodguy.xml b/example/xmlns/goodguy.xml new file mode 100644 index 0000000..4ec73bb --- /dev/null +++ b/example/xmlns/goodguy.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/example/xpath.pl b/example/xpath.pl new file mode 100644 index 0000000..e9f0742 --- /dev/null +++ b/example/xpath.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl + +# $Id$ + +use XML::LibXML; + +use strict; +use warnings; + +my $parser = XML::LibXML->new(); +my $xpath = shift @ARGV; + +if ( scalar @ARGV ) { + foreach ( @ARGV ) { + my $doc = $parser->parse_file( $_ ); + my $result = $doc->find( $xpath ); + handle_result( $result ); + undef $doc; + } +} +else { + # read from std in + my @doc = ; + my $string = join "", @doc; + my $doc = $parser->parse_string( $string ); + my $result = $doc->find( $xpath ); + exit handle_result( $result ); +} + +sub handle_result { + my $result = shift; + + return 1 unless defined $result; + + if ( $result->isa( 'XML::LibXML::NodeList' ) ) { + foreach ( @$result ) { + print $_->toString(1) , "\n"; + } + return 0; + } + + if ( $result->isa( 'XML::LibXML::Literal' ) ) { + print $result->value , "\n"; + return 0; + } + + if ( $result->isa( 'XML::LibXML::Boolean' ) ){ + print $result->to_literal , "\n"; + return 0; + } + + return 1; +} diff --git a/example/yahoo-finance-html-with-errors.html b/example/yahoo-finance-html-with-errors.html new file mode 100644 index 0000000..046306d --- /dev/null +++ b/example/yahoo-finance-html-with-errors.html @@ -0,0 +1,1193 @@ + + + + Yahoo! Finance - Business Finance, Stock Market, Quotes, News + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
Skip to search.
Make Y! My Homepage
 
  1. Drag the "Y!" and drop it onto the "Home" icon.
  2. Select "Yes" from the pop up window.
  3. Nothing, you're done.

If this didn't work for you see detailed instructions

Close this window

+ +
+ +
+ + +
+ +
+ +
+ + { "market" : {"NAME" : "", "ID" : "us", "TZ" : "", "TZOFFSET" : "-14400", "open" : "", "close" : "", "flags" : ""} ,"STREAMER_SERVER" : "http://streamerapi.finance.yahoo.com" ,"arrowAsChangeSign" : false ,"throttleInterval": "1000","localize":"0" ,"region": "US","lang" : "en-US","mu":"1" ,"up_arrow_icon" :"http://l.yimg.com/a/i/us/fi/03rd/up_g.gif" , "down_arrow_icon":"http://l.yimg.com/a/i/us/fi/03rd/down_r.gif" ,"up_color" :"green" , "down_color":"red","pass_market_id" : "0","market_status_yrb" :"YFT_MARKET_WILL_OPEN"} + + + + + +
+ +
+ + +
+
+ +
+ + +
+ +
+
+
+ +
Learn how to trade on Breakout from Y! Finance
+ +
+ + + +
+ + + +
+ + + + +
+
+
+
+

Facts Column

+
+ +
+ +
Dow12,188.69+145.13+1.21%
Chart for Dow
Nasdaq2,729.310.000.00%
Chart for Nasdaq
S&P 5001,296.670.000.00%
Chart for S&P 500
10 Yr Bond(%)3.0450%0.0000
Chart for 10 Yr Bond(%)
Oil93.65+0.76+0.82%
Chart for Oil
Gold1,508.10+8.40+0.56%
Chart for Gold
FTSE 1005,831.33+64.45+1.12%
Chart for FTSE 100
DAX7,274.16+103.73+1.45%
Chart for DAX
CAC 403,925.90+74.01+1.92%
Chart for CAC 40
Nikkei 2259,797.26+148.28+1.54%
Chart for Nikkei 225
Hang Seng22,061.18-0.600.00%
Chart for Hang Seng
Straits Times3,079.74+28.95+0.95%
Chart for Straits Times
{"s" : "","k" : "a00,a50,b00,b60,c10,g00,h00,l10,p20,t10,v00","o" : "CLQ11.NYM,GCN11.CMX,^DJI,^FCHI,^FTSE,^GDAXI,^GSPC,^HSI,^IXIC,^N225,^STI,^TNX","j" : "c10,l10,p20,t10"} + + +
+

Wed 6:32am ET- Briefing.com
[BRIEFING.COM] S&P futures vs fair value: +6.00. Nasdaq futures vs fair value: +8.00....

+» Read more +
+ + +
+ +
+ + + + +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + +
Currency PairPriceChange
EUR/USD1.4419+ 0.0050
USD/JPY81.0650+ 0.0300
GBP/USD1.6024+ 0.0031
+ +
+ + +
+
Sponsored by:
+ GFT - Trade the Biggest Market
Forex. Free Practice Account. +
+ +
+ +
+ + + + + + + + + + + +
$1 U.S. Dollar (USD) =
Japanese Yen81.0650 ¥
Euro0.6934 €
+ +
+ + +
+
+ +
+ +
+ + » View more investing ideas + + + + +
+

Market Movers

+Newsworthy

Determined based on increased frequency of appearance in news

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SPRD12.490.000.00% +Spreadtrum Communications, Inc.Chart for SPRD + + +
MON66.900.000.00% +Monsanto Company Common StockChart for MON + + +
LYG2.800.000.00% +Lloyds Banking Group Plc AmericChart for LYG + + +
EK3.440.000.00% +Eastman Kodak Company Common StChart for EK + + +
GIS37.210.000.00% +General Mills, Inc. Common StocChart for GIS + + +
+More Market Movers +
{"s" : "ek,gis,lyg,mon,sprd","k" : "a00,a50,b00,b60,c10,g00,h00,l10,p20,t10,v00","o" : "AA,AAPL,ADXS.OB,ARAY,CMG,FIZZ,INTC,MCD,NOK,WNI","j" : "c10,l10,p20,t10"} + + +
+
+
+ +
+

Community Sentiment

+ +

Powered by Collective Intellect, Inc.

+
+ + +
+
+

News and Opinion Column

+
+ +
+

Top Stories

+
+Wal-Mart Stores cutting gas prices for 3 months- AP

Wal-Mart Stores Inc. is cutting gas prices by 10 cents a gallon for three months. The world's biggest retailer says customers visiting participating Murphy USA and Walmart gas stations from Wednesday through Sept. 30 will receive the discount on all fuel, gas and diesel purchases when they use a reloadable Walmart gift card, Walmart MoneyCard or a Walmart credit card.

+ +
+ + + +
+ +
+ +
+

Focus on Lifelong Investing

+ brought to you by Fidelity +
+
+ + + » View More Stories... +
+ + + +
+ + + + + + +
+ +
+ + + » View More +
+ + +
+ + + +
+
+

Breakout

+ +
+
+ + + » View More +
+ + +
+ + + + +
+
+
+ +
+ +
+ + + +
+ +
+ + +
+ +
+ + + » View all columnists +
+ + +
+ + + + +
+
+
+ + + +
+
+ +
+

Sponsored Links

+
+
Hot Stock Pick - GTSO
+
Rare Earth Minerals used in touch screens. China. Invest Now.
+
www.RareEarthExporters.com
+
+
+
2011 Market Crash?
+
Analyst Dennis Slothower foresaw 2008 collapse; issues new warning
+
www.stealthstocksonline.com
+
+
ING DIRECT Investing
+
No Acct. Minimum. Free Mobile App. $50 Account Bonus. Learn More!
+
www.ShareBuilder.com/ingdirect
+
+
+
100% Accurate Stock Picks
+
Electrifying Stock Pick Accuracy. Only 5 Free spots left. Sign Up Now!
+
www.PerfectPennyStocks.com
+
+
Stock Recommendations
+
Buy/Hold/Sell and target prices on over 5,500 stocks every day
+
www.ValuEngine.com
+
+
+
Where's the Dow headed?
+
The answer may surprise you. Find out now with Chart of the Day.
+
www.chartoftheday.com
+
+
+ + +
+
+
+

Videos, My Quotes, Rates

+
+ +
AdChoices
+
+ +
+ + + + +
+ + + + +
+
+ +

Quotes

Your most recently viewed tickers will automatically show up here if you type a ticker in the Get Quotes box on the top of the page.
{"s" : "GE,MCD,T,YHOO,^DJI,^IXIC,^SPC,^TYX","k" : "a00,a50,b00,b60,c10,g00,h00,l10,p20,t10,v00","o" : "","j" : ""} + + + +
+ + + + +
+ +

Rates

See today's average mortgage rates across the country. Source: Bankrate.com

Loan TypeTodayLast Week
30 Year Fixed4.50%4.51%
15 Year Fixed3.67%3.69%
1 Year ARM3.18%3.14%
30 Year Fixed Jumbo5.02%4.98%
5/1 ARM3.06%3.05%
3/1 ARM3.26%3.33%
» View rates in your area

See today's average home equity rates across the country. Source: Bankrate.com

Loan TypeTodayLast Week
$30K Home Equity Loan5.95%5.99%
$50K Home Equity Loan5.73%5.79%
$75K Home Equity Loan5.71%5.76%
$30K HELOC4.77%4.75%
$50K HELOC4.42%4.43%
$75K HELOC4.40%4.40%
» View rates in your area

See today's average savings rates across the country. Source: Bankrate.com

Savings TypeTodayLast Week
6 month CD0.54%0.54%
1 year CD0.89%0.89%
3 year CD1.31%1.31%
MMA0.62%0.62%
$10K MMA 0.71%0.71%
$25K MMA0.87%0.87%
» View rates in your area

See today's average auto rates across the country. Source: Bankrate.com

Auto TypeTodayLast Week
36 Month New Car Loan3.66%3.91%
48 Month New Car Loan3.77%3.99%
60 Month New Car Loan3.81%4.02%
72 Month New Car Loan3.24%3.37%
36 Month Used Car Loan4.66%4.68%
48 Month Used Car Loan4.73%4.56%
» View rates in your area

See today's average credit card rates across the country. Source: CreditCards.com

Card TypeTodayLast Week
Low Interest Credit Cards10.73%10.73%
Balance Transfer Credit Cards12.78%12.76%
Business Credit Cards13.07%13.07%
Student Credit Cards13.77%13.77%
Cash Back Credit Cards13.90%13.87%
Airline Credit Cards14.31%14.24%
» View more rates
+ + + + + +
+ + + + +
+
Trending Now
+

Trending Now

+ +
+ + + +
+
+ +
+
+
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/XML/LibXML/Attr.pod b/lib/XML/LibXML/Attr.pod new file mode 100644 index 0000000..fa6a5cd --- /dev/null +++ b/lib/XML/LibXML/Attr.pod @@ -0,0 +1,141 @@ +=head1 NAME + +XML::LibXML::Attr - XML::LibXML Attribute Class + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Attribute nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $attr = XML::LibXML::Attr->new($name [,$value]); + $string = $attr->getValue(); + $string = $attr->value; + $attr->setValue( $string ); + $node = $attr->getOwnerElement(); + $attr->setNamespace($nsURI, $prefix); + $bool = $attr->isId; + $string = $attr->serializeContent; + +=head1 DESCRIPTION + +This is the interface to handle Attributes like ordinary nodes. The naming of +the class relies on the W3C DOM documentation. + + +=head1 METHODS + +The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here. + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item new + + $attr = XML::LibXML::Attr->new($name [,$value]); + +Class constructor. If you need to work with ISO encoded strings, you should I<<<<<< always >>>>>> use the C<<<<<< createAttribute >>>>>> of L<<<<<< XML::LibXML::Document >>>>>>. + + +=item getValue + + $string = $attr->getValue(); + +Returns the value stored for the attribute. If undef is returned, the attribute +has no value, which is different of being C<<<<<< not specified >>>>>>. + + +=item value + + $string = $attr->value; + +Alias for I<<<<<< getValue() >>>>>> + + +=item setValue + + $attr->setValue( $string ); + +This is needed to set a new attribute value. If ISO encoded strings are passed +as parameter, the node has to be bound to a document, otherwise the encoding +might be done incorrectly. + + +=item getOwnerElement + + $node = $attr->getOwnerElement(); + +returns the node the attribute belongs to. If the attribute is not bound to a +node, undef will be returned. Overwriting the underlying implementation, the I<<<<<< parentNode >>>>>> function will return undef, instead of the owner element. + + +=item setNamespace + + $attr->setNamespace($nsURI, $prefix); + +This function tries to bound the attribute to a given namespace. If C<<<<<< $nsURI >>>>>> is undefined or empty, the function discards any previous association of the +attribute with a namespace. If the namespace was not previously declared in the +context of the attribute, this function will fail. In this case you may wish to +call setNamespace() on the ownerElement. If the namespace URI is non-empty and +declared in the context of the attribute, but only with a different (non-empty) +prefix, then the attribute is still bound to the namespace but gets a different +prefix than C<<<<<< $prefix >>>>>>. The function also fails if the prefix is empty but the namespace URI is not +(because unprefixed attributes should by definition belong to no namespace). +This function returns 1 on success, 0 otherwise. + + +=item isId + + $bool = $attr->isId; + +Determine whether an attribute is of type ID. For documents with a DTD, this +information is only available if DTD loading/validation has been requested. For +HTML documents parsed with the HTML parser ID detection is done automatically. +In XML documents, all "xml:id" attributes are considered to be of type ID. + + +=item serializeContent($docencoding) + + $string = $attr->serializeContent; + +This function is not part of DOM API. It returns attribute content in the form +in which it serializes into XML, that is with all meta-characters properly +quoted and with raw entity references (except for entities expanded during +parse time). Setting the optional $docencoding flag to 1 enforces document +encoding for the output string (which is then passed to Perl as a byte string). +Otherwise the string is passed to Perl as (UTF-8 encoded) characters. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/AttributeHash.pm b/lib/XML/LibXML/AttributeHash.pm new file mode 100644 index 0000000..c83f0dc --- /dev/null +++ b/lib/XML/LibXML/AttributeHash.pm @@ -0,0 +1,215 @@ +package XML::LibXML::AttributeHash; + +use strict; +use warnings; +use Scalar::Util qw//; +use Tie::Hash; +our @ISA = qw/Tie::Hash/; + +use vars qw($VERSION); +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +BEGIN +{ + *__HAS_WEAKEN = defined(&Scalar::Util::weaken) + ? sub () { 1 } + : sub () { 0 }; +}; + +sub element +{ + return $_[0][0]; +} + +sub from_clark +{ + my ($self, $str) = @_; + if ($str =~ m! \{ (.+) \} (.+) !x) + { + return ($1, $2); + } + return (undef, $str); +} + +sub to_clark +{ + my ($self, $ns, $local) = @_; + defined $ns ? "{$ns}$local" : $local; +} + +sub all_keys +{ + my ($self, @keys) = @_; + + my $elem = $self->element; + + foreach my $attr (defined($elem) ? $elem->attributes : ()) + { + if (! $attr->isa('XML::LibXML::Namespace')) + { + push @keys, $self->to_clark($attr->namespaceURI, $attr->localname); + } + } + + return sort @keys; +} + +sub TIEHASH +{ + my ($class, $element, %args) = @_; + my $self = bless [$element, undef, \%args], $class; + if (__HAS_WEAKEN and $args{weaken}) + { + Scalar::Util::weaken( $self->[0] ); + } + return $self; +} + +sub STORE +{ + my ($self, $key, $value) = @_; + my ($key_ns, $key_local) = $self->from_clark($key); + if (defined $key_ns) + { + return $self->element->setAttributeNS($key_ns, "xxx:$key_local", "$value"); + } + else + { + return $self->element->setAttribute($key_local, "$value"); + } +} + +sub FETCH +{ + my ($self, $key) = @_; + my ($key_ns, $key_local) = $self->from_clark($key); + if (defined $key_ns) + { + return $self->element->getAttributeNS($key_ns, "$key_local"); + } + else + { + return $self->element->getAttribute($key_local); + } +} + +sub EXISTS +{ + my ($self, $key) = @_; + my ($key_ns, $key_local) = $self->from_clark($key); + if (defined $key_ns) + { + return $self->element->hasAttributeNS($key_ns, "$key_local"); + } + else + { + return $self->element->hasAttribute($key_local); + } +} + +sub DELETE +{ + my ($self, $key) = @_; + my ($key_ns, $key_local) = $self->from_clark($key); + if (defined $key_ns) + { + return $self->element->removeAttributeNS($key_ns, "$key_local"); + } + else + { + return $self->element->removeAttribute($key_local); + } +} + +sub FIRSTKEY +{ + my ($self) = @_; + my @keys = $self->all_keys; + $self->[1] = \@keys; + if (wantarray) + { + return ($keys[0], $self->FETCH($keys[0])); + } + $keys[0]; +} + +sub NEXTKEY +{ + my ($self, $lastkey) = @_; + my @keys = defined $self->[1] ? @{ $self->[1] } : $self->all_keys; + my $found; + foreach my $k (@keys) + { + if ($k gt $lastkey) + { + $found = $k and last; + } + } + if (!defined $found) + { + $self->[1] = undef; + return; + } + if (wantarray) + { + return ($found, $self->FETCH($found)); + } + return $found; +} + +sub SCALAR +{ + my ($self) = @_; + return $self->element; +} + +sub CLEAR +{ + my ($self) = @_; + foreach my $k ($self->all_keys) + { + $self->DELETE($k); + } + return $self; +} + +__PACKAGE__ +__END__ + +=head1 NAME + +XML::LibXML::AttributeHash - tie an XML::LibXML::Element to a hash to access its attributes + +=head1 SYNOPSIS + + tie my %hash, 'XML::LibXML::AttributeHash', $element; + $hash{'href'} = 'http://example.com/'; + print $element->getAttribute('href') . "\n"; + +=head1 DESCRIPTION + +This class allows an element's attributes to be accessed as if they were a +plain old Perl hash. Attribute names become hash keys. Namespaced attributes +are keyed using Clark notation. + + my $XLINK = 'http://www.w3.org/1999/xlink'; + tie my %hash, 'XML::LibXML::AttributeHash', $element; + $hash{"{$XLINK}href"} = 'http://localhost/'; + print $element->getAttributeNS($XLINK, 'href') . "\n"; + +There is rarely any need to use XML::LibXML::AttributeHash directly. In +general, it is possible to take advantage of XML::LibXML::Element's +overloading. The example in the SYNOPSIS could have been written: + + $element->{'href'} = 'http://example.com/'; + print $element->getAttribute('href') . "\n"; + +The tie interface allows the passing of additional arguments to +XML::LibXML::AttributeHash: + + tie my %hash, 'XML::LibXML::AttributeHash', $element, %args; + +Currently only one argument is supported, the boolean "weaken" which (if +true) indicates that the tied object's reference to the element should be +a weak reference. This is used by XML::LibXML::Element's overloading. The +"weaken" argument is ignored if you don't have a working Scalar::Util::weaken. diff --git a/lib/XML/LibXML/Boolean.pm b/lib/XML/LibXML/Boolean.pm new file mode 100644 index 0000000..faa53b7 --- /dev/null +++ b/lib/XML/LibXML/Boolean.pm @@ -0,0 +1,93 @@ +# $Id$ +# +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::Boolean; +use XML::LibXML::Number; +use XML::LibXML::Literal; +use strict; +use warnings; + +use vars qw ($VERSION); + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use overload + '""' => \&value, + '<=>' => \&cmp; + +sub new { + my $class = shift; + my ($param) = @_; + my $val = $param ? 1 : 0; + bless \$val, $class; +} + +sub True { + my $class = shift; + my $val = 1; + bless \$val, $class; +} + +sub False { + my $class = shift; + my $val = 0; + bless \$val, $class; +} + +sub value { + my $self = shift; + $$self; +} + +sub cmp { + my $self = shift; + my ($other, $swap) = @_; + if ($swap) { + return $other <=> $$self; + } + return $$self <=> $other; +} + +sub to_number { XML::LibXML::Number->new($_[0]->value); } +sub to_boolean { $_[0]; } +sub to_literal { XML::LibXML::Literal->new($_[0]->value ? "true" : "false"); } + +sub string_value { return $_[0]->to_literal->value; } + +1; +__END__ + +=head1 NAME + +XML::LibXML::Boolean - Boolean true/false values + +=head1 DESCRIPTION + +XML::LibXML::Boolean objects implement simple boolean true/false objects. + +=head1 API + +=head2 XML::LibXML::Boolean->True + +Creates a new Boolean object with a true value. + +=head2 XML::LibXML::Boolean->False + +Creates a new Boolean object with a false value. + +=head2 value() + +Returns true or false. + +=head2 to_literal() + +Returns the string "true" or "false". + +=cut diff --git a/lib/XML/LibXML/CDATASection.pod b/lib/XML/LibXML/CDATASection.pod new file mode 100644 index 0000000..1113e56 --- /dev/null +++ b/lib/XML/LibXML/CDATASection.pod @@ -0,0 +1,65 @@ +=head1 NAME + +XML::LibXML::CDATASection - XML::LibXML Class for CDATA Sections + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to CDATA nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $node = XML::LibXML::CDATASection->new( $content ); + +=head1 DESCRIPTION + +This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for CDATA nodes. + + +=head1 METHODS + +The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here. + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item new + + $node = XML::LibXML::CDATASection->new( $content ); + +The constructor is the only provided function for this package. It is required, +because I<<<<<< libxml2 >>>>>> treats the different text node types slightly differently. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Comment.pod b/lib/XML/LibXML/Comment.pod new file mode 100644 index 0000000..aaacdce --- /dev/null +++ b/lib/XML/LibXML/Comment.pod @@ -0,0 +1,66 @@ +=head1 NAME + +XML::LibXML::Comment - XML::LibXML Comment Class + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Comment nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $node = XML::LibXML::Comment->new( $content ); + +=head1 DESCRIPTION + +This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for comment nodes. This can be done, since only the output of the node +types is different, but not the data structure. :-) + + +=head1 METHODS + +The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here. + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item new + + $node = XML::LibXML::Comment->new( $content ); + +The constructor is the only provided function for this package. It is required, +because I<<<<<< libxml2 >>>>>> treats text nodes and comment nodes slightly differently. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Common.pm b/lib/XML/LibXML/Common.pm new file mode 100644 index 0000000..eb7681c --- /dev/null +++ b/lib/XML/LibXML/Common.pm @@ -0,0 +1,203 @@ +#-------------------------------------------------------------------------# +# $Id: Common.pm,v 1.5 2003/02/27 18:32:59 phish108 Exp $ +# +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# +#-------------------------------------------------------------------------# +package XML::LibXML::Common; + + +#-------------------------------------------------------------------------# +# global blur # +#-------------------------------------------------------------------------# +use strict; +use warnings; + +require Exporter; +use vars qw( @ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS); + +@ISA = qw(Exporter); + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use XML::LibXML qw(:libxml); + +#-------------------------------------------------------------------------# +# export information # +#-------------------------------------------------------------------------# +%EXPORT_TAGS = ( + all => [qw( + ELEMENT_NODE + ATTRIBUTE_NODE + TEXT_NODE + CDATA_SECTION_NODE + ENTITY_REFERENCE_NODE + ENTITY_NODE + PI_NODE + PROCESSING_INSTRUCTION_NODE + COMMENT_NODE + DOCUMENT_NODE + DOCUMENT_TYPE_NODE + DOCUMENT_FRAG_NODE + DOCUMENT_FRAGMENT_NODE + NOTATION_NODE + HTML_DOCUMENT_NODE + DTD_NODE + ELEMENT_DECLARATION + ATTRIBUTE_DECLARATION + ENTITY_DECLARATION + NAMESPACE_DECLARATION + XINCLUDE_END + XINCLUDE_START + encodeToUTF8 + decodeFromUTF8 + )], + w3c => [qw( + ELEMENT_NODE + ATTRIBUTE_NODE + TEXT_NODE + CDATA_SECTION_NODE + ENTITY_REFERENCE_NODE + ENTITY_NODE + PI_NODE + PROCESSING_INSTRUCTION_NODE + COMMENT_NODE + DOCUMENT_NODE + DOCUMENT_TYPE_NODE + DOCUMENT_FRAG_NODE + DOCUMENT_FRAGMENT_NODE + NOTATION_NODE + HTML_DOCUMENT_NODE + DTD_NODE + ELEMENT_DECLARATION + ATTRIBUTE_DECLARATION + ENTITY_DECLARATION + NAMESPACE_DECLARATION + XINCLUDE_END + XINCLUDE_START + )], + libxml => [qw( + XML_ELEMENT_NODE + XML_ATTRIBUTE_NODE + XML_TEXT_NODE + XML_CDATA_SECTION_NODE + XML_ENTITY_REF_NODE + XML_ENTITY_NODE + XML_PI_NODE + XML_COMMENT_NODE + XML_DOCUMENT_NODE + XML_DOCUMENT_TYPE_NODE + XML_DOCUMENT_FRAG_NODE + XML_NOTATION_NODE + XML_HTML_DOCUMENT_NODE + XML_DTD_NODE + XML_ELEMENT_DECL + XML_ATTRIBUTE_DECL + XML_ENTITY_DECL + XML_NAMESPACE_DECL + XML_XINCLUDE_END + XML_XINCLUDE_START + )], + gdome => [qw( + GDOME_ELEMENT_NODE + GDOME_ATTRIBUTE_NODE + GDOME_TEXT_NODE + GDOME_CDATA_SECTION_NODE + GDOME_ENTITY_REF_NODE + GDOME_ENTITY_NODE + GDOME_PI_NODE + GDOME_COMMENT_NODE + GDOME_DOCUMENT_NODE + GDOME_DOCUMENT_TYPE_NODE + GDOME_DOCUMENT_FRAG_NODE + GDOME_NOTATION_NODE + GDOME_HTML_DOCUMENT_NODE + GDOME_DTD_NODE + GDOME_ELEMENT_DECL + GDOME_ATTRIBUTE_DECL + GDOME_ENTITY_DECL + GDOME_NAMESPACE_DECL + GDOME_XINCLUDE_END + GDOME_XINCLUDE_START + )], + encoding => [qw( + encodeToUTF8 + decodeFromUTF8 + )], + ); + +@EXPORT_OK = ( + @{$EXPORT_TAGS{encoding}}, + @{$EXPORT_TAGS{w3c}}, + @{$EXPORT_TAGS{libxml}}, + @{$EXPORT_TAGS{gdome}}, + ); + +@EXPORT = ( + @{$EXPORT_TAGS{encoding}}, + @{$EXPORT_TAGS{w3c}}, + ); + +#-------------------------------------------------------------------------# +# W3 conform node types # +#-------------------------------------------------------------------------# +use constant ELEMENT_NODE => 1; +use constant ATTRIBUTE_NODE => 2; +use constant TEXT_NODE => 3; +use constant CDATA_SECTION_NODE => 4; +use constant ENTITY_REFERENCE_NODE => 5; +use constant ENTITY_NODE => 6; +use constant PROCESSING_INSTRUCTION_NODE => 7; +use constant COMMENT_NODE => 8; +use constant DOCUMENT_NODE => 9; +use constant DOCUMENT_TYPE_NODE => 10; +use constant DOCUMENT_FRAGMENT_NODE => 11; +use constant NOTATION_NODE => 12; +use constant HTML_DOCUMENT_NODE => 13; +use constant DTD_NODE => 14; +use constant ELEMENT_DECLARATION => 15; +use constant ATTRIBUTE_DECLARATION => 16; +use constant ENTITY_DECLARATION => 17; +use constant NAMESPACE_DECLARATION => 18; + +#-------------------------------------------------------------------------# +# some extras for the W3 spec +#-------------------------------------------------------------------------# +use constant PI_NODE => 7; +use constant DOCUMENT_FRAG_NODE => 11; +use constant XINCLUDE_END => 19; +use constant XINCLUDE_START => 20; + +#-------------------------------------------------------------------------# +# libgdome compat names # +#-------------------------------------------------------------------------# +use constant GDOME_ELEMENT_NODE => 1; +use constant GDOME_ATTRIBUTE_NODE => 2; +use constant GDOME_TEXT_NODE => 3; +use constant GDOME_CDATA_SECTION_NODE => 4; +use constant GDOME_ENTITY_REF_NODE => 5; +use constant GDOME_ENTITY_NODE => 6; +use constant GDOME_PI_NODE => 7; +use constant GDOME_COMMENT_NODE => 8; +use constant GDOME_DOCUMENT_NODE => 9; +use constant GDOME_DOCUMENT_TYPE_NODE => 10; +use constant GDOME_DOCUMENT_FRAG_NODE => 11; +use constant GDOME_NOTATION_NODE => 12; +use constant GDOME_HTML_DOCUMENT_NODE => 13; +use constant GDOME_DTD_NODE => 14; +use constant GDOME_ELEMENT_DECL => 15; +use constant GDOME_ATTRIBUTE_DECL => 16; +use constant GDOME_ENTITY_DECL => 17; +use constant GDOME_NAMESPACE_DECL => 18; +use constant GDOME_XINCLUDE_START => 19; +use constant GDOME_XINCLUDE_END => 20; + +1; +#-------------------------------------------------------------------------# +__END__ + diff --git a/lib/XML/LibXML/Common.pod b/lib/XML/LibXML/Common.pod new file mode 100644 index 0000000..9423109 --- /dev/null +++ b/lib/XML/LibXML/Common.pod @@ -0,0 +1,136 @@ +=head1 NAME + +XML::LibXML::Common - Constants and Character Encoding Routines + +=head1 SYNOPSIS + + + + use XML::LibXML::Common; + + $encodedstring = encodeToUTF8( $name_of_encoding, $sting_to_encode ); + $decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode ); + +=head1 DESCRIPTION + +XML::LibXML::Common defines constants for all node types and provides interface +to libxml2 charset conversion functions. + +Since XML::LibXML use their own node type definitions, one may want to use +XML::LibXML::Common in its compatibility mode: + + +=head2 Exporter TAGS + + + + use XML::LibXML::Common qw(:libxml); + +C<<<<<< :libxml >>>>>> tag will use the XML::LibXML Compatibility mode, which defines the old 'XML_' +node-type definitions. + + + + use XML::LibXML::Common qw(:gdome); + +C<<<<<< :gdome >>>>>> tag will use the XML::GDOME Compatibility mode, which defines the old 'GDOME_' +node-type definitions. + + + + use XML::LibXML::Common qw(:w3c); + +This uses the nodetype definition names as specified for DOM. + + + + use XML::LibXML::Common qw(:encoding); + +This tag can be used to export only the charset encoding functions of +XML::LibXML::Common. + + +=head2 Exports + +By default the W3 definitions as defined in the DOM specifications and the +encoding functions are exported by XML::LibXML::Common. + + +=head2 Encoding functions + +To encode or decode a string to or from UTF-8, XML::LibXML::Common exports two +functions, which provide an interface to the encoding support in C<<<<<< libxml2 >>>>>>. Which encodings are supported by these functions depends on how C<<<<<< libxml2 >>>>>> was compiled. UTF-16 is always supported and on most installations, ISO +encodings are supported as well. + +This interface was useful for older versions of Perl. Since Perl >= 5.8 +provides similar functions via the C<<<<<< Encode >>>>>> module, it is probably a good idea to use those instead. + +=over 4 + +=item encodeToUTF8 + + $encodedstring = encodeToUTF8( $name_of_encoding, $sting_to_encode ); + +The function will convert a byte string from the specified encoding to an UTF-8 +encoded character string. + + +=item decodeToUTF8 + + $decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode ); + +This function converts an UTF-8 encoded character string to a specified +encoding. Note that the conversion can raise an error if the given string +contains characters that cannot be represented in the target encoding. + + + +=back + +Both these functions report their errors on the standard error. If an error +occurs the function will croak(). To catch the error information it is required +to call the encoding function from within an eval block in order to prevent the +entire script from being stopped on encoding error. + + +=head2 A note on history + +Before XML::LibXML 1.70, this class was available as a separate CPAN +distribution, intended to provide functionality shared between XML::LibXML, +XML::GDOME, and possibly other modules. Since there seems to be no progress in +this direction, we decided to merge XML::LibXML::Common 0.13 and XML::LibXML +1.70 to one CPAN distribution. + +The merge also naturally eliminates a practical and urgent problem experienced +by many XML::LibXML users on certain platforms, namely mysterious misbehavior +of XML::LibXML occurring if the installed (often pre-packaged) version of +XML::LibXML::Common was compiled against an older version of libxml2 than +XML::LibXML. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/DOM.pod b/lib/XML/LibXML/DOM.pod new file mode 100644 index 0000000..d01e75a --- /dev/null +++ b/lib/XML/LibXML/DOM.pod @@ -0,0 +1,149 @@ +=head1 NAME + +XML::LibXML::DOM - XML::LibXML DOM Implementation + +=head1 DESCRIPTION + +XML::LibXML provides a lightweight interface to I<<<<<< modify >>>>>> a node of the document tree generated by the XML::LibXML parser. This interface +follows as far as possible the DOM Level 3 specification. In addition to the +specified functions, XML::LibXML supports some functions that are more handy to +use in the perl environment. + +One also has to remember, that XML::LibXML is an interface to libxml2 nodes +which actually reside on the C-Level of XML::LibXML. This means each node is a +reference to a structure which is different from a perl hash or array. The only +way to access these structures' values is through the DOM interface provided by +XML::LibXML. This also means, that one I<<<<<< can't >>>>>> simply inherit an XML::LibXML node and add new member variables as if they were +hash keys. + +The DOM interface of XML::LibXML does not intend to implement a full DOM +interface as it is done by XML::GDOME and used for full featured application. +Moreover, it offers an simple way to build or modify documents that are created +by XML::LibXML's parser. + +Another target of the XML::LibXML interface is to make the interfaces of +libxml2 available to the perl community. This includes also some workarounds to +some features where libxml2 assumes more control over the C-Level that most +perl users don't have. + +One of the most important parts of the XML::LibXML DOM interface is that the +interfaces try to follow the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>) rather strictly. This means the interface functions are named as the DOM +specification says and not what widespread Java interfaces claim to be the +standard. Although there are several functions that have only a singular +interface that conforms to the DOM spec XML::LibXML provides an additional Java +style alias interface. + +Moreover, there are some function interfaces left over from early stages of +XML::LibXML for compatibility reasons. These interfaces are for compatibility +reasons I<<<<<< only >>>>>>. They might disappear in one of the future versions of XML::LibXML, so a user +is requested to switch over to the official functions. + + +=head2 Encodings and XML::LibXML's DOM implementation + +See the section on Encodings in the I<<<<<< XML::LibXML >>>>>> manual page. + + +=head2 Namespaces and XML::LibXML's DOM implementation + +XML::LibXML's DOM implementation is limited by the DOM implementation of +libxml2 which treats namespaces slightly differently than required by the DOM +Level 2 specification. + +According to the DOM Level 2 specification, namespaces of elements and +attributes should be persistent, and nodes should be permanently bound to +namespace URIs as they get created; it should be possible to manipulate the +special attributes used for declaring XML namespaces just as other attributes +without affecting the namespaces of other nodes. In DOM Level 2, the +application is responsible for creating the special attributes consistently +and/or for correct serialization of the document. + +This is both inconvenient, causes problems in serialization of DOM to XML, and +most importantly, seems almost impossible to implement over libxml2. + +In libxml2, namespace URI and prefix of a node is provided by a pointer to a +namespace declaration (appearing as a special xmlns attribute in the XML +document). If the prefix or namespace URI of the declaration changes, the +prefix and namespace URI of all nodes that point to it changes as well. +Moreover, in contrast to DOM, a node (element or attribute) can only be bound +to a namespace URI if there is some namespace declaration in the document to +point to. + +Therefore current DOM implementation in XML::LibXML tries to treat namespace +declarations in a compromise between reason, common sense, limitations of +libxml2, and the DOM Level 2 specification. + +In XML::LibXML, special attributes declaring XML namespaces are often created +automatically, usually when a namespaced node is attached to a document and no +existing declaration of the namespace and prefix is in the scope to be reused. +In this respect, XML::LibXML DOM implementation differs from the DOM Level 2 +specification according to which special attributes for declaring the +appropriate XML namespaces should not be added when a node with a namespace +prefix and namespace URI is created. + +Namespace declarations are also created when L<<<<<< XML::LibXML::Document >>>>>>'s createElementNS() or createAttributeNS() function are used. If the a +namespace is not declared on the documentElement, the namespace will be locally +declared for the newly created node. In case of Attributes this may look a bit +confusing, since these nodes cannot have namespace declarations itself. In this +case the namespace is internally applied to the attribute and later declared on +the node the attribute is appended to (if required). + +The following example may explain this a bit: + + + + my $doc = XML::LibXML->createDocument; + my $root = $doc->createElementNS( "", "foo" ); + $doc->setDocumentElement( $root ); + + my $attr = $doc->createAttributeNS( "bar", "bar:foo", "test" ); + $root->setAttributeNodeNS( $attr ); + +This piece of code will result in the following document: + + + + + + +The namespace is declared on the document element during the +setAttributeNodeNS() call. + +Namespaces can be also declared explicitly by the use of XML::LibXML::Element's +setNamespace() function. Since 1.61, they can also be manipulated with +functions setNamespaceDeclPrefix() and setNamespaceDeclURI() (not available in +DOM). Changing an URI or prefix of an existing namespace declaration affects +the namespace URI and prefix of all nodes which point to it (that is the nodes +in its scope). + +It is also important to repeat the specification: While working with namespaces +you should use the namespace aware functions instead of the simplified +versions. For example you should I<<<<<< never >>>>>> use setAttribute() but setAttributeNS(). + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Devel.pm b/lib/XML/LibXML/Devel.pm new file mode 100644 index 0000000..8f13912 --- /dev/null +++ b/lib/XML/LibXML/Devel.pm @@ -0,0 +1,216 @@ +# $Id: $ +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2011 Joachim Zobel +# +package XML::LibXML::Devel; + +use strict; +use warnings; + +use XML::LibXML; + +use vars qw ($VERSION); +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use 5.008_000; + +use parent qw(Exporter); + +use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS ); + +# This allows declaration use XML::LibXML::Devel ':all'; +# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK +# will save memory. +our %EXPORT_TAGS = ( 'all' => [ qw( + node_to_perl + node_from_perl + refcnt_inc + refcnt_dec + refcnt + fix_owner + mem_used +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +# Preloaded methods go here. + +1; +__END__ + +=head1 NAME + +XML::LibXML::Devel - makes functions from LibXML.xs available + +=head1 SYNOPSIS + + /********************************************** + * C functions you want to access + */ + xmlNode *return_node(); + void receive_node(xmlNode *); + + ############################################### + # XS Code + void * + xs_return_node + CODE: + RETVAL = return_node(); + OUTPUT: + RETVAL + + void + xs_receive_node + void *n + CODE: + receive_node(n); + + ############################################### + # Perl code + use XML::LibXML::Devel; + + sub return_node + { + my $raw_node = xs_return_node(); + my $node = XML::LibXML::Devel::node_to_perl($raw_node); + XML::LibXML::Devel::refcnt_inc($raw_node); + return $node; + } + + sub receive_node + { + my ($node) = @_; + my $raw_node = XML::LibXML::Devel::node_from_perl($node); + xs_receive_node($raw_node); + XML::LibXML::Devel::refcnt_inc($raw_node); + } + + +=head1 DESCRIPTION + +C makes functions from LibXML.xs available that +are needed to wrap libxml2 nodes in and out of XML::LibXML::Nodes. +This gives cleaner dependencies than using LibXML.so directly. + +To XS a library that uses libxml2 nodes the first step is to +do this so that xmlNodePtr is passed as void *. These raw nodes +are then turned into libxml nodes by using this C functions. + +Be aware that this module is currently rather experimental. The function +names may change if I XS more functions and introduce a reasonable +naming convention. + +Be also aware that this module is a great tool to cause segfaults and +introduce memory leaks. It does however provide a partial cure by making +C available as C. + +=head1 FUNCTIONS + +=head2 NODE MANAGEMENT + +=over 1 + +=item node_to_perl + + node_to_perl($raw_node); + +Returns a LibXML::Node object. This has a proxy node with a reference +counter and an owner attached. The raw node will be deleted as soon +as the reference counter reaches zero. +If the C library is keeping a +pointer to the raw node, you need to call refcnt_inc immediately. +You also need to replace xmlFreeNode by a call to refcnt_dec. + +=item node_to_perl + + node_from_perl($node); + +Returns a raw node. This is a void * pointer and you can do nothing +but passing it to functions that treat it as an xmlNodePtr. The +raw node will be freed as soon as its reference counter reaches zero. +If the C library is keeping a +pointer to the raw node, you need to call refcnt_inc immediately. +You also need to replace xmlFreeNode by a call to refcnt_dec. + +=item refcnt_inc + + refcnt_inc($raw_node); + +Increments the raw nodes reference counter. The raw node must already +be known to perl to have a reference counter. + +=item refcnt_dec + + refcnt_dec($raw_node); + +Decrements the raw nodes reference counter and returns the value it +had before. if the counter becomes zero or less, +this method will free the proxy node holding the reference counter. +If the node is part of a +subtree, refcnt_dec will fix the reference counts and delete +the subtree if it is not required any more. + +=item refcnt + + refcnt($raw_node); + +Returns the value of the reference counter. + +=item fix_owner + + fix_owner($raw_node, $raw_parent); + +This functions fixes the reference counts for an entire subtree. +it is very important to fix an entire subtree after node operations +where the documents or the owner node may get changed. this method is +aware about nodes that already belong to a certain owner node. + +=back + +=head2 MEMORY DEBUGGING + +=over 1 + +=item $ENV{DEBUG_MEMORY} + + BEGIN {$ENV{DEBUG_MEMORY} = 1;}; + use XML::LibXML; + +This turns on libxml2 memory debugging. It must be set before +XML::LibXML is loaded. + + +=item mem_used + + mem_used(); + +Returns the number of bytes currently allocated. + +=back + +=head2 EXPORT + +None by default. + +=head1 SEE ALSO + +This was created to support the needs of Apache2::ModXml2. So this +can serve as an example. + +=head1 AUTHOR + +Joachim Zobel Ejz-2011@heute-morgen.deE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2011 by Joachim Zobel + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.1 or, +at your option, any later version of Perl 5 you may have available. + + +=cut + diff --git a/lib/XML/LibXML/Document.pod b/lib/XML/LibXML/Document.pod new file mode 100644 index 0000000..2cdc576 --- /dev/null +++ b/lib/XML/LibXML/Document.pod @@ -0,0 +1,703 @@ +=head1 NAME + +XML::LibXML::Document - XML::LibXML DOM Document Class + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Document nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $dom = XML::LibXML::Document->new( $version, $encoding ); + $dom = XML::LibXML::Document->createDocument( $version, $encoding ); + $strURI = $doc->URI(); + $doc->setURI($strURI); + $strEncoding = $doc->encoding(); + $strEncoding = $doc->actualEncoding(); + $doc->setEncoding($new_encoding); + $strVersion = $doc->version(); + $doc->standalone + $doc->setStandalone($numvalue); + my $compression = $doc->compression; + $doc->setCompression($ziplevel); + $docstring = $dom->toString($format); + $c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]); + $ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list); + $str = $doc->serialize($format); + $state = $doc->toFile($filename, $format); + $state = $doc->toFH($fh, $format); + $str = $document->toStringHTML(); + $str = $document->serialize_html(); + $bool = $dom->is_valid(); + $dom->validate(); + $root = $dom->documentElement(); + $dom->setDocumentElement( $root ); + $element = $dom->createElement( $nodename ); + $element = $dom->createElementNS( $namespaceURI, $nodename ); + $text = $dom->createTextNode( $content_text ); + $comment = $dom->createComment( $comment_text ); + $attrnode = $doc->createAttribute($name [,$value]); + $attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] ); + $fragment = $doc->createDocumentFragment(); + $cdata = $dom->createCDATASection( $cdata_content ); + my $pi = $doc->createProcessingInstruction( $target, $data ); + my $entref = $doc->createEntityReference($refname); + $dtd = $document->createInternalSubset( $rootnode, $public, $system); + $dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId); + $document->importNode( $node ); + $document->adoptNode( $node ); + my $dtd = $doc->externalSubset; + my $dtd = $doc->internalSubset; + $doc->setExternalSubset($dtd); + $doc->setInternalSubset($dtd); + my $dtd = $doc->removeExternalSubset(); + my $dtd = $doc->removeInternalSubset(); + my @nodelist = $doc->getElementsByTagName($tagname); + my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname); + my @nodelist = $doc->getElementsByLocalName($localname); + my $node = $doc->getElementById($id); + $dom->indexElements(); + +=head1 DESCRIPTION + +The Document Class is in most cases the result of a parsing process. But +sometimes it is necessary to create a Document from scratch. The DOM Document +Class provides functions that conform to the DOM Core naming style. + +It inherits all functions from L<<<<<< XML::LibXML::Node >>>>>> as specified in the DOM specification. This enables access to the nodes besides +the root element on document level - a C<<<<<< DTD >>>>>> for example. The support for these nodes is limited at the moment. + +While generally nodes are bound to a document in the DOM concept it is +suggested that one should always create a node not bound to any document. There +is no need of really including the node to the document, but once the node is +bound to a document, it is quite safe that all strings have the correct +encoding. If an unbound text node with an ISO encoded string is created (e.g. +with $CLASS->new()), the C<<<<<< toString >>>>>> function may not return the expected result. + +To prevent such problems, it is recommended to pass all data to XML::LibXML +methods as character strings (i.e. UTF-8 encoded, with the UTF8 flag on). + + +=head1 METHODS + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item new + + $dom = XML::LibXML::Document->new( $version, $encoding ); + +alias for createDocument() + + +=item createDocument + + $dom = XML::LibXML::Document->createDocument( $version, $encoding ); + +The constructor for the document class. As Parameter it takes the version +string and (optionally) the encoding string. Simply calling I<<<<<< createDocument >>>>>>() will create the document: + + + + + +Both parameter are optional. The default value for I<<<<<< $version >>>>>> is C<<<<<< 1.0 >>>>>>, of course. If the I<<<<<< $encoding >>>>>> parameter is not set, the encoding will be left unset, which means UTF-8 is +implied. + +The call of I<<<<<< createDocument >>>>>>() without any parameter will result the following code: + + + + + +Alternatively one can call this constructor directly from the XML::LibXML class +level, to avoid some typing. This will not have any effect on the class +instance, which is always XML::LibXML::Document. + + + + my $document = XML::LibXML->createDocument( "1.0", "UTF-8" ); + +is therefore a shortcut for + + + + my $document = XML::LibXML::Document->createDocument( "1.0", "UTF-8" ); + + +=item URI + + $strURI = $doc->URI(); + +Returns the URI (or filename) of the original document. For documents obtained +by parsing a string of a FH without using the URI parsing argument of the +corresponding C<<<<<< parse_* >>>>>> function, the result is a generated string unknown-XYZ where XYZ is some +number; for documents created with the constructor C<<<<<< new >>>>>>, the URI is undefined. + +The value can be modified by calling C<<<<<< setURI >>>>>> method on the document node. + + +=item setURI + + $doc->setURI($strURI); + +Sets the URI of the document reported by the method URI (see also the URI +argument to the various C<<<<<< parse_* >>>>>> functions). + + +=item encoding + + $strEncoding = $doc->encoding(); + +returns the encoding string of the document. + + + + my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" ); + print $doc->encoding; # prints ISO-8859-15 + + +=item actualEncoding + + $strEncoding = $doc->actualEncoding(); + +returns the encoding in which the XML will be returned by $doc->toString(). +This is usually the original encoding of the document as declared in the XML +declaration and returned by $doc->encoding. If the original encoding is not +known (e.g. if created in memory or parsed from a XML without a declared +encoding), 'UTF-8' is returned. + + + + my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" ); + print $doc->encoding; # prints ISO-8859-15 + + +=item setEncoding + + $doc->setEncoding($new_encoding); + +This method allows one to change the declaration of encoding in the XML +declaration of the document. The value also affects the encoding in which the +document is serialized to XML by $doc->toString(). Use setEncoding() to remove +the encoding declaration. + + +=item version + + $strVersion = $doc->version(); + +returns the version string of the document + +I<<<<<< getVersion() >>>>>> is an alternative form of this function. + + +=item standalone + + $doc->standalone + +This function returns the Numerical value of a documents XML declarations +standalone attribute. It returns I<<<<<< 1 >>>>>> if standalone="yes" was found, I<<<<<< 0 >>>>>> if standalone="no" was found and I<<<<<< -1 >>>>>> if standalone was not specified (default on creation). + + +=item setStandalone + + $doc->setStandalone($numvalue); + +Through this method it is possible to alter the value of a documents standalone +attribute. Set it to I<<<<<< 1 >>>>>> to set standalone="yes", to I<<<<<< 0 >>>>>> to set standalone="no" or set it to I<<<<<< -1 >>>>>> to remove the standalone attribute from the XML declaration. + + +=item compression + + my $compression = $doc->compression; + +libxml2 allows reading of documents directly from gzipped files. In this case +the compression variable is set to the compression level of that file (0-8). If +XML::LibXML parsed a different source or the file wasn't compressed, the +returned value will be I<<<<<< -1 >>>>>>. + + +=item setCompression + + $doc->setCompression($ziplevel); + +If one intends to write the document directly to a file, it is possible to set +the compression level for a given document. This level can be in the range from +0 to 8. If XML::LibXML should not try to compress use I<<<<<< -1 >>>>>> (default). + +Note that this feature will I<<<<<< only >>>>>> work if libxml2 is compiled with zlib support and toFile() is used for output. + + +=item toString + + $docstring = $dom->toString($format); + +I<<<<<< toString >>>>>> is a DOM serializing function, so the DOM Tree is serialized into an XML +string, ready for output. + +IMPORTANT: unlike toString for other nodes, on document nodes this function +returns the XML as a byte string in the original encoding of the document (see +the actualEncoding() method)! This means you can simply do: + + + + open my $out_fh, '>', $file; + print {$out_fh} $doc->toString; + +regardless of the actual encoding of the document. See the section on encodings +in L<<<<<< XML::LibXML >>>>>> for more details. + +The optional I<<<<<< $format >>>>>> parameter sets the indenting of the output. This parameter is expected to be an C<<<<<< integer >>>>>> value, that specifies that indentation should be used. The format parameter can +have three different values if it is used: + +If $format is 0, than the document is dumped as it was originally parsed + +If $format is 1, libxml2 will add ignorable white spaces, so the nodes content +is easier to read. Existing text nodes will not be altered + +If $format is 2 (or higher), libxml2 will act as $format == 1 but it add a +leading and a trailing line break to each text node. + +libxml2 uses a hard-coded indentation of 2 space characters per indentation +level. This value can not be altered on run-time. + + +=item toStringC14N + + $c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]); + +See the documentation in L<<<<<< XML::LibXML::Node >>>>>>. + + +=item toStringEC14N + + $ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list); + +See the documentation in L<<<<<< XML::LibXML::Node >>>>>>. + + +=item serialize + + $str = $doc->serialize($format); + +An alias for toString(). This function was name added to be more consistent +with libxml2. + + +=item serialize_c14n + +An alias for toStringC14N(). + + +=item serialize_exc_c14n + +An alias for toStringEC14N(). + + +=item toFile + + $state = $doc->toFile($filename, $format); + +This function is similar to toString(), but it writes the document directly +into a filesystem. This function is very useful, if one needs to store large +documents. + +The format parameter has the same behaviour as in toString(). + + +=item toFH + + $state = $doc->toFH($fh, $format); + +This function is similar to toString(), but it writes the document directly to +a filehandle or a stream. A byte stream in the document encoding is passed to +the file handle. Do NOT apply any C<<<<<< :encoding(...) >>>>>> or C<<<<<< :utf8 >>>>>> PerlIO layer to the filehandle! See the section on encodings in L<<<<<< XML::LibXML >>>>>> for more details. + +The format parameter has the same behaviour as in toString(). + + +=item toStringHTML + + $str = $document->toStringHTML(); + +I<<<<<< toStringHTML >>>>>> serialize the tree to a byte string in the document encoding as HTML. With this +method indenting is automatic and managed by libxml2 internally. + + +=item serialize_html + + $str = $document->serialize_html(); + +An alias for toStringHTML(). + + +=item is_valid + + $bool = $dom->is_valid(); + +Returns either TRUE or FALSE depending on whether the DOM Tree is a valid +Document or not. + +You may also pass in a L<<<<<< XML::LibXML::Dtd >>>>>> object, to validate against an external DTD: + + + + if (!$dom->is_valid($dtd)) { + warn("document is not valid!"); + } + + +=item validate + + $dom->validate(); + +This is an exception throwing equivalent of is_valid. If the document is not +valid it will throw an exception containing the error. This allows you much +better error reporting than simply is_valid or not. + +Again, you may pass in a DTD object + + +=item documentElement + + $root = $dom->documentElement(); + +Returns the root element of the Document. A document can have just one root +element to contain the documents data. + +Optionally one can use I<<<<<< getDocumentElement >>>>>>. + + +=item setDocumentElement + + $dom->setDocumentElement( $root ); + +This function enables you to set the root element for a document. The function +supports the import of a node from a different document tree, but does not +support a document fragment as $root. + + +=item createElement + + $element = $dom->createElement( $nodename ); + +This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>>. + + +=item createElementNS + + $element = $dom->createElementNS( $namespaceURI, $nodename ); + +This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>> and placed in the given namespace. + + +=item createTextNode + + $text = $dom->createTextNode( $content_text ); + +As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Text Node >>>>>> bound to the DOM. + + +=item createComment + + $comment = $dom->createComment( $comment_text ); + +As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Comment Node >>>>>> bound to the DOM. + + +=item createAttribute + + $attrnode = $doc->createAttribute($name [,$value]); + +Creates a new Attribute node. + + +=item createAttributeNS + + $attrnode = $doc->createAttributeNS( namespaceURI, $name [,$value] ); + +Creates an Attribute bound to a namespace. + + +=item createDocumentFragment + + $fragment = $doc->createDocumentFragment(); + +This function creates a DocumentFragment. + + +=item createCDATASection + + $cdata = $dom->createCDATASection( $cdata_content ); + +Similar to createTextNode and createComment, this function creates a +CDataSection bound to the current DOM. + + +=item createProcessingInstruction + + my $pi = $doc->createProcessingInstruction( $target, $data ); + +create a processing instruction node. + +Since this method is quite long one may use its short form I<<<<<< createPI() >>>>>>. + + +=item createEntityReference + + my $entref = $doc->createEntityReference($refname); + +If a document has a DTD specified, one can create entity references by using +this function. If one wants to add a entity reference to the document, this +reference has to be created by this function. + +An entity reference is unique to a document and cannot be passed to other +documents as other nodes can be passed. + +I<<<<<< NOTE: >>>>>> A text content containing something that looks like an entity reference, will +not be expanded to a real entity reference unless it is a predefined entity + + + + my $string = "&foo;"; + $some_element->appendText( $string ); + print $some_element->textContent; # prints "&foo;" + + +=item createInternalSubset + + $dtd = $document->createInternalSubset( $rootnode, $public, $system); + +This function creates and adds an internal subset to the given document. +Because the function automatically adds the DTD to the document there is no +need to add the created node explicitly to the document. + + + + my $document = XML::LibXML::Document->new(); + my $dtd = $document->createInternalSubset( "foo", undef, "foo.dtd" ); + +will result in the following XML document: + + + + + + +By setting the public parameter it is possible to set PUBLIC DTDs to a given +document. So + + + + my $document = XML::LibXML::Document->new(); + my $dtd = $document->createInternalSubset( "foo", "-//FOO//DTD FOO 0.1//EN", undef ); + +will cause the following declaration to be created on the document: + + + + + + + +=item createExternalSubset + + $dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId); + +This function is similar to C<<<<<< createInternalSubset() >>>>>> but this DTD is considered to be external and is therefore not added to the +document itself. Nevertheless it can be used for validation purposes. + + +=item importNode + + $document->importNode( $node ); + +If a node is not part of a document, it can be imported to another document. As +specified in DOM Level 2 Specification the Node will not be altered or removed +from its original document (C<<<<<< $node-EcloneNode(1) >>>>>> will get called implicitly). + +I<<<<<< NOTE: >>>>>> Don't try to use importNode() to import sub-trees that contain an entity +reference - even if the entity reference is the root node of the sub-tree. This +will cause serious problems to your program. This is a limitation of libxml2 +and not of XML::LibXML itself. + + +=item adoptNode + + $document->adoptNode( $node ); + +If a node is not part of a document, it can be imported to another document. As +specified in DOM Level 3 Specification the Node will not be altered but it will +removed from its original document. + +After a document adopted a node, the node, its attributes and all its +descendants belong to the new document. Because the node does not belong to the +old document, it will be unlinked from its old location first. + +I<<<<<< NOTE: >>>>>> Don't try to adoptNode() to import sub-trees that contain entity references - +even if the entity reference is the root node of the sub-tree. This will cause +serious problems to your program. This is a limitation of libxml2 and not of +XML::LibXML itself. + + +=item externalSubset + + my $dtd = $doc->externalSubset; + +If a document has an external subset defined it will be returned by this +function. + +I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in +XML::LibXML is still limited. In particular one may not want use common node +function on doctype declaration nodes! + + +=item internalSubset + + my $dtd = $doc->internalSubset; + +If a document has an internal subset defined it will be returned by this +function. + +I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in +XML::LibXML is still limited. In particular one may not want use common node +function on doctype declaration nodes! + + +=item setExternalSubset + + $doc->setExternalSubset($dtd); + +I<<<<<< EXPERIMENTAL! >>>>>> + +This method sets a DTD node as an external subset of the given document. + + +=item setInternalSubset + + $doc->setInternalSubset($dtd); + +I<<<<<< EXPERIMENTAL! >>>>>> + +This method sets a DTD node as an internal subset of the given document. + + +=item removeExternalSubset + + my $dtd = $doc->removeExternalSubset(); + +I<<<<<< EXPERIMENTAL! >>>>>> + +If a document has an external subset defined it can be removed from the +document by using this function. The removed dtd node will be returned. + + +=item removeInternalSubset + + my $dtd = $doc->removeInternalSubset(); + +I<<<<<< EXPERIMENTAL! >>>>>> + +If a document has an internal subset defined it can be removed from the +document by using this function. The removed dtd node will be returned. + + +=item getElementsByTagName + + my @nodelist = $doc->getElementsByTagName($tagname); + +Implements the DOM Level 2 function + +In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + + +=item getElementsByTagNameNS + + my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname); + +Implements the DOM Level 2 function + +In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + + +=item getElementsByLocalName + + my @nodelist = $doc->getElementsByLocalName($localname); + +This allows the fetching of all nodes from a given document with the given +Localname. + +In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + + +=item getElementById + + my $node = $doc->getElementById($id); + +Returns the element that has an ID attribute with the given value. If no such +element exists, this returns undef. + +Note: the ID of an element may change while manipulating the document. For +documents with a DTD, the information about ID attributes is only available if +DTD loading/validation has been requested. For HTML documents parsed with the +HTML parser ID detection is done automatically. In XML documents, all "xml:id" +attributes are considered to be of type ID. You can test ID-ness of an +attribute node with $attr->isId(). + +In versions 1.59 and earlier this method was called getElementsById() (plural) +by mistake. Starting from 1.60 this name is maintained as an alias only for +backward compatibility. + + +=item indexElements + + $dom->indexElements(); + +This function causes libxml2 to stamp all elements in a document with their +document position index which considerably speeds up XPath queries for large +documents. It should only be used with static documents that won't be further +changed by any DOM methods, because once a document is indexed, XPath will +always prefer the index to other methods of determining the document order of +nodes. XPath could therefore return improperly ordered node-lists when applied +on a document that has been changed after being indexed. It is of course +possible to use this method to re-index a modified document before using it +with XPath again. This function is not a part of the DOM specification. + +This function returns number of elements indexed, -1 if error occurred, or -2 +if this feature is not available in the running libxml2. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/DocumentFragment.pod b/lib/XML/LibXML/DocumentFragment.pod new file mode 100644 index 0000000..61e741c --- /dev/null +++ b/lib/XML/LibXML/DocumentFragment.pod @@ -0,0 +1,47 @@ +=head1 NAME + +XML::LibXML::DocumentFragment - XML::LibXML's DOM L2 Document Fragment Implementation + +=head1 SYNOPSIS + + + + use XML::LibXML; + + +=head1 DESCRIPTION + +This class is a helper class as described in the DOM Level 2 Specification. It +is implemented as a node without name. All adding, inserting or replacing +functions are aware of document fragments now. + +As well I<<<<<< all >>>>>> unbound nodes (all nodes that do not belong to any document sub-tree) are +implicit members of document fragments. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Dtd.pod b/lib/XML/LibXML/Dtd.pod new file mode 100644 index 0000000..294cb17 --- /dev/null +++ b/lib/XML/LibXML/Dtd.pod @@ -0,0 +1,109 @@ +=head1 NAME + +XML::LibXML::Dtd - XML::LibXML DTD Handling + +=head1 SYNOPSIS + + + + use XML::LibXML; + + $dtd = XML::LibXML::Dtd->new($public_id, $system_id); + $dtd = XML::LibXML::Dtd->parse_string($dtd_str); + $publicId = $dtd->getName(); + $publicId = $dtd->publicId(); + $systemId = $dtd->systemId(); + +=head1 DESCRIPTION + +This class holds a DTD. You may parse a DTD from either a string, or from an +external SYSTEM identifier. + +No support is available as yet for parsing from a filehandle. + +XML::LibXML::Dtd is a sub-class of L<<<<<< XML::LibXML::Node >>>>>>, so all the methods available to nodes (particularly toString()) are available +to Dtd objects. + + +=head1 METHODS + +=over 4 + +=item new + + $dtd = XML::LibXML::Dtd->new($public_id, $system_id); + +Parse a DTD from the system identifier, and return a DTD object that you can +pass to $doc->is_valid() or $doc->validate(). + + + + my $dtd = XML::LibXML::Dtd->new( + "SOME // Public / ID / 1.0", + "test.dtd" + ); + my $doc = XML::LibXML->new->parse_file("test.xml"); + $doc->validate($dtd); + + +=item parse_string + + $dtd = XML::LibXML::Dtd->parse_string($dtd_str); + +The same as new() above, except you can parse a DTD from a string. Note that +parsing from string may fail if the DTD contains external parametric-entity +references with relative URLs. + + +=item getName + + $publicId = $dtd->getName(); + +Returns the name of DTD; i.e., the name immediately following the DOCTYPE +keyword. + + +=item publicId + + $publicId = $dtd->publicId(); + +Returns the public identifier of the external subset. + + +=item systemId + + $systemId = $dtd->systemId(); + +Returns the system identifier of the external subset. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Element.pod b/lib/XML/LibXML/Element.pod new file mode 100644 index 0000000..f6f0b74 --- /dev/null +++ b/lib/XML/LibXML/Element.pod @@ -0,0 +1,402 @@ +=head1 NAME + +XML::LibXML::Element - XML::LibXML Class for Element Nodes + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Element nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $node = XML::LibXML::Element->new( $name ); + $node->setAttribute( $aname, $avalue ); + $node->setAttributeNS( $nsURI, $aname, $avalue ); + $avalue = $node->getAttribute( $aname ); + $avalue = $node->getAttributeNS( $nsURI, $aname ); + $attrnode = $node->getAttributeNode( $aname ); + $attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname ); + $node->removeAttribute( $aname ); + $node->removeAttributeNS( $nsURI, $aname ); + $boolean = $node->hasAttribute( $aname ); + $boolean = $node->hasAttributeNS( $nsURI, $aname ); + @nodes = $node->getChildrenByTagName($tagname); + @nodes = $node->getChildrenByTagNameNS($nsURI,$tagname); + @nodes = $node->getChildrenByLocalName($localname); + @nodes = $node->getElementsByTagName($tagname); + @nodes = $node->getElementsByTagNameNS($nsURI,$localname); + @nodes = $node->getElementsByLocalName($localname); + $node->appendWellBalancedChunk( $chunk ); + $node->appendText( $PCDATA ); + $node->appendTextNode( $PCDATA ); + $node->appendTextChild( $childname , $PCDATA ); + $node->setNamespace( $nsURI , $nsPrefix, $activate ); + $node->setNamespaceDeclURI( $nsPrefix, $newURI ); + $node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix ); + +=head1 METHODS + +The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here. + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item new + + $node = XML::LibXML::Element->new( $name ); + +This function creates a new node unbound to any DOM. + + +=item setAttribute + + $node->setAttribute( $aname, $avalue ); + +This method sets or replaces the node's attribute C<<<<<< $aname >>>>>> to the value C<<<<<< $avalue >>>>>> + + +=item setAttributeNS + + $node->setAttributeNS( $nsURI, $aname, $avalue ); + +Namespace-aware version of C<<<<<< setAttribute >>>>>>, where C<<<<<< $nsURI >>>>>> is a namespace URI, C<<<<<< $aname >>>>>> is a qualified name, and C<<<<<< $avalue >>>>>> is the value. The namespace URI may be null (empty or undefined) in order to +create an attribute which has no namespace. + +The current implementation differs from DOM in the following aspects + +If an attribute with the same local name and namespace URI already exists on +the element, but its prefix differs from the prefix of C<<<<<< $aname >>>>>>, then this function is supposed to change the prefix (regardless of namespace +declarations and possible collisions). However, the current implementation does +rather the opposite. If a prefix is declared for the namespace URI in the scope +of the attribute, then the already declared prefix is used, disregarding the +prefix specified in C<<<<<< $aname >>>>>>. If no prefix is declared for the namespace, the function tries to declare the +prefix specified in C<<<<<< $aname >>>>>> and dies if the prefix is already taken by some other namespace. + +According to DOM Level 2 specification, this method can also be used to create +or modify special attributes used for declaring XML namespaces (which belong to +the namespace "http://www.w3.org/2000/xmlns/" and have prefix or name "xmlns"). +This should work since version 1.61, but again the implementation differs from +DOM specification in the following: if a declaration of the same namespace +prefix already exists on the element, then changing its value via this method +automatically changes the namespace of all elements and attributes in its +scope. This is because in libxml2 the namespace URI of an element is not static +but is computed from a pointer to a namespace declaration attribute. + + +=item getAttribute + + $avalue = $node->getAttribute( $aname ); + +If C<<<<<< $node >>>>>> has an attribute with the name C<<<<<< $aname >>>>>>, the value of this attribute will get returned. + + +=item getAttributeNS + + $avalue = $node->getAttributeNS( $nsURI, $aname ); + +Retrieves an attribute value by local name and namespace URI. + + +=item getAttributeNode + + $attrnode = $node->getAttributeNode( $aname ); + +Retrieve an attribute node by name. If no attribute with a given name exists, C<<<<<< undef >>>>>> is returned. + + +=item getAttributeNodeNS + + $attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname ); + +Retrieves an attribute node by local name and namespace URI. If no attribute +with a given localname and namespace exists, C<<<<<< undef >>>>>> is returned. + + +=item removeAttribute + + $node->removeAttribute( $aname ); + +The method removes the attribute C<<<<<< $aname >>>>>> from the node's attribute list, if the attribute can be found. + + +=item removeAttributeNS + + $node->removeAttributeNS( $nsURI, $aname ); + +Namespace version of C<<<<<< removeAttribute >>>>>> + + +=item hasAttribute + + $boolean = $node->hasAttribute( $aname ); + +This function tests if the named attribute is set for the node. If the +attribute is specified, TRUE (1) will be returned, otherwise the return value +is FALSE (0). + + +=item hasAttributeNS + + $boolean = $node->hasAttributeNS( $nsURI, $aname ); + +namespace version of C<<<<<< hasAttribute >>>>>> + + +=item getChildrenByTagName + + @nodes = $node->getChildrenByTagName($tagname); + +The function gives direct access to all child elements of the current node with +a given tagname, where tagname is a qualified name, that is, in case of +namespace usage it may consist of a prefix and local name. This function makes +things a lot easier if one needs to handle big data sets. A special tagname '*' +can be used to match any name. + +If this function is called in SCALAR context, it returns the number of elements +found. + + +=item getChildrenByTagNameNS + + @nodes = $node->getChildrenByTagNameNS($nsURI,$tagname); + +Namespace version of C<<<<<< getChildrenByTagName >>>>>>. A special nsURI '*' matches any namespace URI, in which case the function +behaves just like C<<<<<< getChildrenByLocalName >>>>>>. + +If this function is called in SCALAR context, it returns the number of elements +found. + + +=item getChildrenByLocalName + + @nodes = $node->getChildrenByLocalName($localname); + +The function gives direct access to all child elements of the current node with +a given local name. It makes things a lot easier if one needs to handle big +data sets. A special C<<<<<< localname >>>>>> '*' can be used to match any local name. + +If this function is called in SCALAR context, it returns the number of elements +found. + + +=item getElementsByTagName + + @nodes = $node->getElementsByTagName($tagname); + +This function is part of the spec. It fetches all descendants of a node with a +given tagname, where C<<<<<< tagname >>>>>> is a qualified name, that is, in case of namespace usage it may consist of a +prefix and local name. A special C<<<<<< tagname >>>>>> '*' can be used to match any tag name. + +In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + + +=item getElementsByTagNameNS + + @nodes = $node->getElementsByTagNameNS($nsURI,$localname); + +Namespace version of C<<<<<< getElementsByTagName >>>>>> as found in the DOM spec. A special C<<<<<< localname >>>>>> '*' can be used to match any local name and C<<<<<< nsURI >>>>>> '*' can be used to match any namespace URI. + +In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + + +=item getElementsByLocalName + + @nodes = $node->getElementsByLocalName($localname); + +This function is not found in the DOM specification. It is a mix of +getElementsByTagName and getElementsByTagNameNS. It will fetch all tags +matching the given local-name. This allows one to select tags with the same +local name across namespace borders. + +In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + + +=item appendWellBalancedChunk + + $node->appendWellBalancedChunk( $chunk ); + +Sometimes it is necessary to append a string coded XML Tree to a node. I<<<<<< appendWellBalancedChunk >>>>>> will do the trick for you. But this is only done if the String is C<<<<<< well-balanced >>>>>>. + +I<<<<<< Note that appendWellBalancedChunk() is only left for compatibility reasons >>>>>>. Implicitly it uses + + + + my $fragment = $parser->parse_balanced_chunk( $chunk ); + $node->appendChild( $fragment ); + +This form is more explicit and makes it easier to control the flow of a script. + + +=item appendText + + $node->appendText( $PCDATA ); + +alias for appendTextNode(). + + +=item appendTextNode + + $node->appendTextNode( $PCDATA ); + +This wrapper function lets you add a string directly to an element node. + + +=item appendTextChild + + $node->appendTextChild( $childname , $PCDATA ); + +Somewhat similar with C<<<<<< appendTextNode >>>>>>: It lets you set an Element, that contains only a C<<<<<< text node >>>>>> directly by specifying the name and the text content. + + +=item setNamespace + + $node->setNamespace( $nsURI , $nsPrefix, $activate ); + +setNamespace() allows one to apply a namespace to an element. The function +takes three parameters: 1. the namespace URI, which is required and the two +optional values prefix, which is the namespace prefix, as it should be used in +child elements or attributes as well as the additional activate parameter. If +prefix is not given, undefined or empty, this function tries to create a +declaration of the default namespace. + +The activate parameter is most useful: If this parameter is set to FALSE (0), a +new namespace declaration is simply added to the element while the element's +namespace itself is not altered. Nevertheless, activate is set to TRUE (1) on +default. In this case the namespace is used as the node's effective namespace. +This means the namespace prefix is added to the node name and if there was a +namespace already active for the node, it will be replaced (but its declaration +is not removed from the document). A new namespace declaration is only created +if necessary (that is, if the element is already in the scope of a namespace +declaration associating the prefix with the namespace URI, then this +declaration is reused). + +The following example may clarify this: + + + + my $e1 = $doc->createElement("bar"); + $e1->setNamespace("http://foobar.org", "foo") + +results + + + + + +while + + + + my $e2 = $doc->createElement("bar"); + $e2->setNamespace("http://foobar.org", "foo",0) + +results only + + + + + +By using $activate == 0 it is possible to create multiple namespace +declarations on a single element. + +The function fails if it is required to create a declaration associating the +prefix with the namespace URI but the element already carries a declaration +with the same prefix but different namespace URI. + + +=item setNamespaceDeclURI + + $node->setNamespaceDeclURI( $nsPrefix, $newURI ); + +EXPERIMENTAL IN 1.61 ! + +This function manipulates directly with an existing namespace declaration on an +element. It takes two parameters: the prefix by which it looks up the namespace +declaration and a new namespace URI which replaces its previous value. + +It returns 1 if the namespace declaration was found and changed, 0 otherwise. + +All elements and attributes (even those previously unbound from the document) +for which the namespace declaration determines their namespace belong to the +new namespace after the change. + +If the new URI is undef or empty, the nodes have no namespace and no prefix +after the change. Namespace declarations once nulled in this way do not further +appear in the serialized output (but do remain in the document for internal +integrity of libxml2 data structures). + +This function is NOT part of any DOM API. + + +=item setNamespaceDeclPrefix + + $node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix ); + +EXPERIMENTAL IN 1.61 ! + +This function manipulates directly with an existing namespace declaration on an +element. It takes two parameters: the old prefix by which it looks up the +namespace declaration and a new prefix which is to replace the old one. + +The function dies with an error if the element is in the scope of another +declaration whose prefix equals to the new prefix, or if the change should +result in a declaration with a non-empty prefix but empty namespace URI. +Otherwise, it returns 1 if the namespace declaration was found and changed and +0 if not found. + +All elements and attributes (even those previously unbound from the document) +for which the namespace declaration determines their namespace change their +prefix to the new value. + +If the new prefix is undef or empty, the namespace declaration becomes a +declaration of a default namespace. The corresponding nodes drop their +namespace prefix (but remain in the, now default, namespace). In this case the +function fails, if the containing element is in the scope of another default +namespace declaration. + +This function is NOT part of any DOM API. + + + +=back + + +=head1 OVERLOADING + +XML::LibXML::Element overloads hash dereferencing to provide access to the +element's attributes. For non-namespaced attributes, the attribute name is the +hash key, and the attribute value is the hash value. For namespaced attributes, +the hash key is qualified with the namespace URI, using Clark notation. + +Perl's "tied hash" feature is used, which means that the hash gives you +read-write access to the element's attributes. For more information, see L<<<<<< XML::LibXML::AttributeHash >>>>>> + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/ErrNo.pm b/lib/XML/LibXML/ErrNo.pm new file mode 100644 index 0000000..0fa6308 --- /dev/null +++ b/lib/XML/LibXML/ErrNo.pm @@ -0,0 +1,501 @@ +# $Id: ErrNo.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $ +# +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::ErrNo; + +use strict; +use warnings; +use vars qw($VERSION); + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use constant ERR_OK => 0; +use constant ERR_INTERNAL_ERROR => 1; +use constant ERR_NO_MEMORY => 2; +use constant ERR_DOCUMENT_START => 3; +use constant ERR_DOCUMENT_EMPTY => 4; +use constant ERR_DOCUMENT_END => 5; +use constant ERR_INVALID_HEX_CHARREF => 6; +use constant ERR_INVALID_DEC_CHARREF => 7; +use constant ERR_INVALID_CHARREF => 8; +use constant ERR_INVALID_CHAR => 9; +use constant ERR_CHARREF_AT_EOF => 10; +use constant ERR_CHARREF_IN_PROLOG => 11; +use constant ERR_CHARREF_IN_EPILOG => 12; +use constant ERR_CHARREF_IN_DTD => 13; +use constant ERR_ENTITYREF_AT_EOF => 14; +use constant ERR_ENTITYREF_IN_PROLOG => 15; +use constant ERR_ENTITYREF_IN_EPILOG => 16; +use constant ERR_ENTITYREF_IN_DTD => 17; +use constant ERR_PEREF_AT_EOF => 18; +use constant ERR_PEREF_IN_PROLOG => 19; +use constant ERR_PEREF_IN_EPILOG => 20; +use constant ERR_PEREF_IN_INT_SUBSET => 21; +use constant ERR_ENTITYREF_NO_NAME => 22; +use constant ERR_ENTITYREF_SEMICOL_MISSING => 23; +use constant ERR_PEREF_NO_NAME => 24; +use constant ERR_PEREF_SEMICOL_MISSING => 25; +use constant ERR_UNDECLARED_ENTITY => 26; +use constant WAR_UNDECLARED_ENTITY => 27; +use constant ERR_UNPARSED_ENTITY => 28; +use constant ERR_ENTITY_IS_EXTERNAL => 29; +use constant ERR_ENTITY_IS_PARAMETER => 30; +use constant ERR_UNKNOWN_ENCODING => 31; +use constant ERR_UNSUPPORTED_ENCODING => 32; +use constant ERR_STRING_NOT_STARTED => 33; +use constant ERR_STRING_NOT_CLOSED => 34; +use constant ERR_NS_DECL_ERROR => 35; +use constant ERR_ENTITY_NOT_STARTED => 36; +use constant ERR_ENTITY_NOT_FINISHED => 37; +use constant ERR_LT_IN_ATTRIBUTE => 38; +use constant ERR_ATTRIBUTE_NOT_STARTED => 39; +use constant ERR_ATTRIBUTE_NOT_FINISHED => 40; +use constant ERR_ATTRIBUTE_WITHOUT_VALUE => 41; +use constant ERR_ATTRIBUTE_REDEFINED => 42; +use constant ERR_LITERAL_NOT_STARTED => 43; +use constant ERR_LITERAL_NOT_FINISHED => 44; +use constant ERR_COMMENT_NOT_FINISHED => 45; +use constant ERR_PI_NOT_STARTED => 46; +use constant ERR_PI_NOT_FINISHED => 47; +use constant ERR_NOTATION_NOT_STARTED => 48; +use constant ERR_NOTATION_NOT_FINISHED => 49; +use constant ERR_ATTLIST_NOT_STARTED => 50; +use constant ERR_ATTLIST_NOT_FINISHED => 51; +use constant ERR_MIXED_NOT_STARTED => 52; +use constant ERR_MIXED_NOT_FINISHED => 53; +use constant ERR_ELEMCONTENT_NOT_STARTED => 54; +use constant ERR_ELEMCONTENT_NOT_FINISHED => 55; +use constant ERR_XMLDECL_NOT_STARTED => 56; +use constant ERR_XMLDECL_NOT_FINISHED => 57; +use constant ERR_CONDSEC_NOT_STARTED => 58; +use constant ERR_CONDSEC_NOT_FINISHED => 59; +use constant ERR_EXT_SUBSET_NOT_FINISHED => 60; +use constant ERR_DOCTYPE_NOT_FINISHED => 61; +use constant ERR_MISPLACED_CDATA_END => 62; +use constant ERR_CDATA_NOT_FINISHED => 63; +use constant ERR_RESERVED_XML_NAME => 64; +use constant ERR_SPACE_REQUIRED => 65; +use constant ERR_SEPARATOR_REQUIRED => 66; +use constant ERR_NMTOKEN_REQUIRED => 67; +use constant ERR_NAME_REQUIRED => 68; +use constant ERR_PCDATA_REQUIRED => 69; +use constant ERR_URI_REQUIRED => 70; +use constant ERR_PUBID_REQUIRED => 71; +use constant ERR_LT_REQUIRED => 72; +use constant ERR_GT_REQUIRED => 73; +use constant ERR_LTSLASH_REQUIRED => 74; +use constant ERR_EQUAL_REQUIRED => 75; +use constant ERR_TAG_NAME_MISMATCH => 76; +use constant ERR_TAG_NOT_FINISHED => 77; +use constant ERR_STANDALONE_VALUE => 78; +use constant ERR_ENCODING_NAME => 79; +use constant ERR_HYPHEN_IN_COMMENT => 80; +use constant ERR_INVALID_ENCODING => 81; +use constant ERR_EXT_ENTITY_STANDALONE => 82; +use constant ERR_CONDSEC_INVALID => 83; +use constant ERR_VALUE_REQUIRED => 84; +use constant ERR_NOT_WELL_BALANCED => 85; +use constant ERR_EXTRA_CONTENT => 86; +use constant ERR_ENTITY_CHAR_ERROR => 87; +use constant ERR_ENTITY_PE_INTERNAL => 88; +use constant ERR_ENTITY_LOOP => 89; +use constant ERR_ENTITY_BOUNDARY => 90; +use constant ERR_INVALID_URI => 91; +use constant ERR_URI_FRAGMENT => 92; +use constant WAR_CATALOG_PI => 93; +use constant ERR_NO_DTD => 94; +use constant ERR_CONDSEC_INVALID_KEYWORD => 95; +use constant ERR_VERSION_MISSING => 96; +use constant WAR_UNKNOWN_VERSION => 97; +use constant WAR_LANG_VALUE => 98; +use constant WAR_NS_URI => 99; +use constant WAR_NS_URI_RELATIVE => 100; +use constant NS_ERR_XML_NAMESPACE => 200; +use constant NS_ERR_UNDEFINED_NAMESPACE => 201; +use constant NS_ERR_QNAME => 202; +use constant NS_ERR_ATTRIBUTE_REDEFINED => 203; +use constant DTD_ATTRIBUTE_DEFAULT => 500; +use constant DTD_ATTRIBUTE_REDEFINED => 501; +use constant DTD_ATTRIBUTE_VALUE => 502; +use constant DTD_CONTENT_ERROR => 503; +use constant DTD_CONTENT_MODEL => 504; +use constant DTD_CONTENT_NOT_DETERMINIST => 505; +use constant DTD_DIFFERENT_PREFIX => 506; +use constant DTD_ELEM_DEFAULT_NAMESPACE => 507; +use constant DTD_ELEM_NAMESPACE => 508; +use constant DTD_ELEM_REDEFINED => 509; +use constant DTD_EMPTY_NOTATION => 510; +use constant DTD_ENTITY_TYPE => 511; +use constant DTD_ID_FIXED => 512; +use constant DTD_ID_REDEFINED => 513; +use constant DTD_ID_SUBSET => 514; +use constant DTD_INVALID_CHILD => 515; +use constant DTD_INVALID_DEFAULT => 516; +use constant DTD_LOAD_ERROR => 517; +use constant DTD_MISSING_ATTRIBUTE => 518; +use constant DTD_MIXED_CORRUPT => 519; +use constant DTD_MULTIPLE_ID => 520; +use constant DTD_NO_DOC => 521; +use constant DTD_NO_DTD => 522; +use constant DTD_NO_ELEM_NAME => 523; +use constant DTD_NO_PREFIX => 524; +use constant DTD_NO_ROOT => 525; +use constant DTD_NOTATION_REDEFINED => 526; +use constant DTD_NOTATION_VALUE => 527; +use constant DTD_NOT_EMPTY => 528; +use constant DTD_NOT_PCDATA => 529; +use constant DTD_NOT_STANDALONE => 530; +use constant DTD_ROOT_NAME => 531; +use constant DTD_STANDALONE_WHITE_SPACE => 532; +use constant DTD_UNKNOWN_ATTRIBUTE => 533; +use constant DTD_UNKNOWN_ELEM => 534; +use constant DTD_UNKNOWN_ENTITY => 535; +use constant DTD_UNKNOWN_ID => 536; +use constant DTD_UNKNOWN_NOTATION => 537; +use constant HTML_STRUCURE_ERROR => 800; +use constant HTML_UNKNOWN_TAG => 801; +use constant RNGP_ANYNAME_ATTR_ANCESTOR => 1000; +use constant RNGP_ATTR_CONFLICT => 1001; +use constant RNGP_ATTRIBUTE_CHILDREN => 1002; +use constant RNGP_ATTRIBUTE_CONTENT => 1003; +use constant RNGP_ATTRIBUTE_EMPTY => 1004; +use constant RNGP_ATTRIBUTE_NOOP => 1005; +use constant RNGP_CHOICE_CONTENT => 1006; +use constant RNGP_CHOICE_EMPTY => 1007; +use constant RNGP_CREATE_FAILURE => 1008; +use constant RNGP_DATA_CONTENT => 1009; +use constant RNGP_DEF_CHOICE_AND_INTERLEAVE => 1010; +use constant RNGP_DEFINE_CREATE_FAILED => 1011; +use constant RNGP_DEFINE_EMPTY => 1012; +use constant RNGP_DEFINE_MISSING => 1013; +use constant RNGP_DEFINE_NAME_MISSING => 1014; +use constant RNGP_ELEM_CONTENT_EMPTY => 1015; +use constant RNGP_ELEM_CONTENT_ERROR => 1016; +use constant RNGP_ELEMENT_EMPTY => 1017; +use constant RNGP_ELEMENT_CONTENT => 1018; +use constant RNGP_ELEMENT_NAME => 1019; +use constant RNGP_ELEMENT_NO_CONTENT => 1020; +use constant RNGP_ELEM_TEXT_CONFLICT => 1021; +use constant RNGP_EMPTY => 1022; +use constant RNGP_EMPTY_CONSTRUCT => 1023; +use constant RNGP_EMPTY_CONTENT => 1024; +use constant RNGP_EMPTY_NOT_EMPTY => 1025; +use constant RNGP_ERROR_TYPE_LIB => 1026; +use constant RNGP_EXCEPT_EMPTY => 1027; +use constant RNGP_EXCEPT_MISSING => 1028; +use constant RNGP_EXCEPT_MULTIPLE => 1029; +use constant RNGP_EXCEPT_NO_CONTENT => 1030; +use constant RNGP_EXTERNALREF_EMTPY => 1031; +use constant RNGP_EXTERNAL_REF_FAILURE => 1032; +use constant RNGP_EXTERNALREF_RECURSE => 1033; +use constant RNGP_FORBIDDEN_ATTRIBUTE => 1034; +use constant RNGP_FOREIGN_ELEMENT => 1035; +use constant RNGP_GRAMMAR_CONTENT => 1036; +use constant RNGP_GRAMMAR_EMPTY => 1037; +use constant RNGP_GRAMMAR_MISSING => 1038; +use constant RNGP_GRAMMAR_NO_START => 1039; +use constant RNGP_GROUP_ATTR_CONFLICT => 1040; +use constant RNGP_HREF_ERROR => 1041; +use constant RNGP_INCLUDE_EMPTY => 1042; +use constant RNGP_INCLUDE_FAILURE => 1043; +use constant RNGP_INCLUDE_RECURSE => 1044; +use constant RNGP_INTERLEAVE_ADD => 1045; +use constant RNGP_INTERLEAVE_CREATE_FAILED => 1046; +use constant RNGP_INTERLEAVE_EMPTY => 1047; +use constant RNGP_INTERLEAVE_NO_CONTENT => 1048; +use constant RNGP_INVALID_DEFINE_NAME => 1049; +use constant RNGP_INVALID_URI => 1050; +use constant RNGP_INVALID_VALUE => 1051; +use constant RNGP_MISSING_HREF => 1052; +use constant RNGP_NAME_MISSING => 1053; +use constant RNGP_NEED_COMBINE => 1054; +use constant RNGP_NOTALLOWED_NOT_EMPTY => 1055; +use constant RNGP_NSNAME_ATTR_ANCESTOR => 1056; +use constant RNGP_NSNAME_NO_NS => 1057; +use constant RNGP_PARAM_FORBIDDEN => 1058; +use constant RNGP_PARAM_NAME_MISSING => 1059; +use constant RNGP_PARENTREF_CREATE_FAILED => 1060; +use constant RNGP_PARENTREF_NAME_INVALID => 1061; +use constant RNGP_PARENTREF_NO_NAME => 1062; +use constant RNGP_PARENTREF_NO_PARENT => 1063; +use constant RNGP_PARENTREF_NOT_EMPTY => 1064; +use constant RNGP_PARSE_ERROR => 1065; +use constant RNGP_PAT_ANYNAME_EXCEPT_ANYNAME => 1066; +use constant RNGP_PAT_ATTR_ATTR => 1067; +use constant RNGP_PAT_ATTR_ELEM => 1068; +use constant RNGP_PAT_DATA_EXCEPT_ATTR => 1069; +use constant RNGP_PAT_DATA_EXCEPT_ELEM => 1070; +use constant RNGP_PAT_DATA_EXCEPT_EMPTY => 1071; +use constant RNGP_PAT_DATA_EXCEPT_GROUP => 1072; +use constant RNGP_PAT_DATA_EXCEPT_INTERLEAVE => 1073; +use constant RNGP_PAT_DATA_EXCEPT_LIST => 1074; +use constant RNGP_PAT_DATA_EXCEPT_ONEMORE => 1075; +use constant RNGP_PAT_DATA_EXCEPT_REF => 1076; +use constant RNGP_PAT_DATA_EXCEPT_TEXT => 1077; +use constant RNGP_PAT_LIST_ATTR => 1078; +use constant RNGP_PAT_LIST_ELEM => 1079; +use constant RNGP_PAT_LIST_INTERLEAVE => 1080; +use constant RNGP_PAT_LIST_LIST => 1081; +use constant RNGP_PAT_LIST_REF => 1082; +use constant RNGP_PAT_LIST_TEXT => 1083; +use constant RNGP_PAT_NSNAME_EXCEPT_ANYNAME => 1084; +use constant RNGP_PAT_NSNAME_EXCEPT_NSNAME => 1085; +use constant RNGP_PAT_ONEMORE_GROUP_ATTR => 1086; +use constant RNGP_PAT_ONEMORE_INTERLEAVE_ATTR => 1087; +use constant RNGP_PAT_START_ATTR => 1088; +use constant RNGP_PAT_START_DATA => 1089; +use constant RNGP_PAT_START_EMPTY => 1090; +use constant RNGP_PAT_START_GROUP => 1091; +use constant RNGP_PAT_START_INTERLEAVE => 1092; +use constant RNGP_PAT_START_LIST => 1093; +use constant RNGP_PAT_START_ONEMORE => 1094; +use constant RNGP_PAT_START_TEXT => 1095; +use constant RNGP_PAT_START_VALUE => 1096; +use constant RNGP_PREFIX_UNDEFINED => 1097; +use constant RNGP_REF_CREATE_FAILED => 1098; +use constant RNGP_REF_CYCLE => 1099; +use constant RNGP_REF_NAME_INVALID => 1100; +use constant RNGP_REF_NO_DEF => 1101; +use constant RNGP_REF_NO_NAME => 1102; +use constant RNGP_REF_NOT_EMPTY => 1103; +use constant RNGP_START_CHOICE_AND_INTERLEAVE => 1104; +use constant RNGP_START_CONTENT => 1105; +use constant RNGP_START_EMPTY => 1106; +use constant RNGP_START_MISSING => 1107; +use constant RNGP_TEXT_EXPECTED => 1108; +use constant RNGP_TEXT_HAS_CHILD => 1109; +use constant RNGP_TYPE_MISSING => 1110; +use constant RNGP_TYPE_NOT_FOUND => 1111; +use constant RNGP_TYPE_VALUE => 1112; +use constant RNGP_UNKNOWN_ATTRIBUTE => 1113; +use constant RNGP_UNKNOWN_COMBINE => 1114; +use constant RNGP_UNKNOWN_CONSTRUCT => 1115; +use constant RNGP_UNKNOWN_TYPE_LIB => 1116; +use constant RNGP_URI_FRAGMENT => 1117; +use constant RNGP_URI_NOT_ABSOLUTE => 1118; +use constant RNGP_VALUE_EMPTY => 1119; +use constant RNGP_VALUE_NO_CONTENT => 1120; +use constant RNGP_XMLNS_NAME => 1121; +use constant RNGP_XML_NS => 1122; +use constant XPATH_EXPRESSION_OK => 1200; +use constant XPATH_NUMBER_ERROR => 1201; +use constant XPATH_UNFINISHED_LITERAL_ERROR => 1202; +use constant XPATH_START_LITERAL_ERROR => 1203; +use constant XPATH_VARIABLE_REF_ERROR => 1204; +use constant XPATH_UNDEF_VARIABLE_ERROR => 1205; +use constant XPATH_INVALID_PREDICATE_ERROR => 1206; +use constant XPATH_EXPR_ERROR => 1207; +use constant XPATH_UNCLOSED_ERROR => 1208; +use constant XPATH_UNKNOWN_FUNC_ERROR => 1209; +use constant XPATH_INVALID_OPERAND => 1210; +use constant XPATH_INVALID_TYPE => 1211; +use constant XPATH_INVALID_ARITY => 1212; +use constant XPATH_INVALID_CTXT_SIZE => 1213; +use constant XPATH_INVALID_CTXT_POSITION => 1214; +use constant XPATH_MEMORY_ERROR => 1215; +use constant XPTR_SYNTAX_ERROR => 1216; +use constant XPTR_RESOURCE_ERROR => 1217; +use constant XPTR_SUB_RESOURCE_ERROR => 1218; +use constant XPATH_UNDEF_PREFIX_ERROR => 1219; +use constant XPATH_ENCODING_ERROR => 1220; +use constant XPATH_INVALID_CHAR_ERROR => 1221; +use constant TREE_INVALID_HEX => 1300; +use constant TREE_INVALID_DEC => 1301; +use constant TREE_UNTERMINATED_ENTITY => 1302; +use constant SAVE_NOT_UTF8 => 1400; +use constant SAVE_CHAR_INVALID => 1401; +use constant SAVE_NO_DOCTYPE => 1402; +use constant SAVE_UNKNOWN_ENCODING => 1403; +use constant REGEXP_COMPILE_ERROR => 1450; +use constant IO_UNKNOWN => 1500; +use constant IO_EACCES => 1501; +use constant IO_EAGAIN => 1502; +use constant IO_EBADF => 1503; +use constant IO_EBADMSG => 1504; +use constant IO_EBUSY => 1505; +use constant IO_ECANCELED => 1506; +use constant IO_ECHILD => 1507; +use constant IO_EDEADLK => 1508; +use constant IO_EDOM => 1509; +use constant IO_EEXIST => 1510; +use constant IO_EFAULT => 1511; +use constant IO_EFBIG => 1512; +use constant IO_EINPROGRESS => 1513; +use constant IO_EINTR => 1514; +use constant IO_EINVAL => 1515; +use constant IO_EIO => 1516; +use constant IO_EISDIR => 1517; +use constant IO_EMFILE => 1518; +use constant IO_EMLINK => 1519; +use constant IO_EMSGSIZE => 1520; +use constant IO_ENAMETOOLONG => 1521; +use constant IO_ENFILE => 1522; +use constant IO_ENODEV => 1523; +use constant IO_ENOENT => 1524; +use constant IO_ENOEXEC => 1525; +use constant IO_ENOLCK => 1526; +use constant IO_ENOMEM => 1527; +use constant IO_ENOSPC => 1528; +use constant IO_ENOSYS => 1529; +use constant IO_ENOTDIR => 1530; +use constant IO_ENOTEMPTY => 1531; +use constant IO_ENOTSUP => 1532; +use constant IO_ENOTTY => 1533; +use constant IO_ENXIO => 1534; +use constant IO_EPERM => 1535; +use constant IO_EPIPE => 1536; +use constant IO_ERANGE => 1537; +use constant IO_EROFS => 1538; +use constant IO_ESPIPE => 1539; +use constant IO_ESRCH => 1540; +use constant IO_ETIMEDOUT => 1541; +use constant IO_EXDEV => 1542; +use constant IO_NETWORK_ATTEMPT => 1543; +use constant IO_ENCODER => 1544; +use constant IO_FLUSH => 1545; +use constant IO_WRITE => 1546; +use constant IO_NO_INPUT => 1547; +use constant IO_BUFFER_FULL => 1548; +use constant IO_LOAD_ERROR => 1549; +use constant IO_ENOTSOCK => 1550; +use constant IO_EISCONN => 1551; +use constant IO_ECONNREFUSED => 1552; +use constant IO_ENETUNREACH => 1553; +use constant IO_EADDRINUSE => 1554; +use constant IO_EALREADY => 1555; +use constant IO_EAFNOSUPPORT => 1556; +use constant XINCLUDE_RECURSION => 1600; +use constant XINCLUDE_PARSE_VALUE => 1601; +use constant XINCLUDE_ENTITY_DEF_MISMATCH => 1602; +use constant XINCLUDE_NO_HREF => 1603; +use constant XINCLUDE_NO_FALLBACK => 1604; +use constant XINCLUDE_HREF_URI => 1605; +use constant XINCLUDE_TEXT_FRAGMENT => 1606; +use constant XINCLUDE_TEXT_DOCUMENT => 1607; +use constant XINCLUDE_INVALID_CHAR => 1608; +use constant XINCLUDE_BUILD_FAILED => 1609; +use constant XINCLUDE_UNKNOWN_ENCODING => 1610; +use constant XINCLUDE_MULTIPLE_ROOT => 1611; +use constant XINCLUDE_XPTR_FAILED => 1612; +use constant XINCLUDE_XPTR_RESULT => 1613; +use constant XINCLUDE_INCLUDE_IN_INCLUDE => 1614; +use constant XINCLUDE_FALLBACKS_IN_INCLUDE => 1615; +use constant XINCLUDE_FALLBACK_NOT_IN_INCLUDE => 1616; +use constant CATALOG_MISSING_ATTR => 1650; +use constant CATALOG_ENTRY_BROKEN => 1651; +use constant CATALOG_PREFER_VALUE => 1652; +use constant CATALOG_NOT_CATALOG => 1653; +use constant CATALOG_RECURSION => 1654; +use constant SCHEMAP_PREFIX_UNDEFINED => 1700; +use constant SCHEMAP_ATTRFORMDEFAULT_VALUE => 1701; +use constant SCHEMAP_ATTRGRP_NONAME_NOREF => 1702; +use constant SCHEMAP_ATTR_NONAME_NOREF => 1703; +use constant SCHEMAP_COMPLEXTYPE_NONAME_NOREF => 1704; +use constant SCHEMAP_ELEMFORMDEFAULT_VALUE => 1705; +use constant SCHEMAP_ELEM_NONAME_NOREF => 1706; +use constant SCHEMAP_EXTENSION_NO_BASE => 1707; +use constant SCHEMAP_FACET_NO_VALUE => 1708; +use constant SCHEMAP_FAILED_BUILD_IMPORT => 1709; +use constant SCHEMAP_GROUP_NONAME_NOREF => 1710; +use constant SCHEMAP_IMPORT_NAMESPACE_NOT_URI => 1711; +use constant SCHEMAP_IMPORT_REDEFINE_NSNAME => 1712; +use constant SCHEMAP_IMPORT_SCHEMA_NOT_URI => 1713; +use constant SCHEMAP_INVALID_BOOLEAN => 1714; +use constant SCHEMAP_INVALID_ENUM => 1715; +use constant SCHEMAP_INVALID_FACET => 1716; +use constant SCHEMAP_INVALID_FACET_VALUE => 1717; +use constant SCHEMAP_INVALID_MAXOCCURS => 1718; +use constant SCHEMAP_INVALID_MINOCCURS => 1719; +use constant SCHEMAP_INVALID_REF_AND_SUBTYPE => 1720; +use constant SCHEMAP_INVALID_WHITE_SPACE => 1721; +use constant SCHEMAP_NOATTR_NOREF => 1722; +use constant SCHEMAP_NOTATION_NO_NAME => 1723; +use constant SCHEMAP_NOTYPE_NOREF => 1724; +use constant SCHEMAP_REF_AND_SUBTYPE => 1725; +use constant SCHEMAP_RESTRICTION_NONAME_NOREF => 1726; +use constant SCHEMAP_SIMPLETYPE_NONAME => 1727; +use constant SCHEMAP_TYPE_AND_SUBTYPE => 1728; +use constant SCHEMAP_UNKNOWN_ALL_CHILD => 1729; +use constant SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD => 1730; +use constant SCHEMAP_UNKNOWN_ATTR_CHILD => 1731; +use constant SCHEMAP_UNKNOWN_ATTRGRP_CHILD => 1732; +use constant SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP => 1733; +use constant SCHEMAP_UNKNOWN_BASE_TYPE => 1734; +use constant SCHEMAP_UNKNOWN_CHOICE_CHILD => 1735; +use constant SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD => 1736; +use constant SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD => 1737; +use constant SCHEMAP_UNKNOWN_ELEM_CHILD => 1738; +use constant SCHEMAP_UNKNOWN_EXTENSION_CHILD => 1739; +use constant SCHEMAP_UNKNOWN_FACET_CHILD => 1740; +use constant SCHEMAP_UNKNOWN_FACET_TYPE => 1741; +use constant SCHEMAP_UNKNOWN_GROUP_CHILD => 1742; +use constant SCHEMAP_UNKNOWN_IMPORT_CHILD => 1743; +use constant SCHEMAP_UNKNOWN_LIST_CHILD => 1744; +use constant SCHEMAP_UNKNOWN_NOTATION_CHILD => 1745; +use constant SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD => 1746; +use constant SCHEMAP_UNKNOWN_REF => 1747; +use constant SCHEMAP_UNKNOWN_RESTRICTION_CHILD => 1748; +use constant SCHEMAP_UNKNOWN_SCHEMAS_CHILD => 1749; +use constant SCHEMAP_UNKNOWN_SEQUENCE_CHILD => 1750; +use constant SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD => 1751; +use constant SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD => 1752; +use constant SCHEMAP_UNKNOWN_TYPE => 1753; +use constant SCHEMAP_UNKNOWN_UNION_CHILD => 1754; +use constant SCHEMAP_ELEM_DEFAULT_FIXED => 1755; +use constant SCHEMAP_REGEXP_INVALID => 1756; +use constant SCHEMAP_FAILED_LOAD => 1756; +use constant SCHEMAP_NOTHING_TO_PARSE => 1757; +use constant SCHEMAP_NOROOT => 1758; +use constant SCHEMAP_REDEFINED_GROUP => 1759; +use constant SCHEMAP_REDEFINED_TYPE => 1760; +use constant SCHEMAP_REDEFINED_ELEMENT => 1761; +use constant SCHEMAP_REDEFINED_ATTRGROUP => 1762; +use constant SCHEMAP_REDEFINED_ATTR => 1763; +use constant SCHEMAP_REDEFINED_NOTATION => 1764; +use constant SCHEMAP_FAILED_PARSE => 1765; +use constant SCHEMAV_NOROOT => 1800; +use constant SCHEMAV_UNDECLAREDELEM => 1801; +use constant SCHEMAV_NOTTOPLEVEL => 1802; +use constant SCHEMAV_MISSING => 1803; +use constant SCHEMAV_WRONGELEM => 1804; +use constant SCHEMAV_NOTYPE => 1805; +use constant SCHEMAV_NOROLLBACK => 1806; +use constant SCHEMAV_ISABSTRACT => 1807; +use constant SCHEMAV_NOTEMPTY => 1808; +use constant SCHEMAV_ELEMCONT => 1809; +use constant SCHEMAV_HAVEDEFAULT => 1810; +use constant SCHEMAV_NOTNILLABLE => 1811; +use constant SCHEMAV_EXTRACONTENT => 1812; +use constant SCHEMAV_INVALIDATTR => 1813; +use constant SCHEMAV_INVALIDELEM => 1814; +use constant SCHEMAV_NOTDETERMINIST => 1815; +use constant SCHEMAV_CONSTRUCT => 1816; +use constant SCHEMAV_INTERNAL => 1817; +use constant SCHEMAV_NOTSIMPLE => 1818; +use constant SCHEMAV_ATTRUNKNOWN => 1819; +use constant SCHEMAV_ATTRINVALID => 1820; +use constant SCHEMAV_VALUE => 1821; +use constant SCHEMAV_FACET => 1822; +use constant XPTR_UNKNOWN_SCHEME => 1900; +use constant XPTR_CHILDSEQ_START => 1901; +use constant XPTR_EVAL_FAILED => 1902; +use constant XPTR_EXTRA_OBJECTS => 1903; +use constant C14N_CREATE_CTXT => 1950; +use constant C14N_REQUIRES_UTF8 => 1951; +use constant C14N_CREATE_STACK => 1952; +use constant C14N_INVALID_NODE => 1953; +use constant FTP_PASV_ANSWER => 2000; +use constant FTP_EPSV_ANSWER => 2001; +use constant FTP_ACCNT => 2002; +use constant HTTP_URL_SYNTAX => 2020; +use constant HTTP_USE_IP => 2021; +use constant HTTP_UNKNOWN_HOST => 2022; + +1; diff --git a/lib/XML/LibXML/ErrNo.pod b/lib/XML/LibXML/ErrNo.pod new file mode 100644 index 0000000..4b1b59e --- /dev/null +++ b/lib/XML/LibXML/ErrNo.pod @@ -0,0 +1,37 @@ +=head1 NAME + +XML::LibXML::ErrNo - Structured Errors + +=head1 DESCRIPTION + +This module is based on xmlerror.h libxml2 C header file. It defines symbolic +constants for all libxml2 error codes. Currently libxml2 uses over 480 +different error codes. See also XML::LibXML::Error. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Error.pm b/lib/XML/LibXML/Error.pm new file mode 100644 index 0000000..9855eaf --- /dev/null +++ b/lib/XML/LibXML/Error.pm @@ -0,0 +1,260 @@ +# $Id: Error.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# +package XML::LibXML::Error; + +use strict; +use warnings; + +# To avoid a "Deep recursion on subroutine as_string" warning +no warnings 'recursion'; + +use Encode (); + +use vars qw(@error_domains $VERSION $WARNINGS); +use overload + '""' => \&as_string, + 'eq' => sub { + ("$_[0]" eq "$_[1]") + }, + 'cmp' => sub { + ("$_[0]" cmp "$_[1]") + }, + fallback => 1; + +$WARNINGS = 0; # 0: suppress, 1: report via warn, 2: report via die +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use constant XML_ERR_NONE => 0; +use constant XML_ERR_WARNING => 1; # A simple warning +use constant XML_ERR_ERROR => 2; # A recoverable error +use constant XML_ERR_FATAL => 3; # A fatal error + +use constant XML_ERR_FROM_NONE => 0; +use constant XML_ERR_FROM_PARSER => 1; # The XML parser +use constant XML_ERR_FROM_TREE => 2; # The tree module +use constant XML_ERR_FROM_NAMESPACE => 3; # The XML Namespace module +use constant XML_ERR_FROM_DTD => 4; # The XML DTD validation +use constant XML_ERR_FROM_HTML => 5; # The HTML parser +use constant XML_ERR_FROM_MEMORY => 6; # The memory allocator +use constant XML_ERR_FROM_OUTPUT => 7; # The serialization code +use constant XML_ERR_FROM_IO => 8; # The Input/Output stack +use constant XML_ERR_FROM_FTP => 9; # The FTP module +use constant XML_ERR_FROM_HTTP => 10; # The FTP module +use constant XML_ERR_FROM_XINCLUDE => 11; # The XInclude processing +use constant XML_ERR_FROM_XPATH => 12; # The XPath module +use constant XML_ERR_FROM_XPOINTER => 13; # The XPointer module +use constant XML_ERR_FROM_REGEXP => 14; # The regular expressions module +use constant XML_ERR_FROM_DATATYPE => 15; # The W3C XML Schemas Datatype module +use constant XML_ERR_FROM_SCHEMASP => 16; # The W3C XML Schemas parser module +use constant XML_ERR_FROM_SCHEMASV => 17; # The W3C XML Schemas validation module +use constant XML_ERR_FROM_RELAXNGP => 18; # The Relax-NG parser module +use constant XML_ERR_FROM_RELAXNGV => 19; # The Relax-NG validator module +use constant XML_ERR_FROM_CATALOG => 20; # The Catalog module +use constant XML_ERR_FROM_C14N => 21; # The Canonicalization module +use constant XML_ERR_FROM_XSLT => 22; # The XSLT engine from libxslt +use constant XML_ERR_FROM_VALID => 23; # The DTD validation module with valid context +use constant XML_ERR_FROM_CHECK => 24; # The error-checking module +use constant XML_ERR_FROM_WRITER => 25; # The xmlwriter module +use constant XML_ERR_FROM_MODULE => 26; # The dynamically-loaded module module +use constant XML_ERR_FROM_I18N => 27; # The module handling character conversion +use constant XML_ERR_FROM_SCHEMATRONV=> 28; # The Schematron validator module + +@error_domains = ("", "parser", "tree", "namespace", "validity", + "HTML parser", "memory", "output", "I/O", "ftp", + "http", "XInclude", "XPath", "xpointer", "regexp", + "Schemas datatype", "Schemas parser", "Schemas validity", + "Relax-NG parser", "Relax-NG validity", + "Catalog", "C14N", "XSLT", "validity", "error-checking", + "xmlwriter", "dynamic loading", "i18n", + "Schematron validity"); + +my $MAX_ERROR_PREV_DEPTH = 100; + +for my $field (qw) { + my $method = sub { $_[0]{$field} }; + no strict 'refs'; + *$field = $method; +} + +{ + + sub new { + my ($class,$xE) = @_; + my $terr; + if (ref($xE)) { + my ($context,$column) = $xE->context_and_column(); + $terr =bless { + domain => $xE->domain(), + level => $xE->level(), + code => $xE->code(), + message => $xE->message(), + file => $xE->file(), + line => $xE->line(), + str1 => $xE->str1(), + str2 => $xE->str2(), + str3 => $xE->str3(), + num1 => $xE->num1(), + num2 => $xE->num2(), + __prev_depth => 0, + (defined($context) ? + ( + context => $context, + column => $column, + ) : ()), + }, $class; + } else { + # !!!! problem : got a flat error + # warn("PROBLEM: GOT A FLAT ERROR $xE\n"); + $terr =bless { + domain => 0, + level => 2, + code => -1, + message => $xE, + file => undef, + line => undef, + str1 => undef, + str2 => undef, + str3 => undef, + num1 => undef, + num2 => undef, + __prev_depth => 0, + }, $class; + } + return $terr; + } + + sub _callback_error { + #print "CALLBACK\n"; + my ($xE,$prev) = @_; + my $terr; + $terr=XML::LibXML::Error->new($xE); + if ($terr->{level} == XML_ERR_WARNING and $WARNINGS!=2) { + warn $terr if $WARNINGS; + return $prev; + } + #unless ( defined $terr->{file} and length $terr->{file} ) { + # this would make it easier to recognize parsed strings + # but it breaks old implementations + # [CG] $terr->{file} = 'string()'; + #} + #warn "Saving the error ",$terr->dump; + + if (ref($prev)) + { + if ($prev->__prev_depth() >= $MAX_ERROR_PREV_DEPTH) + { + return $prev; + } + $terr->{_prev} = $prev; + $terr->{__prev_depth} = $prev->__prev_depth() + 1; + } + else + { + $terr->{_prev} = defined($prev) && length($prev) ? XML::LibXML::Error->new($prev) : undef; + } + return $terr; + } + sub _instant_error_callback { + my $xE = shift; + my $terr= XML::LibXML::Error->new($xE); + print "Reporting an instanteous error ",$terr->dump; + die $terr; + } + sub _report_warning { + my ($saved_error) = @_; + #print "CALLBACK WARN\n"; + if ( defined $saved_error ) { + #print "reporting a warning ",$saved_error->dump; + warn $saved_error; + } + } + sub _report_error { + my ($saved_error) = @_; + #print "CALLBACK ERROR: $saved_error\n"; + if ( defined $saved_error ) { + die $saved_error; + } + } +} + + +# backward compatibility +sub int1 { $_[0]->num1 } +sub int2 { $_[0]->num2 } + +sub domain { + my ($self)=@_; + return undef unless ref($self); + my $domain = $self->{domain}; + # Newer versions of libxml2 might yield errors in domains that aren't + # listed above. Invent something reasonable in that case. + return $domain < @error_domains ? $error_domains[$domain] : "domain_$domain"; +} + +sub as_string { + my ($self)=@_; + my $msg = ""; + my $level; + + if (defined($self->{_prev})) { + $msg = $self->{_prev}->as_string; + } + + if ($self->{level} == XML_ERR_NONE) { + $level = ""; + } elsif ($self->{level} == XML_ERR_WARNING) { + $level = "warning"; + } elsif ($self->{level} == XML_ERR_ERROR || + $self->{level} == XML_ERR_FATAL) { + $level = "error"; + } + my $where=""; + if (defined($self->{file})) { + $where="$self->{file}:$self->{line}"; + } elsif (($self->{domain} == XML_ERR_FROM_PARSER) + and + $self->{line}) { + $where="Entity: line $self->{line}"; + } + if ($self->{nodename}) { + $where.=": element ".$self->{nodename}; + } + $msg.=$where.": " if $where ne ""; + $msg.=$self->domain." ".$level." :"; + my $str=$self->{message}||""; + chomp($str); + $msg.=" ".$str."\n"; + if (($self->{domain} == XML_ERR_FROM_XPATH) and + defined($self->{str1})) { + $msg.=$self->{str1}."\n"; + $msg.=(" " x $self->{num1})."^\n"; + } elsif (defined $self->{context}) { + # If the error relates to character-encoding problems in the context, + # then doing textual operations on it will spew warnings that + # XML::LibXML can do nothing to fix. So just disable all such + # warnings. This has the pleasing benefit of making the test suite + # run warning-free. + no warnings 'utf8'; + my $context = Encode::encode('UTF-8', $self->{context}); + $msg.=$context."\n"; + $context = substr($context,0,$self->{column}); + $context=~s/[^\t]/ /g; + $msg.=$context."^\n"; + } + return $msg; +} + +sub dump { + my ($self)=@_; + require Data::Dumper; + return Data::Dumper->new([$self],['error'])->Dump; +} + +1; diff --git a/lib/XML/LibXML/Error.pod b/lib/XML/LibXML/Error.pod new file mode 100644 index 0000000..b8915e3 --- /dev/null +++ b/lib/XML/LibXML/Error.pod @@ -0,0 +1,264 @@ +=head1 NAME + +XML::LibXML::Error - Structured Errors + +=head1 SYNOPSIS + + + + eval { ... }; + if (ref($@)) { + # handle a structured error (XML::LibXML::Error object) + } elsif ($@) { + # error, but not an XML::LibXML::Error object + } else { + # no error + } + + $XML::LibXML::Error::WARNINGS=1; + $message = $@->as_string(); + print $@->dump(); + $error_domain = $@->domain(); + $error_code = $@->code(); + $error_message = $@->message(); + $error_level = $@->level(); + $filename = $@->file(); + $line = $@->line(); + $nodename = $@->nodename(); + $error_str1 = $@->str1(); + $error_str2 = $@->str2(); + $error_str3 = $@->str3(); + $error_num1 = $@->num1(); + $error_num2 = $@->num2(); + $string = $@->context(); + $offset = $@->column(); + $previous_error = $@->_prev(); + +=head1 DESCRIPTION + +The XML::LibXML::Error class is a tiny frontend to I<<<<<< libxml2 >>>>>>'s structured error support. If XML::LibXML is compiled with structured error +support, all errors reported by libxml2 are transformed to XML::LibXML::Error +objects. These objects automatically serialize to the corresponding error +messages when printed or used in a string operation, but as objects, can also +be used to get a detailed and structured information about the error that +occurred. + +Unlike most other XML::LibXML objects, XML::LibXML::Error doesn't wrap an +underlying I<<<<<< libxml2 >>>>>> structure directly, but rather transforms it to a blessed Perl hash reference +containing the individual fields of the structured error information as hash +key-value pairs. Individual items (fields) of a structured error can either be +obtained directly as $@->{field}, or using autoloaded methods such as +$@->field() (where field is the field name). XML::LibXML::Error objects have +the following fields: domain, code, level, file, line, nodename, message, str1, +str2, str3, num1, num2, and _prev (some of them may be undefined). + +=over 4 + +=item $XML::LibXML::Error::WARNINGS + + $XML::LibXML::Error::WARNINGS=1; + +Traditionally, XML::LibXML was suppressing parser warnings by setting libxml2's +global variable xmlGetWarningsDefaultValue to 0. Since 1.70 we do not change +libxml2's global variables anymore; for backward compatibility, XML::LibXML +suppresses warnings. This variable can be set to 1 to enable reporting of these +warnings via Perl C<<<<<< warn >>>>>> and to 2 to report hem via C<<<<<< die >>>>>>. + + +=item as_string + + $message = $@->as_string(); + +This function serializes an XML::LibXML::Error object to a string containing +the full error message close to the message produced by I<<<<<< libxml2 >>>>>> default error handlers and tools like xmllint. This method is also used to +overload "" operator on XML::LibXML::Error, so it is automatically called +whenever XML::LibXML::Error object is treated as a string (e.g. in print $@). + + +=item dump + + print $@->dump(); + +This function serializes an XML::LibXML::Error to a string displaying all +fields of the error structure individually on separate lines of the form 'name' +=> 'value'. + + +=item domain + + $error_domain = $@->domain(); + +Returns string containing information about what part of the library raised the +error. Can be one of: "parser", "tree", "namespace", "validity", "HTML parser", +"memory", "output", "I/O", "ftp", "http", "XInclude", "XPath", "xpointer", +"regexp", "Schemas datatype", "Schemas parser", "Schemas validity", "Relax-NG +parser", "Relax-NG validity", "Catalog", "C14N", "XSLT", "validity". + + +=item code + + $error_code = $@->code(); + +Returns the actual libxml2 error code. The XML::LibXML::ErrNo module defines +constants for individual error codes. Currently libxml2 uses over 480 different +error codes. + + +=item message + + $error_message = $@->message(); + +Returns a human-readable informative error message. + + +=item level + + $error_level = $@->level(); + +Returns an integer value describing how consequent is the error. +XML::LibXML::Error defines the following constants: + + +=over 4 + +=item * + +XML_ERR_NONE = 0 + + + +=item * + +XML_ERR_WARNING = 1 : A simple warning. + + + +=item * + +XML_ERR_ERROR = 2 : A recoverable error. + + + +=item * + +XML_ERR_FATAL = 3 : A fatal error. + + + +=back + + +=item file + + $filename = $@->file(); + +Returns the filename of the file being processed while the error occurred. + + +=item line + + $line = $@->line(); + +The line number, if available. + + +=item nodename + + $nodename = $@->nodename(); + +Name of the node where error occurred, if available. When this field is +non-empty, libxml2 actually returned a physical pointer to the specified node. +Due to memory management issues, it is very difficult to implement a way to +expose the pointer to the Perl level as a XML::LibXML::Node. For this reason, +XML::LibXML::Error currently only exposes the name the node. + + +=item str1 + + $error_str1 = $@->str1(); + +Error specific. Extra string information. + + +=item str2 + + $error_str2 = $@->str2(); + +Error specific. Extra string information. + + +=item str3 + + $error_str3 = $@->str3(); + +Error specific. Extra string information. + + +=item num1 + + $error_num1 = $@->num1(); + +Error specific. Extra numeric information. + + +=item num2 + + $error_num2 = $@->num2(); + +In recent libxml2 versions, this value contains a column number of the error or +0 if N/A. + + +=item context + + $string = $@->context(); + +For parsing errors, this field contains about 80 characters of the XML near the +place where the error occurred. The field C<<<<<< $@-Ecolumn() >>>>>> contains the corresponding offset. Where N/A, the field is undefined. + + +=item column + + $offset = $@->column(); + +See C<<<<<< $@-Ecolumn() >>>>>> above. + + +=item _prev + + $previous_error = $@->_prev(); + +This field can possibly hold a reference to another XML::LibXML::Error object +representing an error which occurred just before this error. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/InputCallback.pod b/lib/XML/LibXML/InputCallback.pod new file mode 100644 index 0000000..3cfcd15 --- /dev/null +++ b/lib/XML/LibXML/InputCallback.pod @@ -0,0 +1,300 @@ +=head1 NAME + +XML::LibXML::InputCallback - XML::LibXML Class for Input Callbacks + +=head1 SYNOPSIS + + + + use XML::LibXML; + + +=head1 DESCRIPTION + +You may get unexpected results if you are trying to load external documents +during libxml2 parsing if the location of the resource is not a HTTP, FTP or +relative location but a absolute path for example. To get around this +limitation, you may add your own input handler to open, read and close +particular types of locations or URI classes. Using this input callback +handlers, you can handle your own custom URI schemes for example. + +The input callbacks are used whenever XML::LibXML has to get something other +than externally parsed entities from somewhere. They are implemented using a +callback stack on the Perl layer in analogy to libxml2's native callback stack. + +The XML::LibXML::InputCallback class transparently registers the input +callbacks for the libxml2's parser processes. + + +=head2 How does XML::LibXML::InputCallback work? + +The libxml2 library offers a callback implementation as global functions only. +To work-around the troubles resulting in having only global callbacks - for +example, if the same global callback stack is manipulated by different +applications running together in a single Apache Web-server environment -, +XML::LibXML::InputCallback comes with a object-oriented and a function-oriented +part. + +Using the function-oriented part the global callback stack of libxml2 can be +manipulated. Those functions can be used as interface to the callbacks on the +C- and XS Layer. At the object-oriented part, operations for working with the +"pseudo-localized" callback stack are implemented. Currently, you can register +and de-register callbacks on the Perl layer and initialize them on a per parser +basis. + + +=head3 Callback Groups + +The libxml2 input callbacks come in groups. One group contains a URI matcher (I<<<<<< match >>>>>>), a data stream constructor (I<<<<<< open >>>>>>), a data stream reader (I<<<<<< read >>>>>>), and a data stream destructor (I<<<<<< close >>>>>>). The callbacks can be manipulated on a per group basis only. + + +=head3 The Parser Process + +The parser process works on an XML data stream, along which, links to other +resources can be embedded. This can be links to external DTDs or XIncludes for +example. Those resources are identified by URIs. The callback implementation of +libxml2 assumes that one callback group can handle a certain amount of URIs and +a certain URI scheme. Per default, callback handlers for I<<<<<< file://* >>>>>>, I<<<<<< file:://*.gz >>>>>>, I<<<<<< http://* >>>>>> and I<<<<<< ftp://* >>>>>> are registered. + +Callback groups in the callback stack are processed from top to bottom, meaning +that callback groups registered later will be processed before the earlier +registered ones. + +While parsing the data stream, the libxml2 parser checks if a registered +callback group will handle a URI - if they will not, the URI will be +interpreted as I<<<<<< file://URI >>>>>>. To handle a URI, the I<<<<<< match >>>>>> callback will have to return '1'. If that happens, the handling of the URI will +be passed to that callback group. Next, the URI will be passed to the I<<<<<< open >>>>>> callback, which should return a I<<<<<< reference >>>>>> to the data stream if it successfully opened the file, '0' otherwise. If +opening the stream was successful, the I<<<<<< read >>>>>> callback will be called repeatedly until it returns an empty string. After the +read callback, the I<<<<<< close >>>>>> callback will be called to close the stream. + + +=head3 Organisation of callback groups in XML::LibXML::InputCallback + +Callback groups are implemented as a stack (Array), each entry holds a +reference to an array of the callbacks. For the libxml2 library, the +XML::LibXML::InputCallback callback implementation appears as one single +callback group. The Perl implementation however allows one to manage different +callback stacks on a per libxml2-parser basis. + + +=head2 Using XML::LibXML::InputCallback + +After object instantiation using the parameter-less constructor, you can +register callback groups. + + + + my $input_callbacks = XML::LibXML::InputCallback->new(); + $input_callbacks->register_callbacks([ $match_cb1, $open_cb1, + $read_cb1, $close_cb1 ] ); + $input_callbacks->register_callbacks([ $match_cb2, $open_cb2, + $read_cb2, $close_cb2 ] ); + $input_callbacks->register_callbacks( [ $match_cb3, $open_cb3, + $read_cb3, $close_cb3 ] ); + + $parser->input_callbacks( $input_callbacks ); + $parser->parse_file( $some_xml_file ); + + +=head2 What about the old callback system prior to XML::LibXML::InputCallback? + +In XML::LibXML versions prior to 1.59 - i.e. without the +XML::LibXML::InputCallback module - you could define your callbacks either +using globally or locally. You still can do that using +XML::LibXML::InputCallback, and in addition to that you can define the +callbacks on a per parser basis! + +If you use the old callback interface through global callbacks, +XML::LibXML::InputCallback will treat them with a lower priority as the ones +registered using the new interface. The global callbacks will not override the +callback groups registered using the new interface. Local callbacks are +attached to a specific parser instance, therefore they are treated with highest +priority. If the I<<<<<< match >>>>>> callback of the callback group registered as local variable is identical to one +of the callback groups registered using the new interface, that callback group +will be replaced. + +Users of the old callback implementation whose I<<<<<< open >>>>>> callback returned a plain string, will have to adapt their code to return a +reference to that string after upgrading to version >= 1.59. The new callback +system can only deal with the I<<<<<< open >>>>>> callback returning a reference! + + +=head1 INTERFACE DESCRIPTION + + +=head2 Global Variables + +=over 4 + +=item $_CUR_CB + +Stores the current callback and can be used as shortcut to access the callback +stack. + + +=item @_GLOBAL_CALLBACKS + +Stores all callback groups for the current parser process. + + +=item @_CB_STACK + +Stores the currently used callback group. Used to prevent parser errors when +dealing with nested XML data. + + + +=back + + +=head2 Global Callbacks + +=over 4 + +=item _callback_match + +Implements the interface for the I<<<<<< match >>>>>> callback at C-level and for the selection of the callback group from the +callbacks defined at the Perl-level. + + +=item _callback_open + +Forwards the I<<<<<< open >>>>>> callback from libxml2 to the corresponding callback function at the Perl-level. + + +=item _callback_read + +Forwards the read request to the corresponding callback function at the +Perl-level and returns the result to libxml2. + + +=item _callback_close + +Forwards the I<<<<<< close >>>>>> callback from libxml2 to the corresponding callback function at the +Perl-level.. + + + +=back + + +=head2 Class methods + +=over 4 + +=item new() + +A simple constructor. + + +=item register_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ]) + +The four callbacks I<<<<<< have >>>>>> to be given as array reference in the above order I<<<<<< match >>>>>>, I<<<<<< open >>>>>>, I<<<<<< read >>>>>>, I<<<<<< close >>>>>>! + + +=item unregister_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ]) + +With no arguments given, C<<<<<< unregister_callbacks() >>>>>> will delete the last registered callback group from the stack. If four +callbacks are passed as array reference, the callback group to unregister will +be identified by the I<<<<<< match >>>>>> callback and deleted from the callback stack. Note that if several identical I<<<<<< match >>>>>> callbacks are defined in different callback groups, ALL of them will be deleted +from the stack. + + +=item init_callbacks( $parser ) + +Initializes the callback system for the provided parser before starting a +parsing process. + + +=item cleanup_callbacks() + +Resets global variables and the libxml2 callback stack. + + +=item lib_init_callbacks() + +Used internally for callback registration at C-level. + + +=item lib_cleanup_callbacks() + +Used internally for callback resetting at the C-level. + + + +=back + + + + +=head1 EXAMPLE CALLBACKS + +The following example is a purely fictitious example that uses a +MyScheme::Handler object that responds to methods similar to an IO::Handle. + + + + # Define the four callback functions + sub match_uri { + my $uri = shift; + return $uri =~ /^myscheme:/; # trigger our callback group at a 'myscheme' URIs + } + + sub open_uri { + my $uri = shift; + my $handler = MyScheme::Handler->new($uri); + return $handler; + } + + # The returned $buffer will be parsed by the libxml2 parser + sub read_uri { + my $handler = shift; + my $length = shift; + my $buffer; + read($handler, $buffer, $length); + return $buffer; # $buffer will be an empty string '' if read() is done + } + + # Close the handle associated with the resource. + sub close_uri { + my $handler = shift; + close($handler); + } + + # Register them with a instance of XML::LibXML::InputCallback + my $input_callbacks = XML::LibXML::InputCallback->new(); + $input_callbacks->register_callbacks([ \&match_uri, \&open_uri, + \&read_uri, \&close_uri ] ); + + # Register the callback group at a parser instance + $parser->input_callbacks( $input_callbacks ); + + # $some_xml_file will be parsed using our callbacks + $parser->parse_file( $some_xml_file ); + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Literal.pm b/lib/XML/LibXML/Literal.pm new file mode 100644 index 0000000..1bc9fa1 --- /dev/null +++ b/lib/XML/LibXML/Literal.pm @@ -0,0 +1,112 @@ +# $Id$ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::Literal; + +use XML::LibXML::Boolean; +use XML::LibXML::Number; + +use strict; +use warnings; + +use vars qw ($VERSION); +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use overload + '""' => \&value, + 'cmp' => \&cmp; + +sub new { + my $class = shift; + my ($string) = @_; + +# $string =~ s/"/"/g; +# $string =~ s/'/'/g; + + bless \$string, $class; +} + +sub as_string { + my $self = shift; + my $string = $$self; + $string =~ s/'/'/g; + return "'$string'"; +} + +sub as_xml { + my $self = shift; + my $string = $$self; + return "$string\n"; +} + +sub value { + my $self = shift; + $$self; +} + +sub cmp { + my $self = shift; + my ($cmp, $swap) = @_; + if ($swap) { + return $cmp cmp $$self; + } + return $$self cmp $cmp; +} + +sub evaluate { + my $self = shift; + $self; +} + +sub to_boolean { + my $self = shift; + return (length($$self) > 0) ? XML::LibXML::Boolean->True : XML::LibXML::Boolean->False; +} + +sub to_number { return XML::LibXML::Number->new($_[0]->value); } +sub to_literal { return $_[0]; } + +sub string_value { return $_[0]->value; } + +1; +__END__ + +=head1 NAME + +XML::LibXML::Literal - Simple string values. + +=head1 DESCRIPTION + +In XPath terms a Literal is what we know as a string. + +=head1 API + +=head2 new($string) + +Create a new Literal object with the value in $string. Note that " and +' will be converted to " and ' respectively. That is not part of the XPath +specification, but I consider it useful. Note though that you have to go +to extraordinary lengths in an XML template file (be it XSLT or whatever) to +make use of this: + + + +Which produces a Literal of: + + I'm feeling "sad" + +=head2 value() + +Also overloaded as stringification, simply returns the literal string value. + +=head2 cmp($literal) + +Returns the equivalent of perl's cmp operator against the given $literal. + +=cut diff --git a/lib/XML/LibXML/Namespace.pod b/lib/XML/LibXML/Namespace.pod new file mode 100644 index 0000000..85d5598 --- /dev/null +++ b/lib/XML/LibXML/Namespace.pod @@ -0,0 +1,161 @@ +=head1 NAME + +XML::LibXML::Namespace - XML::LibXML Namespace Implementation + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Namespace nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + my $ns = XML::LibXML::Namespace->new($nsURI); + print $ns->nodeName(); + print $ns->name(); + $localname = $ns->getLocalName(); + print $ns->getData(); + print $ns->getValue(); + print $ns->value(); + $known_uri = $ns->getNamespaceURI(); + $known_prefix = $ns->getPrefix(); + $key = $ns->unique_key(); + +=head1 DESCRIPTION + +Namespace nodes are returned by both $element->findnodes('namespace::foo') or +by $node->getNamespaces(). + +The namespace node API is not part of any current DOM API, and so it is quite +minimal. It should be noted that namespace nodes are I<<<<<< not >>>>>> a sub class of L<<<<<< XML::LibXML::Node >>>>>>, however Namespace nodes act a lot like attribute nodes, and similarly named +methods will return what you would expect if you treated the namespace node as +an attribute. Note that in order to fix several inconsistencies between the API +and the documentation, the behavior of some functions have been changed in +1.64. + + +=head1 METHODS + +=over 4 + +=item new + + my $ns = XML::LibXML::Namespace->new($nsURI); + +Creates a new Namespace node. Note that this is not a 'node' as an attribute or +an element node. Therefore you can't do call all L<<<<<< XML::LibXML::Node >>>>>> Functions. All functions available for this node are listed below. + +Optionally you can pass the prefix to the namespace constructor. If this second +parameter is omitted you will create a so called default namespace. Note, the +newly created namespace is not bound to any document or node, therefore you +should not expect it to be available in an existing document. + + +=item declaredURI + +Returns the URI for this namespace. + + +=item declaredPrefix + +Returns the prefix for this namespace. + + +=item nodeName + + print $ns->nodeName(); + +Returns "xmlns:prefix", where prefix is the prefix for this namespace. + + +=item name + + print $ns->name(); + +Alias for nodeName() + + +=item getLocalName + + $localname = $ns->getLocalName(); + +Returns the local name of this node as if it were an attribute, that is, the +prefix associated with the namespace. + + +=item getData + + print $ns->getData(); + +Returns the URI of the namespace, i.e. the value of this node as if it were an +attribute. + + +=item getValue + + print $ns->getValue(); + +Alias for getData() + + +=item value + + print $ns->value(); + +Alias for getData() + + +=item getNamespaceURI + + $known_uri = $ns->getNamespaceURI(); + +Returns the string "http://www.w3.org/2000/xmlns/" + + +=item getPrefix + + $known_prefix = $ns->getPrefix(); + +Returns the string "xmlns" + + +=item unique_key + + $key = $ns->unique_key(); + +This method returns a key guaranteed to be unique for this namespace, and to +always be the same value for this namespace. Two namespace objects return the +same key if and only if they have the same prefix and the same URI. The +returned key value is useful as a key in hashes. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Node.pod b/lib/XML/LibXML/Node.pod new file mode 100644 index 0000000..6f8e9a5 --- /dev/null +++ b/lib/XML/LibXML/Node.pod @@ -0,0 +1,783 @@ +=head1 NAME + +XML::LibXML::Node - Abstract Base Class of XML::LibXML Nodes + +=head1 SYNOPSIS + + + + use XML::LibXML; + + $name = $node->nodeName; + $node->setNodeName( $newName ); + $bool = $node->isSameNode( $other_node ); + $bool = $node->isEqual( $other_node ); + $num = $node->unique_key; + $content = $node->nodeValue; + $content = $node->textContent; + $type = $node->nodeType; + $node->unbindNode(); + $childnode = $node->removeChild( $childnode ); + $oldnode = $node->replaceChild( $newNode, $oldNode ); + $node->replaceNode($newNode); + $childnode = $node->appendChild( $childnode ); + $childnode = $node->addChild( $childnode ); + $node = $parent->addNewChild( $nsURI, $name ); + $node->addSibling($newNode); + $newnode =$node->cloneNode( $deep ); + $parentnode = $node->parentNode; + $nextnode = $node->nextSibling(); + $nextnode = $node->nextNonBlankSibling(); + $prevnode = $node->previousSibling(); + $prevnode = $node->previousNonBlankSibling(); + $boolean = $node->hasChildNodes(); + $childnode = $node->firstChild; + $childnode = $node->lastChild; + $documentnode = $node->ownerDocument; + $node = $node->getOwner; + $node->setOwnerDocument( $doc ); + $node->insertBefore( $newNode, $refNode ); + $node->insertAfter( $newNode, $refNode ); + @nodes = $node->findnodes( $xpath_expression ); + $result = $node->find( $xpath ); + print $node->findvalue( $xpath ); + $bool = $node->exists( $xpath_expression ); + @childnodes = $node->childNodes(); + @childnodes = $node->nonBlankChildNodes(); + $xmlstring = $node->toString($format,$docencoding); + $c14nstring = $node->toStringC14N(); + $c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context); + $c14nstring = $node->toStringC14N_v1_1(); + $c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context); + $ec14nstring = $node->toStringEC14N(); + $ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list); + $ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list); + $str = $doc->serialize($format); + $localname = $node->localname; + $nameprefix = $node->prefix; + $uri = $node->namespaceURI(); + $boolean = $node->hasAttributes(); + @attributelist = $node->attributes(); + $URI = $node->lookupNamespaceURI( $prefix ); + $prefix = $node->lookupNamespacePrefix( $URI ); + $node->normalize; + @nslist = $node->getNamespaces; + $node->removeChildNodes(); + $strURI = $node->baseURI(); + $node->setBaseURI($strURI); + $node->nodePath(); + $lineno = $node->line_number(); + +=head1 DESCRIPTION + +XML::LibXML::Node defines functions that are common to all Node Types. An +XML::LibXML::Node should never be created standalone, but as an instance of a +high level class such as XML::LibXML::Element or XML::LibXML::Text. The class +itself should provide only common functionality. In XML::LibXML each node is +part either of a document or a document-fragment. Because of this there is no +node without a parent. This may causes confusion with "unbound" nodes. + + +=head1 METHODS + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item nodeName + + $name = $node->nodeName; + +Returns the node's name. This function is aware of namespaces and returns the +full name of the current node (C<<<<<< prefix:localname >>>>>>). + +Since 1.62 this function also returns the correct DOM names for node types with +constant names, namely: #text, #cdata-section, #comment, #document, +#document-fragment. + + +=item setNodeName + + $node->setNodeName( $newName ); + +In very limited situations, it is useful to change a nodes name. In the DOM +specification this should throw an error. This Function is aware of namespaces. + + +=item isSameNode + + $bool = $node->isSameNode( $other_node ); + +returns TRUE (1) if the given nodes refer to the same node structure, otherwise +FALSE (0) is returned. + + +=item isEqual + + $bool = $node->isEqual( $other_node ); + +deprecated version of isSameNode(). + +I<<<<<< NOTE >>>>>> isEqual will change behaviour to follow the DOM specification + + +=item unique_key + + $num = $node->unique_key; + +This function is not specified for any DOM level. It returns a key guaranteed +to be unique for this node, and to always be the same value for this node. In +other words, two node objects return the same key if and only if isSameNode +indicates that they are the same node. + +The returned key value is useful as a key in hashes. + + +=item nodeValue + + $content = $node->nodeValue; + +If the node has any content (such as stored in a C<<<<<< text node >>>>>>) it can get requested through this function. + +I<<<<<< NOTE: >>>>>> Element Nodes have no content per definition. To get the text value of an +Element use textContent() instead! + + +=item textContent + + $content = $node->textContent; + +this function returns the content of all text nodes in the descendants of the +given node as specified in DOM. + + +=item nodeType + + $type = $node->nodeType; + +Return a numeric value representing the node type of this node. The module +XML::LibXML by default exports constants for the node types (see the EXPORT +section in the L<<<<<< XML::LibXML >>>>>> manual page). + + +=item unbindNode + + $node->unbindNode(); + +Unbinds the Node from its siblings and Parent, but not from the Document it +belongs to. If the node is not inserted into the DOM afterwards, it will be +lost after the program terminates. From a low level view, the unbound node is +stripped from the context it is and inserted into a (hidden) document-fragment. + + +=item removeChild + + $childnode = $node->removeChild( $childnode ); + +This will unbind the Child Node from its parent C<<<<<< $node >>>>>>. The function returns the unbound node. If C<<<<<< $childnode >>>>>> is not a child of the given Node the function will fail. + + +=item replaceChild + + $oldnode = $node->replaceChild( $newNode, $oldNode ); + +Replaces the C<<<<<< $oldNode >>>>>> with the C<<<<<< $newNode >>>>>>. The C<<<<<< $oldNode >>>>>> will be unbound from the Node. This function differs from the DOM L2 +specification, in the case, if the new node is not part of the document, the +node will be imported first. + + +=item replaceNode + + $node->replaceNode($newNode); + +This function is very similar to replaceChild(), but it replaces the node +itself rather than a childnode. This is useful if a node found by any XPath +function, should be replaced. + + +=item appendChild + + $childnode = $node->appendChild( $childnode ); + +The function will add the C<<<<<< $childnode >>>>>> to the end of C<<<<<< $node >>>>>>'s children. The function should fail, if the new childnode is already a child +of C<<<<<< $node >>>>>>. This function differs from the DOM L2 specification, in the case, if the new +node is not part of the document, the node will be imported first. + + +=item addChild + + $childnode = $node->addChild( $childnode ); + +As an alternative to appendChild() one can use the addChild() function. This +function is a bit faster, because it avoids all DOM conformity checks. +Therefore this function is quite useful if one builds XML documents in memory +where the order and ownership (C<<<<<< ownerDocument >>>>>>) is assured. + +addChild() uses libxml2's own xmlAddChild() function. Thus it has to be used +with extra care: If a text node is added to a node and the node itself or its +last childnode is as well a text node, the node to add will be merged with the +one already available. The current node will be removed from memory after this +action. Because perl is not aware of this action, the perl instance is still +available. XML::LibXML will catch the loss of a node and refuse to run any +function called on that node. + + + + my $t1 = $doc->createTextNode( "foo" ); + my $t2 = $doc->createTextNode( "bar" ); + $t1->addChild( $t2 ); # is OK + my $val = $t2->nodeValue(); # will fail, script dies + +Also addChild() will not check if the added node belongs to the same document +as the node it will be added to. This could lead to inconsistent documents and +in more worse cases even to memory violations, if one does not keep track of +this issue. + +Although this sounds like a lot of trouble, addChild() is useful if a document +is built from a stream, such as happens sometimes in SAX handlers or filters. + +If you are not sure about the source of your nodes, you better stay with +appendChild(), because this function is more user friendly in the sense of +being more error tolerant. + + +=item addNewChild + + $node = $parent->addNewChild( $nsURI, $name ); + +Similar to C<<<<<< addChild() >>>>>>, this function uses low level libxml2 functionality to provide faster +interface for DOM building. I<<<<<< addNewChild() >>>>>> uses C<<<<<< xmlNewChild() >>>>>> to create a new node on a given parent element. + +addNewChild() has two parameters $nsURI and $name, where $nsURI is an +(optional) namespace URI. $name is the fully qualified element name; +addNewChild() will determine the correct prefix if necessary. + +The function returns the newly created node. + +This function is very useful for DOM building, where a created node can be +directly associated with its parent. I<<<<<< NOTE >>>>>> this function is not part of the DOM specification and its use will limit your +code to XML::LibXML. + + +=item addSibling + + $node->addSibling($newNode); + +addSibling() allows adding an additional node to the end of a nodelist, defined +by the given node. + + +=item cloneNode + + $newnode =$node->cloneNode( $deep ); + +I<<<<<< cloneNode >>>>>> creates a copy of C<<<<<< $node >>>>>>. When $deep is set to 1 (true) the function will copy all child nodes as well. +If $deep is 0 only the current node will be copied. Note that in case of +element, attributes are copied even if $deep is 0. + +Note that the behavior of this function for $deep=0 has changed in 1.62 in +order to be consistent with the DOM spec (in older versions attributes and +namespace information was not copied for elements). + + +=item parentNode + + $parentnode = $node->parentNode; + +Returns simply the Parent Node of the current node. + + +=item nextSibling + + $nextnode = $node->nextSibling(); + +Returns the next sibling if any . + + +=item nextNonBlankSibling + + $nextnode = $node->nextNonBlankSibling(); + +Returns the next non-blank sibling if any (a node is blank if it is a Text or +CDATA node consisting of whitespace only). This method is not defined by DOM. + + +=item previousSibling + + $prevnode = $node->previousSibling(); + +Analogous to I<<<<<< getNextSibling >>>>>> the function returns the previous sibling if any. + + +=item previousNonBlankSibling + + $prevnode = $node->previousNonBlankSibling(); + +Returns the previous non-blank sibling if any (a node is blank if it is a Text +or CDATA node consisting of whitespace only). This method is not defined by +DOM. + + +=item hasChildNodes + + $boolean = $node->hasChildNodes(); + +If the current node has child nodes this function returns TRUE (1), otherwise +it returns FALSE (0, not undef). + + +=item firstChild + + $childnode = $node->firstChild; + +If a node has child nodes this function will return the first node in the child +list. + + +=item lastChild + + $childnode = $node->lastChild; + +If the C<<<<<< $node >>>>>> has child nodes this function returns the last child node. + + +=item ownerDocument + + $documentnode = $node->ownerDocument; + +Through this function it is always possible to access the document the current +node is bound to. + + +=item getOwner + + $node = $node->getOwner; + +This function returns the node the current node is associated with. In most +cases this will be a document node or a document fragment node. + + +=item setOwnerDocument + + $node->setOwnerDocument( $doc ); + +This function binds a node to another DOM. This method unbinds the node first, +if it is already bound to another document. + +This function is the opposite calling of L<<<<<< XML::LibXML::Document >>>>>>'s adoptNode() function. Because of this it has the same limitations with +Entity References as adoptNode(). + + +=item insertBefore + + $node->insertBefore( $newNode, $refNode ); + +The method inserts C<<<<<< $newNode >>>>>> before C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node. +This function differs from the DOM L2 specification, in the case, if the new +node is not part of the document, the node will be imported first, +automatically. + +$refNode has to be passed to the function even if it is undefined: + + + + $node->insertBefore( $newNode, undef ); # the same as $node->appendChild( $newNode ); + $node->insertBefore( $newNode ); # wrong + +Note, that the reference node has to be a direct child of the node the function +is called on. Also, $newChild is not allowed to be an ancestor of the new +parent node. + + +=item insertAfter + + $node->insertAfter( $newNode, $refNode ); + +The method inserts C<<<<<< $newNode >>>>>> after C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node. + +Note, that $refNode has to be passed explicitly even if it is undef. + + +=item findnodes + + @nodes = $node->findnodes( $xpath_expression ); + +I<<<<<< findnodes >>>>>> evaluates the xpath expression (XPath 1.0) on the current node and returns the +resulting node set as an array. In scalar context, returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. + +The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object. + +I<<<<<< NOTE ON NAMESPACES AND XPATH >>>>>>: + +A common mistake about XPath is to assume that node tests consisting of an +element name with no prefix match elements in the default namespace. This +assumption is wrong - by XPath specification, such node tests can only match +elements that are in no (i.e. null) namespace. + +So, for example, one cannot match the root element of an XHTML document with C<<<<<< $node-Efind('/html') >>>>>> since C<<<<<< '/html' >>>>>> would only match if the root element C<<<<<< EhtmlE >>>>>> had no namespace, but all XHTML elements belong to the namespace +http://www.w3.org/1999/xhtml. (Note that C<<<<<< xmlns="..." >>>>>> namespace declarations can also be specified in a DTD, which makes the +situation even worse, since the XML document looks as if there was no default +namespace). + +There are several possible ways to deal with namespaces in XPath: + + +=over 4 + +=item * + +The recommended way is to use the L<<<<<< XML::LibXML::XPathContext >>>>>> module to define an explicit context for XPath evaluation, in which a document +independent prefix-to-namespace mapping can be defined. For example: + + + + my $xpc = XML::LibXML::XPathContext->new; + $xpc->registerNs('x', 'http://www.w3.org/1999/xhtml'); + $xpc->find('/x:html',$node); + + + +=item * + +Another possibility is to use prefixes declared in the queried document (if +known). If the document declares a prefix for the namespace in question (and +the context node is in the scope of the declaration), C<<<<<< XML::LibXML >>>>>> allows you to use the prefix in the XPath expression, e.g.: + + + + $node->find('/x:html'); + + + +=back + +See also XML::LibXML::XPathContext->findnodes. + + +=item find + + $result = $node->find( $xpath ); + +I<<<<<< find >>>>>> evaluates the XPath 1.0 expression using the current node as the context of the +expression, and returns the result depending on what type of result the XPath +expression had. For example, the XPath "1 * 3 + 52" results in a L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return an L<<<<<< XML::LibXML::Boolean >>>>>> object, or an L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to "do +the right thing" in different contexts. + +The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object. + +See also L<<<<<< XML::LibXML::XPathContext >>>>>>->find. + + +=item findvalue + + print $node->findvalue( $xpath ); + +I<<<<<< findvalue >>>>>> is exactly equivalent to: + + + + $node->find( $xpath )->to_literal; + +That is, it returns the literal value of the results. This enables you to +ensure that you get a string back from your search, allowing certain shortcuts. +This could be used as the equivalent of XSLT's . + +See also L<<<<<< XML::LibXML::XPathContext >>>>>>->findvalue. + +The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object. + + +=item exists + + $bool = $node->exists( $xpath_expression ); + +This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a +node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true +for libxml2 >= 2.6.27). + +For XPath expressions that do not return node-set, the method returns true if +the returned value is a non-zero number or a non-empty string. + + +=item childNodes + + @childnodes = $node->childNodes(); + +I<<<<<< childNodes >>>>>> implements a more intuitive interface to the childnodes of the current node. It +enables you to pass all children directly to a C<<<<<< map >>>>>> or C<<<<<< grep >>>>>>. If this function is called in scalar context, a L<<<<<< XML::LibXML::NodeList >>>>>> object will be returned. + + +=item nonBlankChildNodes + + @childnodes = $node->nonBlankChildNodes(); + +This is like I<<<<<< childNodes >>>>>>, but returns only non-blank nodes (where a node is blank if it is a Text or +CDATA node consisting of whitespace only). This method is not defined by DOM. + + +=item toString + + $xmlstring = $node->toString($format,$docencoding); + +This method is similar to the method C<<<<<< toString >>>>>> of a L<<<<<< XML::LibXML::Document >>>>>> but for a single node. It returns a string consisting of XML serialization of +the given node and all its descendants. Unlike C<<<<<< XML::LibXML::Document::toString >>>>>>, in this case the resulting string is by default a character string (UTF-8 +encoded with UTF8 flag on). An optional flag $format controls indentation, as +in C<<<<<< XML::LibXML::Document::toString >>>>>>. If the second optional $docencoding flag is true, the result will be a byte +string in the document encoding (see C<<<<<< XML::LibXML::Document::actualEncoding >>>>>>). + + +=item toStringC14N + + $c14nstring = $node->toStringC14N(); + $c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context); + +The function is similar to toString(). Instead of simply serializing the +document tree, it transforms it as it is specified in the XML-C14N +Specification (see L<<<<<< http://www.w3.org/TR/xml-c14n >>>>>>). Such transformation is known as canonization. + +If $with_comments is 0 or not defined, the result-document will not contain any +comments that exist in the original document. To include comments into the +canonized document, $with_comments has to be set to 1. + +The parameter $xpath_expression defines the nodeset of nodes that should be +visible in the resulting document. This can be used to filter out some nodes. +One has to note, that only the nodes that are part of the nodeset, will be +included into the result-document. Their child-nodes will not exist in the +resulting document, unless they are part of the nodeset defined by the xpath +expression. + +If $xpath_expression is omitted or empty, toStringC14N() will include all nodes +in the given sub-tree, using the following XPath expressions: with comments + + (. | .//node() | .//@* | .//namespace::*) + +and without comments + + (. | .//node() | .//@* | .//namespace::*)[not(self::comment())] + + + +An optional parameter $xpath_context can be used to pass an L<<<<<< XML::LibXML::XPathContext >>>>>> object defining the context for evaluation of $xpath_expression. This is useful +for mapping namespace prefixes used in the XPath expression to namespace URIs. +Note, however, that $node will be used as the context node for the evaluation, +not the context node of $xpath_context! + + +=item toStringC14N_v1_1 + + $c14nstring = $node->toStringC14N_v1_1(); + $c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context); + +This function behaves like toStringC14N() except that it uses the +"XML_C14N_1_1" constant for canonicalising using the "C14N 1.1 spec". + + +=item toStringEC14N + + $ec14nstring = $node->toStringEC14N(); + $ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list); + $ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list); + +The function is similar to toStringC14N() but follows the XML-EXC-C14N +Specification (see L<<<<<< http://www.w3.org/TR/xml-exc-c14n >>>>>>) for exclusive canonization of XML. + +The arguments $with_comments, $xpath_expression, $xpath_context are as in +toStringC14N(). An ARRAY reference can be passed as the last argument +$inclusive_prefix_list, listing namespace prefixes that are to be handled in +the manner described by the Canonical XML Recommendation (i.e. preserved in the +output even if the namespace is not used). C.f. the spec for details. + + +=item serialize + + $str = $doc->serialize($format); + +An alias for toString(). This function was name added to be more consistent +with libxml2. + + +=item serialize_c14n + +An alias for toStringC14N(). + + +=item serialize_exc_c14n + +An alias for toStringEC14N(). + + +=item localname + + $localname = $node->localname; + +Returns the local name of a tag. This is the part behind the colon. + + +=item prefix + + $nameprefix = $node->prefix; + +Returns the prefix of a tag. This is the part before the colon. + + +=item namespaceURI + + $uri = $node->namespaceURI(); + +returns the URI of the current namespace. + + +=item hasAttributes + + $boolean = $node->hasAttributes(); + +returns 1 (TRUE) if the current node has any attributes set, otherwise 0 +(FALSE) is returned. + + +=item attributes + + @attributelist = $node->attributes(); + +This function returns all attributes and namespace declarations assigned to the +given node. + +Because XML::LibXML does not implement namespace declarations and attributes +the same way, it is required to test what kind of node is handled while +accessing the functions result. + +If this function is called in array context the attribute nodes are returned as +an array. In scalar context, the function will return a L<<<<<< XML::LibXML::NamedNodeMap >>>>>> object. + + +=item lookupNamespaceURI + + $URI = $node->lookupNamespaceURI( $prefix ); + +Find a namespace URI by its prefix starting at the current node. + + +=item lookupNamespacePrefix + + $prefix = $node->lookupNamespacePrefix( $URI ); + +Find a namespace prefix by its URI starting at the current node. + +I<<<<<< NOTE >>>>>> Only the namespace URIs are meant to be unique. The prefix is only document +related. Also the document might have more than a single prefix defined for a +namespace. + + +=item normalize + + $node->normalize; + +This function normalizes adjacent text nodes. This function is not as strict as +libxml2's xmlTextMerge() function, since it will not free a node that is still +referenced by the perl layer. + + +=item getNamespaces + + @nslist = $node->getNamespaces; + +If a node has any namespaces defined, this function will return these +namespaces. Note, that this will not return all namespaces that are in scope, +but only the ones declared explicitly for that node. + +Although getNamespaces is available for all nodes, it only makes sense if used +with element nodes. + + +=item removeChildNodes + + $node->removeChildNodes(); + +This function is not specified for any DOM level: It removes all childnodes +from a node in a single step. Other than the libxml2 function itself +(xmlFreeNodeList), this function will not immediately remove the nodes from the +memory. This saves one from getting memory violations, if there are nodes still +referred to from the Perl level. + + +=item baseURI () + + $strURI = $node->baseURI(); + +Searches for the base URL of the node. The method should work on both XML and +HTML documents even if base mechanisms for these are completely different. It +returns the base as defined in RFC 2396 sections "5.1.1. Base URI within +Document Content" and "5.1.2. Base URI from the Encapsulating Entity". However +it does not return the document base (5.1.3), use method C<<<<<< URI >>>>>> of C<<<<<< XML::LibXML::Document >>>>>> for this. + + +=item setBaseURI ($strURI) + + $node->setBaseURI($strURI); + +This method only does something useful for an element node in an XML document. +It sets the xml:base attribute on the node to $strURI, which effectively sets +the base URI of the node to the same value. + +Note: For HTML documents this behaves as if the document was XML which may not +be desired, since it does not effectively set the base URI of the node. See RFC +2396 appendix D for an example of how base URI can be specified in HTML. + + +=item nodePath + + $node->nodePath(); + +This function is not specified for any DOM level: It returns a canonical +structure based XPath for a given node. + + +=item line_number + + $lineno = $node->line_number(); + +This function returns the line number where the tag was found during parsing. +If a node is added to the document the line number is 0. Problems may occur, if +a node from one document is passed to another one. + +IMPORTANT: Due to limitations in the libxml2 library line numbers greater than +65535 will be returned as 65535. Please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details. + +Note: line_number() is special to XML::LibXML and not part of the DOM +specification. + +If the line_numbers flag of the parser was not activated before parsing, +line_number() will always return 0. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/NodeList.pm b/lib/XML/LibXML/NodeList.pm new file mode 100644 index 0000000..ca748d9 --- /dev/null +++ b/lib/XML/LibXML/NodeList.pm @@ -0,0 +1,345 @@ +# $Id$ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::NodeList; + +use strict; +use warnings; + +use XML::LibXML::Boolean; +use XML::LibXML::Literal; +use XML::LibXML::Number; + +use vars qw($VERSION); +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use overload + '""' => \&to_literal, + 'bool' => \&to_boolean, + 'cmp' => sub { + my($aa, $bb, $order) = @_; + return ($order ? ("$bb" cmp "$aa") : ("$aa" cmp "$bb")); + }, + ; + +sub new { + my $class = shift; + bless [@_], $class; +} + +sub new_from_ref { + my ($class,$array_ref,$reuse) = @_; + return bless $reuse ? $array_ref : [@$array_ref], $class; +} + +sub pop { + my $self = CORE::shift; + CORE::pop @$self; +} + +sub push { + my $self = CORE::shift; + CORE::push @$self, @_; +} + +sub append { + my $self = CORE::shift; + my ($nodelist) = @_; + CORE::push @$self, $nodelist->get_nodelist; +} + +sub shift { + my $self = CORE::shift; + CORE::shift @$self; +} + +sub unshift { + my $self = CORE::shift; + CORE::unshift @$self, @_; +} + +sub prepend { + my $self = CORE::shift; + my ($nodelist) = @_; + CORE::unshift @$self, $nodelist->get_nodelist; +} + +sub size { + my $self = CORE::shift; + scalar @$self; +} + +sub get_node { + # uses array index starting at 1, not 0 + # this is mainly because of XPath. + my $self = CORE::shift; + my ($pos) = @_; + $self->[$pos - 1]; +} + +sub item +{ + my ($self, $pos) = @_; + return $self->[$pos]; +} + +sub get_nodelist { + my $self = CORE::shift; + @$self; +} + +sub to_boolean { + my $self = CORE::shift; + return (@$self > 0) ? XML::LibXML::Boolean->True : XML::LibXML::Boolean->False; +} + +# string-value of a nodelist is the string-value of the first node +sub string_value { + my $self = CORE::shift; + return '' unless @$self; + return $self->[0]->string_value; +} + +sub to_literal { + my $self = CORE::shift; + return XML::LibXML::Literal->new( + join('', CORE::grep {defined $_} CORE::map { $_->string_value } @$self) + ); +} + +sub to_literal_delimited { + my $self = CORE::shift; + return XML::LibXML::Literal->new( + join(CORE::shift, CORE::grep {defined $_} CORE::map { $_->string_value } @$self) + ); +} + +sub to_literal_list { + my $self = CORE::shift; + my @nodes = CORE::map{ XML::LibXML::Literal->new($_->string_value())->value() } @{$self}; + + if (wantarray) { + return( @nodes ); + } + return( \@nodes ); +} + +sub to_number { + my $self = CORE::shift; + return XML::LibXML::Number->new( + $self->to_literal + ); +} + +sub iterator { + warn "this function is obsolete!\nIt was disabled in version 1.54\n"; + return undef; +} + +sub map { + my $self = CORE::shift; + my $sub = __is_code(CORE::shift); + local $_; + my @results = CORE::map { @{[ $sub->($_) ]} } @$self; + return unless defined wantarray; + return wantarray ? @results : (ref $self)->new(@results); +} + +sub grep { + my $self = CORE::shift; + my $sub = __is_code(CORE::shift); + local $_; + my @results = CORE::grep { $sub->($_) } @$self; + return unless defined wantarray; + return wantarray ? @results : (ref $self)->new(@results); +} + +sub sort { + my $self = CORE::shift; + my $sub = __is_code(CORE::shift); + my @results = CORE::sort { $sub->($a,$b) } @$self; + return wantarray ? @results : (ref $self)->new(@results); +} + +sub foreach { + my $self = CORE::shift; + my $sub = CORE::shift; + + foreach my $item (@$self) + { + local $_ = $item; + $sub->($item); + } + + return wantarray ? @$self : $self; +} + +sub reverse { + my $self = CORE::shift; + my @results = CORE::reverse @$self; + return wantarray ? @results : (ref $self)->new(@results); +} + +sub reduce { + my $self = CORE::shift; + my $sub = __is_code(CORE::shift); + + my @list = @$self; + CORE::unshift @list, $_[0] if @_; + + my $a = CORE::shift(@list); + foreach my $b (@list) + { + $a = $sub->($a, $b); + } + return $a; +} + +sub __is_code { + my ($code) = @_; + + if (ref $code eq 'CODE') { + return $code; + } + + # There are better ways of doing this, but here I've tried to + # avoid adding any additional external dependencies. + # + if (UNIVERSAL::can($code, 'can') # is blessed (sort of) + and overload::Overloaded($code) # is overloaded + and overload::Method($code, '&{}')) { # overloads '&{}' + return $code; + } + + # The other possibility is that $code is a coderef, but is + # blessed into a class that doesn't overload '&{}'. In which + # case... well, I'm stumped! + + die "Not a subroutine reference\n"; +} + +1; +__END__ + +=head1 NAME + +XML::LibXML::NodeList - a list of XML document nodes + +=head1 DESCRIPTION + +An XML::LibXML::NodeList object contains an ordered list of nodes, as +detailed by the W3C DOM documentation of Node Lists. + +=head1 SYNOPSIS + + my $results = $dom->findnodes('//somepath'); + foreach my $context ($results->get_nodelist) { + my $newresults = $context->findnodes('./other/element'); + ... + } + +=head1 API + +=head2 new(@nodes) + +You will almost never have to create a new NodeList object, as it is all +done for you by XPath. + +=head2 get_nodelist() + +Returns a list of nodes, the contents of the node list, as a perl list. + +=head2 string_value() + +Returns the string-value of the first node in the list. +See the XPath specification for what "string-value" means. + +=head2 to_literal() + +Returns the concatenation of all the string-values of all +the nodes in the list. + +=head2 to_literal_delimited($separator) + +Returns the concatenation of all the string-values of all +the nodes in the list, delimited by the specified separator. + +=head2 to_literal_list() + +Returns all the string-values of all the nodes in the list as +a perl list. + +=head2 get_node($pos) + +Returns the node at $pos. The node position in XPath is based at 1, not 0. + +=head2 size() + +Returns the number of nodes in the NodeList. + +=head2 pop() + +Equivalent to perl's pop function. + +=head2 push(@nodes) + +Equivalent to perl's push function. + +=head2 append($nodelist) + +Given a nodelist, appends the list of nodes in $nodelist to the end of the +current list. + +=head2 shift() + +Equivalent to perl's shift function. + +=head2 unshift(@nodes) + +Equivalent to perl's unshift function. + +=head2 prepend($nodelist) + +Given a nodelist, prepends the list of nodes in $nodelist to the front of +the current list. + +=head2 map($coderef) + +Equivalent to perl's map function. + +=head2 grep($coderef) + +Equivalent to perl's grep function. + +=head2 sort($coderef) + +Equivalent to perl's sort function. + +Caveat: Perl's magic C<$a> and C<$b> variables are not available in +C<$coderef>. Instead the two terms are passed to the coderef as arguments. + +=head2 reverse() + +Equivalent to perl's reverse function. + +=head2 foreach($coderef) + +Inspired by perl's foreach loop. Executes the coderef on each item in +the list. Similar to C, but instead of returning the list of values +returned by $coderef, returns the original NodeList. + +=head2 reduce($coderef, $init) + +Equivalent to List::Util's reduce function. C<$init> is optional and +provides an initial value for the reduction. + +Caveat: Perl's magic C<$a> and C<$b> variables are not available in +C<$coderef>. Instead the two terms are passed to the coderef as arguments. + +=cut diff --git a/lib/XML/LibXML/Number.pm b/lib/XML/LibXML/Number.pm new file mode 100644 index 0000000..cddd4c0 --- /dev/null +++ b/lib/XML/LibXML/Number.pm @@ -0,0 +1,98 @@ +# $Id$ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::Number; +use XML::LibXML::Boolean; +use XML::LibXML::Literal; +use strict; +use warnings; + +use vars qw ($VERSION); +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use overload + '""' => \&value, + '0+' => \&value, + '<=>' => \&cmp; + +sub new { + my $class = shift; + my $number = shift; + if ($number !~ /^\s*(-\s*)?(\d+(\.\d*)?|\.\d+)\s*$/) { + $number = undef; + } + else { + $number =~ s/\s+//g; + } + bless \$number, $class; +} + +sub as_string { + my $self = shift; + defined $$self ? $$self : 'NaN'; +} + +sub as_xml { + my $self = shift; + return "" . (defined($$self) ? $$self : 'NaN') . "\n"; +} + +sub value { + my $self = shift; + $$self; +} + +sub cmp { + my $self = shift; + my ($other, $swap) = @_; + if ($swap) { + return $other <=> $$self; + } + return $$self <=> $other; +} + +sub evaluate { + my $self = shift; + $self; +} + +sub to_boolean { + my $self = shift; + return $$self ? XML::LibXML::Boolean->True : XML::LibXML::Boolean->False; +} + +sub to_literal { XML::LibXML::Literal->new($_[0]->as_string); } +sub to_number { $_[0]; } + +sub string_value { return $_[0]->value } + +1; +__END__ + +=head1 NAME + +XML::LibXML::Number - Simple numeric values. + +=head1 DESCRIPTION + +This class holds simple numeric values. It doesn't support -0, +/- Infinity, +or NaN, as the XPath spec says it should, but I'm not hurting anyone I don't think. + +=head1 API + +=head2 new($num) + +Creates a new XML::LibXML::Number object, with the value in $num. Does some +rudimentary numeric checking on $num to ensure it actually is a number. + +=head2 value() + +Also as overloaded stringification. Returns the numeric value held. + +=cut diff --git a/lib/XML/LibXML/PI.pod b/lib/XML/LibXML/PI.pod new file mode 100644 index 0000000..99323ff --- /dev/null +++ b/lib/XML/LibXML/PI.pod @@ -0,0 +1,94 @@ +=head1 NAME + +XML::LibXML::PI - XML::LibXML Processing Instructions + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Processing Instruction nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $pinode->setData( $data_string ); + $pinode->setData( name=>string_value [...] ); + +=head1 DESCRIPTION + +Processing instructions are implemented with XML::LibXML with read and write +access. The PI data is the PI without the PI target (as specified in XML 1.0 +[17]) as a string. This string can be accessed with getData as implemented in L<<<<<< XML::LibXML::Node >>>>>>. + +The write access is aware about the fact, that many processing instructions +have attribute like data. Therefore setData() provides besides the DOM spec +conform Interface to pass a set of named parameter. So the code segment + + + + my $pi = $dom->createProcessingInstruction("abc"); + $pi->setData(foo=>'bar', foobar=>'foobar'); + $dom->appendChild( $pi ); + +will result the following PI in the DOM: + + + + + +Which is how it is specified in the DOM specification. This three step +interface creates temporary a node in perl space. This can be avoided while +using the insertProcessingInstruction() method. Instead of the three calls +described above, the call + + + + $dom->insertProcessingInstruction("abc",'foo="bar" foobar="foobar"'); + +will have the same result as above. + +L<<<<<< XML::LibXML::PI >>>>>>'s implementation of setData() documented below differs a bit from the standard +version as available in L<<<<<< XML::LibXML::Node >>>>>>: + +=over 4 + +=item setData + + $pinode->setData( $data_string ); + $pinode->setData( name=>string_value [...] ); + +This method allows one to change the content data of a PI. Additionally to the +interface specified for DOM Level2, the method provides a named parameter +interface to set the data. This parameter list is converted into a string +before it is appended to the PI. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Parser.pod b/lib/XML/LibXML/Parser.pod new file mode 100644 index 0000000..034395d --- /dev/null +++ b/lib/XML/LibXML/Parser.pod @@ -0,0 +1,1008 @@ +=head1 NAME + +XML::LibXML::Parser - Parsing XML Data with XML::LibXML + +=head1 SYNOPSIS + + + + use XML::LibXML '1.70'; + + # Parser constructor + + $parser = XML::LibXML->new(); + $parser = XML::LibXML->new(option=>value, ...); + $parser = XML::LibXML->new({option=>value, ...}); + + # Parsing XML + + $dom = XML::LibXML->load_xml( + location => $file_or_url + # parser options ... + ); + $dom = XML::LibXML->load_xml( + string => $xml_string + # parser options ... + ); + $dom = XML::LibXML->load_xml( + string => (\$xml_string) + # parser options ... + ); + $dom = XML::LibXML->load_xml({ + IO => $perl_file_handle + # parser options ... + ); + $dom = $parser->load_xml(...); + + # Parsing HTML + + $dom = XML::LibXML->load_html(...); + $dom = $parser->load_html(...); + + # Parsing well-balanced XML chunks + + $fragment = $parser->parse_balanced_chunk( $wbxmlstring, $encoding ); + + # Processing XInclude + + $parser->process_xincludes( $doc ); + $parser->processXIncludes( $doc ); + + # Old-style parser interfaces + + $doc = $parser->parse_file( $xmlfilename ); + $doc = $parser->parse_fh( $io_fh ); + $doc = $parser->parse_string( $xmlstring); + $doc = $parser->parse_html_file( $htmlfile, \%opts ); + $doc = $parser->parse_html_fh( $io_fh, \%opts ); + $doc = $parser->parse_html_string( $htmlstring, \%opts ); + + # Push parser + + $parser->parse_chunk($string, $terminate); + $parser->init_push(); + $parser->push(@data); + $doc = $parser->finish_push( $recover ); + + # Set/query parser options + + $parser->option_exists($name); + $parser->get_option($name); + $parser->set_option($name,$value); + $parser->set_options({$name=>$value,...}); + + # XML catalogs + + $parser->load_catalog( $catalog_file ); + +=head1 PARSING + +An XML document is read into a data structure such as a DOM tree by a piece of +software, called a parser. XML::LibXML currently provides four different parser +interfaces: + + +=over 4 + +=item * + +A DOM Pull-Parser + + + +=item * + +A DOM Push-Parser + + + +=item * + +A SAX Parser + + + +=item * + +A DOM based SAX Parser. + + + +=back + + +=head2 Creating a Parser Instance + +XML::LibXML provides an OO interface to the libxml2 parser functions. Thus you +have to create a parser instance before you can parse any XML data. + +=over 4 + +=item new + + + $parser = XML::LibXML->new(); + $parser = XML::LibXML->new(option=>value, ...); + $parser = XML::LibXML->new({option=>value, ...}); + +Create a new XML and HTML parser instance. Each parser instance holds default +values for various parser options. Optionally, one can pass a hash reference or +a list of option => value pairs to set a different default set of options. +Unless specified otherwise, the options C<<<<<< load_ext_dtd >>>>>>, and C<<<<<< expand_entities >>>>>> are set to 1. See L<<<<<< Parser Options >>>>>> for a list of libxml2 parser's options. + + + +=back + + +=head2 DOM Parser + +One of the common parser interfaces of XML::LibXML is the DOM parser. This +parser reads XML data into a DOM like data structure, so each tag can get +accessed and transformed. + +XML::LibXML's DOM parser is not only capable to parse XML data, but also +(strict) HTML files. There are three ways to parse documents - as a string, as +a Perl filehandle, or as a filename/URL. The return value from each is a L<<<<<< XML::LibXML::Document >>>>>> object, which is a DOM object. + +All of the functions listed below will throw an exception if the document is +invalid. To prevent this causing your program exiting, wrap the call in an +eval{} block + +=over 4 + +=item load_xml + + + $dom = XML::LibXML->load_xml( + location => $file_or_url + # parser options ... + ); + $dom = XML::LibXML->load_xml( + string => $xml_string + # parser options ... + ); + $dom = XML::LibXML->load_xml( + string => (\$xml_string) + # parser options ... + ); + $dom = XML::LibXML->load_xml({ + IO => $perl_file_handle + # parser options ... + ); + $dom = $parser->load_xml(...); + + +This function is available since XML::LibXML 1.70. It provides easy to use +interface to the XML parser that parses given file (or non-HTTPS URL), string, +or input stream to a DOM tree. The arguments can be passed in a HASH reference +or as name => value pairs. The function can be called as a class method or an +object method. In both cases it internally creates a new parser instance +passing the specified parser options; if called as an object method, it clones +the original parser (preserving its settings) and additionally applies the +specified options to the new parser. See the constructor C<<<<<< new >>>>>> and L<<<<<< Parser Options >>>>>> for more information. + +Note that, due to a limitation in the underlying libxml2 library, this call +does not recognize HTTPS-based URLs. (It will treat an HTTPS URL as a filename, +likely throwing a "No such file or directory" exception.) + + +=item load_html + + + $dom = XML::LibXML->load_html(...); + $dom = $parser->load_html(...); + + +This function is available since XML::LibXML 1.70. It has the same usage as C<<<<<< load_xml >>>>>>, providing interface to the HTML parser. See C<<<<<< load_xml >>>>>> for more information. + + + +=back + +Parsing HTML may cause problems, especially if the ampersand ('&') is used. +This is a common problem if HTML code is parsed that contains links to +CGI-scripts. Such links cause the parser to throw errors. In such cases libxml2 +still parses the entire document as there was no error, but the error causes +XML::LibXML to stop the parsing process. However, the document is not lost. +Such HTML documents should be parsed using the I<<<<<< recover >>>>>> flag. By default recovering is deactivated. + +The functions described above are implemented to parse well formed documents. +In some cases a program gets well balanced XML instead of well formed documents +(e.g. an XML fragment from a database). With XML::LibXML it is not required to +wrap such fragments in the code, because XML::LibXML is capable even to parse +well balanced XML fragments. + +=over 4 + +=item parse_balanced_chunk + + $fragment = $parser->parse_balanced_chunk( $wbxmlstring, $encoding ); + +This function parses a well balanced XML string into a L<<<<<< XML::LibXML::DocumentFragment >>>>>>. The first arguments contains the input string, the optional second argument +can be used to specify character encoding of the input (UTF-8 is assumed by +default). + + +=item parse_xml_chunk + +This is the old name of parse_balanced_chunk(). Because it may causes confusion +with the push parser interface, this function should not be used anymore. + + + +=back + +By default XML::LibXML does not process XInclude tags within an XML Document +(see options section below). XML::LibXML allows one to post-process a document +to expand XInclude tags. + +=over 4 + +=item process_xincludes + + $parser->process_xincludes( $doc ); + +After a document is parsed into a DOM structure, you may want to expand the +documents XInclude tags. This function processes the given document structure +and expands all XInclude tags (or throws an error) by using the flags and +callbacks of the given parser instance. + +Note that the resulting Tree contains some extra nodes (of type +XML_XINCLUDE_START and XML_XINCLUDE_END) after successfully processing the +document. These nodes indicate where data was included into the original tree. +if the document is serialized, these extra nodes will not show up. + +Remember: A Document with processed XIncludes differs from the original +document after serialization, because the original XInclude tags will not get +restored! + +If the parser flag "expand_xincludes" is set to 1, you need not to post process +the parsed document. + + +=item processXIncludes + + $parser->processXIncludes( $doc ); + +This is an alias to process_xincludes, but through a JAVA like function name. + + +=item parse_file + + $doc = $parser->parse_file( $xmlfilename ); + +This function parses an XML document from a file or network; $xmlfilename can +be either a filename or a (non-HTTPS) URL. Note that for parsing files, this +function is the fastest choice, about 6-8 times faster then parse_fh(). + + +=item parse_fh + + $doc = $parser->parse_fh( $io_fh ); + +parse_fh() parses a IOREF or a subclass of IO::Handle. + +Because the data comes from an open handle, libxml2's parser does not know +about the base URI of the document. To set the base URI one should use +parse_fh() as follows: + + + + my $doc = $parser->parse_fh( $io_fh, $baseuri ); + + +=item parse_string + + $doc = $parser->parse_string( $xmlstring); + +This function is similar to parse_fh(), but it parses an XML document that is +available as a single string in memory, or alternatively as a reference to a +scalar containing a string. Again, you can pass an optional base URI to the +function. + + + + my $doc = $parser->parse_string( $xmlstring, $baseuri ); + my $doc = $parser->parse_string(\$xmlstring, $baseuri); + + +=item parse_html_file + + $doc = $parser->parse_html_file( $htmlfile, \%opts ); + +Similar to parse_file() but parses HTML (strict) documents; $htmlfile can be +filename or (non-HTTPS) URL. + +An optional second argument can be used to pass some options to the HTML parser +as a HASH reference. See options labeled with HTML in L<<<<<< Parser Options >>>>>>. + + +=item parse_html_fh + + $doc = $parser->parse_html_fh( $io_fh, \%opts ); + +Similar to parse_fh() but parses HTML (strict) streams. + +An optional second argument can be used to pass some options to the HTML parser +as a HASH reference. See options labeled with HTML in L<<<<<< Parser Options >>>>>>. + +Note: encoding option may not work correctly with this function in libxml2 < +2.6.27 if the HTML file declares charset using a META tag. + + +=item parse_html_string + + $doc = $parser->parse_html_string( $htmlstring, \%opts ); + +Similar to parse_string() but parses HTML (strict) strings. + +An optional second argument can be used to pass some options to the HTML parser +as a HASH reference. See options labeled with HTML in L<<<<<< Parser Options >>>>>>. + + + +=back + + +=head2 Push Parser + +XML::LibXML provides a push parser interface. Rather than pulling the data from +a given source the push parser waits for the data to be pushed into it. + +This allows one to parse large documents without waiting for the parser to +finish. The interface is especially useful if a program needs to pre-process +the incoming pieces of XML (e.g. to detect document boundaries). + +While XML::LibXML parse_*() functions force the data to be a well-formed XML, +the push parser will take any arbitrary string that contains some XML data. The +only requirement is that all the pushed strings are together a well formed +document. With the push parser interface a program can interrupt the parsing +process as required, where the parse_*() functions give not enough flexibility. + +Different to the pull parser implemented in parse_fh() or parse_file(), the +push parser is not able to find out about the documents end itself. Thus the +calling program needs to indicate explicitly when the parsing is done. + +In XML::LibXML this is done by a single function: + +=over 4 + +=item parse_chunk + + $parser->parse_chunk($string, $terminate); + +parse_chunk() tries to parse a given chunk of data, which isn't necessarily +well balanced data. The function takes two parameters: The chunk of data as a +string and optional a termination flag. If the termination flag is set to a +true value (e.g. 1), the parsing will be stopped and the resulting document +will be returned as the following example describes: + + + + my $parser = XML::LibXML->new; + for my $string ( "<", "foo", ' bar="hello world"', "/>") { + $parser->parse_chunk( $string ); + } + my $doc = $parser->parse_chunk("", 1); # terminate the parsing + + + +=back + +Internally XML::LibXML provides three functions that control the push parser +process: + +=over 4 + +=item init_push + + $parser->init_push(); + +Initializes the push parser. + + +=item push + + $parser->push(@data); + +This function pushes the data stored inside the array to libxml2's parser. Each +entry in @data must be a normal scalar! This method can be called repeatedly. + + +=item finish_push + + $doc = $parser->finish_push( $recover ); + +This function returns the result of the parsing process. If this function is +called without a parameter it will complain about non well-formed documents. If +$restore is 1, the push parser can be used to restore broken or non well formed +(XML) documents as the following example shows: + + + + eval { + $parser->push( "", "bar" ); + $doc = $parser->finish_push(); # will report broken XML + }; + if ( $@ ) { + # ... + } + +This can be annoying if the closing tag is missed by accident. The following +code will restore the document: + + + + eval { + $parser->push( "", "bar" ); + $doc = $parser->finish_push(1); # will return the data parsed + # unless an error happened + }; + + print $doc->toString(); # returns "bar" + +Of course finish_push() will return nothing if there was no data pushed to the +parser before. + + + +=back + + +=head2 Pull Parser (Reader) + +XML::LibXML also provides a pull-parser interface similar to the XmlReader +interface in .NET. This interface is almost streaming, and is usually faster +and simpler to use than SAX. See L<<<<<< XML::LibXML::Reader >>>>>>. + + +=head2 Direct SAX Parser + +XML::LibXML provides a direct SAX parser in the L<<<<<< XML::LibXML::SAX >>>>>> module. + + +=head2 DOM based SAX Parser + +XML::LibXML also provides a DOM based SAX parser. The SAX parser is defined in +the module XML::LibXML::SAX::Parser. As it is not a stream based parser, it +parses documents into a DOM and traverses the DOM tree instead. + +The API of this parser is exactly the same as any other Perl SAX2 parser. See +XML::SAX::Intro for details. + +Aside from the regular parsing methods, you can access the DOM tree traverser +directly, using the generate() method: + + + + my $doc = build_yourself_a_document(); + my $saxparser = $XML::LibXML::SAX::Parser->new( ... ); + $parser->generate( $doc ); + +This is useful for serializing DOM trees, for example that you might have done +prior processing on, or that you have as a result of XSLT processing. + +I<<<<<< WARNING >>>>>> + +This is NOT a streaming SAX parser. As I said above, this parser reads the +entire document into a DOM and serialises it. Some people couldn't read that in +the paragraph above so I've added this warning. If you want a streaming SAX +parser look at the L<<<<<< XML::LibXML::SAX >>>>>> man page + + +=head1 SERIALIZATION + +XML::LibXML provides some functions to serialize nodes and documents. The +serialization functions are described on the L<<<<<< XML::LibXML::Node >>>>>> manpage or the L<<<<<< XML::LibXML::Document >>>>>> manpage. XML::LibXML checks three global flags that alter the serialization +process: + + +=over 4 + +=item * + +skipXMLDeclaration + + + +=item * + +skipDTD + + + +=item * + +setTagCompression + + + +=back + +of that three functions only setTagCompression is available for all +serialization functions. + +Because XML::LibXML does these flags not itself, one has to define them locally +as the following example shows: + + + + local $XML::LibXML::skipXMLDeclaration = 1; + local $XML::LibXML::skipDTD = 1; + local $XML::LibXML::setTagCompression = 1; + +If skipXMLDeclaration is defined and not '0', the XML declaration is omitted +during serialization. + +If skipDTD is defined and not '0', an existing DTD would not be serialized with +the document. + +If setTagCompression is defined and not '0' empty tags are displayed as open +and closing tags rather than the shortcut. For example the empty tag I<<<<<< foo >>>>>> will be rendered as I<<<<<< EfooEE/fooE >>>>>> rather than I<<<<<< Efoo/E >>>>>>. + + +=head1 PARSER OPTIONS + +Handling of libxml2 parser options has been unified and improved in XML::LibXML +1.70. You can now set default options for a particular parser instance by +passing them to the constructor as C<<<<<< XML::LibXML-Enew({name=Evalue, ...}) >>>>>> or C<<<<<< XML::LibXML-Enew(name=Evalue,...) >>>>>>. The options can be queried and changed using the following methods (pre-1.70 +interfaces such as C<<<<<< $parser-Eload_ext_dtd(0) >>>>>> also exist, see below): + +=over 4 + +=item option_exists + + $parser->option_exists($name); + +Returns 1 if the current XML::LibXML version supports the option C<<<<<< $name >>>>>>, otherwise returns 0 (note that this does not necessarily mean that the option +is supported by the underlying libxml2 library). + + +=item get_option + + $parser->get_option($name); + +Returns the current value of the parser option C<<<<<< $name >>>>>>. + + +=item set_option + + $parser->set_option($name,$value); + +Sets option C<<<<<< $name >>>>>> to value C<<<<<< $value >>>>>>. + + +=item set_options + + $parser->set_options({$name=>$value,...}); + +Sets multiple parsing options at once. + + + +=back + +IMPORTANT NOTE: This documentation reflects the parser flags available in +libxml2 2.7.3. Some options have no effect if an older version of libxml2 is +used. + +Each of the flags listed below is labeled + +=over 4 + +=item /parser/ + +if it can be used with a C<<<<<< XML::LibXML >>>>>> parser object (i.e. passed to C<<<<<< XML::LibXML-Enew >>>>>>, C<<<<<< XML::LibXML-Eset_option >>>>>>, etc.) + + +=item /html/ + +if it can be used passed to the C<<<<<< parse_html_* >>>>>> methods + + +=item /reader/ + +if it can be used with the C<<<<<< XML::LibXML::Reader >>>>>>. + + + +=back + +Unless specified otherwise, the default for boolean valued options is 0 +(false). + +The available options are: + +=over 4 + +=item URI + +/parser, html, reader/ + +In case of parsing strings or file handles, XML::LibXML doesn't know about the +base uri of the document. To make relative references such as XIncludes work, +one has to set a base URI, that is then used for the parsed document. + + +=item line_numbers + +/parser, html, reader/ + +If this option is activated, libxml2 will store the line number of each element +node in the parsed document. The line number can be obtained using the C<<<<<< line_number() >>>>>> method of the C<<<<<< XML::LibXML::Node >>>>>> class (for non-element nodes this may report the line number of the containing +element). The line numbers are also used for reporting positions of validation +errors. + +IMPORTANT: Due to limitations in the libxml2 library line numbers greater than +65535 will be returned as 65535. Unfortunately, this is a long and sad story, +please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details. + + +=item encoding + +/html/ + +character encoding of the input + + +=item recover + +/parser, html, reader/ + +recover from errors; possible values are 0, 1, and 2 + +A true value turns on recovery mode which allows one to parse broken XML or +HTML data. The recovery mode allows the parser to return the successfully +parsed portion of the input document. This is useful for almost well-formed +documents, where for example a closing tag is missing somewhere. Still, +XML::LibXML will only parse until the first fatal (non-recoverable) error +occurs, reporting recoverable parsing errors as warnings. To suppress even +these warnings, use recover=>2. + +Note that validation is switched off automatically in recovery mode. + + +=item expand_entities + +/parser, reader/ + +substitute entities; possible values are 0 and 1; default is 1 + +Note that although this flag disables entity substitution, it does not prevent +the parser from loading external entities; when substitution of an external +entity is disabled, the entity will be represented in the document tree by an +XML_ENTITY_REF_NODE node whose subtree will be the content obtained by parsing +the external resource; Although this nesting is visible from the DOM it is +transparent to XPath data model, so it is possible to match nodes in an +unexpanded entity by the same XPath expression as if the entity were expanded. +See also ext_ent_handler. + + +=item ext_ent_handler + +/parser/ + +Provide a custom external entity handler to be used when expand_entities is set +to 1. Possible value is a subroutine reference. + +This feature does not work properly in libxml2 < 2.6.27! + +The subroutine provided is called whenever the parser needs to retrieve the +content of an external entity. It is called with two arguments: the system ID +(URI) and the public ID. The value returned by the subroutine is parsed as the +content of the entity. + +This method can be used to completely disable entity loading, e.g. to prevent +exploits of the type described at (L<<<<<< http://searchsecuritychannel.techtarget.com/generic/0,295582,sid97_gci1304703,00.html >>>>>>), where a service is tricked to expose its private data by letting it parse a +remote file (RSS feed) that contains an entity reference to a local file (e.g. C<<<<<< /etc/fstab >>>>>>). + +A more granular solution to this problem, however, is provided by custom URL +resolvers, as in + + my $c = XML::LibXML::InputCallback->new(); + sub match { # accept file:/ URIs except for XML catalogs in /etc/xml/ + my ($uri) = @_; + return ($uri=~m{^file:/} + and $uri !~ m{^file:///etc/xml/}) + ? 1 : 0; + } + $c->register_callbacks([ \&match, sub{}, sub{}, sub{} ]); + $parser->input_callbacks($c); + + + + +=item load_ext_dtd + +/parser, reader/ + +load the external DTD subset while parsing; possible values are 0 and 1. Unless +specified, XML::LibXML sets this option to 1. + +This flag is also required for DTD Validation, to provide complete attribute, +and to expand entities, regardless if the document has an internal subset. Thus +switching off external DTD loading, will disable entity expansion, validation, +and complete attributes on internal subsets as well. + + +=item complete_attributes + +/parser, reader/ + +create default DTD attributes; possible values are 0 and 1 + + +=item validation + +/parser, reader/ + +validate with the DTD; possible values are 0 and 1 + + +=item suppress_errors + +/parser, html, reader/ + +suppress error reports; possible values are 0 and 1 + + +=item suppress_warnings + +/parser, html, reader/ + +suppress warning reports; possible values are 0 and 1 + + +=item pedantic_parser + +/parser, html, reader/ + +pedantic error reporting; possible values are 0 and 1 + + +=item no_blanks + +/parser, html, reader/ + +remove blank nodes; possible values are 0 and 1 + + +=item no_defdtd + +/html/ + +do not add a default DOCTYPE; possible values are 0 and 1 + +the default is (0) to add a DTD when the input html lacks one + + +=item expand_xinclude or xinclude + +/parser, reader/ + +Implement XInclude substitution; possible values are 0 and 1 + +Expands XInclude tags immediately while parsing the document. Note that the +parser will use the URI resolvers installed via C<<<<<< XML::LibXML::InputCallback >>>>>> to parse the included document (if any). + + +=item no_xinclude_nodes + +/parser, reader/ + +do not generate XINCLUDE START/END nodes; possible values are 0 and 1 + + +=item no_network + +/parser, html, reader/ + +Forbid network access; possible values are 0 and 1 + +If set to true, all attempts to fetch non-local resources (such as DTD or +external entities) will fail (unless custom callbacks are defined). + +It may be necessary to use the flag C<<<<<< recover >>>>>> for processing documents requiring such resources while networking is off. + + +=item clean_namespaces + +/parser, reader/ + +remove redundant namespaces declarations during parsing; possible values are 0 +and 1. + + +=item no_cdata + +/parser, html, reader/ + +merge CDATA as text nodes; possible values are 0 and 1 + + +=item no_basefix + +/parser, reader/ + +not fixup XINCLUDE xml#base URIS; possible values are 0 and 1 + + +=item huge + +/parser, html, reader/ + +relax any hardcoded limit from the parser; possible values are 0 and 1. Unless +specified, XML::LibXML sets this option to 0. + +Note: the default value for this option was changed to protect against denial +of service through entity expansion attacks. Before enabling the option ensure +you have taken alternative measures to protect your application against this +type of attack. + + +=item gdome + +/parser/ + +THIS OPTION IS EXPERIMENTAL! + +Although quite powerful, XML::LibXML's DOM implementation is incomplete with +respect to the DOM level 2 or level 3 specifications. XML::GDOME is based on +libxml2 as well, and provides a rather complete DOM implementation by wrapping +libgdome. This flag allows you to make use of XML::LibXML's full parser options +and XML::GDOME's DOM implementation at the same time. + +To make use of this function, one has to install libgdome and configure +XML::LibXML to use this library. For this you need to rebuild XML::LibXML! + +Note: this feature was not seriously tested in recent XML::LibXML releases. + + + +=back + +For compatibility with XML::LibXML versions prior to 1.70, the following +methods are also supported for querying and setting the corresponding parser +options (if called without arguments, the methods return the current value of +the corresponding parser options; with an argument sets the option to a given +value): + + + + $parser->validation(); + $parser->recover(); + $parser->pedantic_parser(); + $parser->line_numbers(); + $parser->load_ext_dtd(); + $parser->complete_attributes(); + $parser->expand_xinclude(); + $parser->gdome_dom(); + $parser->clean_namespaces(); + $parser->no_network(); + +The following obsolete methods trigger parser options in some special way: + +=over 4 + +=item recover_silently + + + + $parser->recover_silently(1); + +If called without an argument, returns true if the current value of the C<<<<<< recover >>>>>> parser option is 2 and returns false otherwise. With a true argument sets the C<<<<<< recover >>>>>> parser option to 2; with a false argument sets the C<<<<<< recover >>>>>> parser option to 0. + + +=item expand_entities + + + + $parser->expand_entities(0); + +Get/set the C<<<<<< expand_entities >>>>>> option. If called with a true argument, also turns the C<<<<<< load_ext_dtd >>>>>> option to 1. + + +=item keep_blanks + + + + $parser->keep_blanks(0); + +This is actually the opposite of the C<<<<<< no_blanks >>>>>> parser option. If used without an argument retrieves negated value of C<<<<<< no_blanks >>>>>>. If used with an argument sets C<<<<<< no_blanks >>>>>> to the opposite value. + + +=item base_uri + + + + $parser->base_uri( $your_base_uri ); + +Get/set the C<<<<<< URI >>>>>> option. + + + +=back + + +=head1 XML CATALOGS + +C<<<<<< libxml2 >>>>>> supports XML catalogs. Catalogs are used to map remote resources to their local +copies. Using catalogs can speed up parsing processes if many external +resources from remote addresses are loaded into the parsed documents (such as +DTDs or XIncludes). + +Note that libxml2 has a global pool of loaded catalogs, so if you apply the +method C<<<<<< load_catalog >>>>>> to one parser instance, all parser instances will start using the catalog (in +addition to other previously loaded catalogs). + +Note also that catalogs are not used when a custom external entity handler is +specified. At the current state it is not possible to make use of both types of +resolving systems at the same time. + +=over 4 + +=item load_catalog + + $parser->load_catalog( $catalog_file ); + +Loads the XML catalog file $catalog_file. + + + + # Global external entity loader (similar to ext_ent_handler option + # but this works really globally, also in XML::LibXSLT include etc..) + + XML::LibXML::externalEntityLoader(\&my_loader); + + + +=back + + +=head1 ERROR REPORTING + +XML::LibXML throws exceptions during parsing, validation or XPath processing +(and some other occasions). These errors can be caught by using I<<<<<< eval >>>>>> blocks. The error is stored in I<<<<<< $@ >>>>>>. There are two implementations: the old one throws $@ which is just a message +string, in the new one $@ is an object from the class XML::LibXML::Error; this +class overrides the operator "" so that when printed, the object flattens to +the usual error message. + +XML::LibXML throws errors as they occur. This is a very common misunderstanding +in the use of XML::LibXML. If the eval is omitted, XML::LibXML will always halt +your script by "croaking" (see Carp man page for details). + +Also note that an increasing number of functions throw errors if bad data is +passed as arguments. If you cannot assure valid data passed to XML::LibXML you +should eval these functions. + +Note: since version 1.59, get_last_error() is no longer available in +XML::LibXML for thread-safety reasons. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Pattern.pod b/lib/XML/LibXML/Pattern.pod new file mode 100644 index 0000000..3cf4d72 --- /dev/null +++ b/lib/XML/LibXML/Pattern.pod @@ -0,0 +1,114 @@ +=head1 NAME + +XML::LibXML::Pattern - XML::LibXML::Pattern - interface to libxml2 XPath patterns + +=head1 SYNOPSIS + + + + use XML::LibXML; + my $pattern = XML::LibXML::Pattern->new('/x:html/x:body//x:div', { 'x' => 'http://www.w3.org/1999/xhtml' }); + # test a match on an XML::LibXML::Node $node + + if ($pattern->matchesNode($node)) { ... } + + # or on an XML::LibXML::Reader + + if ($reader->matchesPattern($pattern)) { ... } + + # or skip reading all nodes that do not match + + print $reader->nodePath while $reader->nextPatternMatch($pattern); + + $pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } ); + $bool = $pattern->matchesNode($node); + +=head1 DESCRIPTION + +This is a perl interface to libxml2's pattern matching support I<<<<<< http://xmlsoft.org/html/libxml-pattern.html >>>>>>. This feature requires recent versions of libxml2. + +Patterns are a small subset of XPath language, which is limited to +(disjunctions of) location paths involving the child and descendant axes in +abbreviated form as described by the extended BNF given below: + + + + Selector ::= Path ( '|' Path )* + Path ::= ('.//' | '//' | '/' )? Step ( '/' Step )* + Step ::= '.' | NameTest + NameTest ::= QName | '*' | NCName ':' '*' + +For readability, whitespace may be used in selector XPath expressions even +though not explicitly allowed by the grammar: whitespace may be freely added +within patterns before or after any token, where + + + + token ::= '.' | '/' | '//' | '|' | NameTest + +Note that no predicates or attribute tests are allowed. + +Patterns are particularly useful for stream parsing provided via the C<<<<<< XML::LibXML::Reader >>>>>> interface. + +=over 4 + +=item new() + + $pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } ); + +The constructor of a pattern takes a pattern expression (as described by the +BNF grammar above) and an optional HASH reference mapping prefixes to namespace +URIs. The method returns a compiled pattern object. + +Note that if the document has a default namespace, it must still be given an +prefix in order to be matched (as demanded by the XPath 1.0 specification). For +example, to match an element C<<<<<< Ea xmlns="http://foo.bar"E/aE >>>>>>, one should use a pattern like this: + + + + $pattern = XML::LibXML::Pattern->new( 'foo:a', { foo => 'http://foo.bar' }); + + +=item matchesNode($node) + + $bool = $pattern->matchesNode($node); + +Given an XML::LibXML::Node object, returns a true value if the node is matched +by the compiled pattern expression. + + + +=back + + +=head1 SEE ALSO + +L<<<<<< XML::LibXML::Reader >>>>>> for other methods involving compiled patterns. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Reader.pm b/lib/XML/LibXML/Reader.pm new file mode 100644 index 0000000..40f093c --- /dev/null +++ b/lib/XML/LibXML/Reader.pm @@ -0,0 +1,216 @@ +# $Id: Reader.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# +package XML::LibXML::Reader; + +use XML::LibXML; +use Carp; +use strict; +use warnings; + +use vars qw ($VERSION); +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use 5.008_000; + +BEGIN { + UNIVERSAL::can('XML::LibXML::Reader','_newForFile') or + croak("Cannot use XML::LibXML::Reader module - ". + "your libxml2 is compiled without reader support!"); +} + +use base qw(Exporter); +use constant { + XML_READER_TYPE_NONE => 0, + XML_READER_TYPE_ELEMENT => 1, + XML_READER_TYPE_ATTRIBUTE => 2, + XML_READER_TYPE_TEXT => 3, + XML_READER_TYPE_CDATA => 4, + XML_READER_TYPE_ENTITY_REFERENCE => 5, + XML_READER_TYPE_ENTITY => 6, + XML_READER_TYPE_PROCESSING_INSTRUCTION => 7, + XML_READER_TYPE_COMMENT => 8, + XML_READER_TYPE_DOCUMENT => 9, + XML_READER_TYPE_DOCUMENT_TYPE => 10, + XML_READER_TYPE_DOCUMENT_FRAGMENT => 11, + XML_READER_TYPE_NOTATION => 12, + XML_READER_TYPE_WHITESPACE => 13, + XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14, + XML_READER_TYPE_END_ELEMENT => 15, + XML_READER_TYPE_END_ENTITY => 16, + XML_READER_TYPE_XML_DECLARATION => 17, + + XML_READER_NONE => -1, + XML_READER_START => 0, + XML_READER_ELEMENT => 1, + XML_READER_END => 2, + XML_READER_EMPTY => 3, + XML_READER_BACKTRACK => 4, + XML_READER_DONE => 5, + XML_READER_ERROR => 6 +}; +use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS ); + +sub CLONE_SKIP { 1 } + +BEGIN { + +%EXPORT_TAGS = ( + types => + [qw( + XML_READER_TYPE_NONE + XML_READER_TYPE_ELEMENT + XML_READER_TYPE_ATTRIBUTE + XML_READER_TYPE_TEXT + XML_READER_TYPE_CDATA + XML_READER_TYPE_ENTITY_REFERENCE + XML_READER_TYPE_ENTITY + XML_READER_TYPE_PROCESSING_INSTRUCTION + XML_READER_TYPE_COMMENT + XML_READER_TYPE_DOCUMENT + XML_READER_TYPE_DOCUMENT_TYPE + XML_READER_TYPE_DOCUMENT_FRAGMENT + XML_READER_TYPE_NOTATION + XML_READER_TYPE_WHITESPACE + XML_READER_TYPE_SIGNIFICANT_WHITESPACE + XML_READER_TYPE_END_ELEMENT + XML_READER_TYPE_END_ENTITY + XML_READER_TYPE_XML_DECLARATION + )], + states => + [qw( + XML_READER_NONE + XML_READER_START + XML_READER_ELEMENT + XML_READER_END + XML_READER_EMPTY + XML_READER_BACKTRACK + XML_READER_DONE + XML_READER_ERROR + )] +); +@EXPORT = (@{$EXPORT_TAGS{types}},@{$EXPORT_TAGS{states}}); +@EXPORT_OK = @EXPORT; +$EXPORT_TAGS{all}=\@EXPORT_OK; +} + +our %_preserve_flag; + +{ + my %props = ( + load_ext_dtd => 1, # load the external subset + complete_attributes => 2, # default DTD attributes + validation => 3, # validate with the DTD + expand_entities => 4, # substitute entities + ); + sub getParserProp { + my ($self, $name) = @_; + my $prop = $props{$name}; + return undef unless defined $prop; + return $self->_getParserProp($prop); + } + sub setParserProp { + my $self = shift; + my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_; + my ($key, $value); + while (($key,$value) = each %args) { + my $prop = $props{ $key }; + $self->_setParserProp($prop,$value); + } + return; + } + + my (%string_pool,%rng_pool,%xsd_pool); # used to preserve data passed to the reader + sub new { + my ($class) = shift; + my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_; + my $encoding = $args{encoding}; + my $URI = $args{URI}; + $URI="$URI" if defined $URI; # stringify in case it is an URI object + my $options = XML::LibXML->_parser_options(\%args); + + my $self = undef; + if ( defined $args{location} ) { + $self = $class->_newForFile( $args{location}, $encoding, $options ); + } + elsif ( defined $args{string} ) { + $self = $class->_newForString( $args{string}, $URI, $encoding, $options ); + if (defined($self)) { + $string_pool{$self} = \$args{string}; + } + } + elsif ( defined $args{IO} ) { + $self = $class->_newForIO( $args{IO}, $URI, $encoding, $options ); + } + elsif ( defined $args{DOM} ) { + croak("DOM must be a XML::LibXML::Document node") + unless UNIVERSAL::isa($args{DOM}, 'XML::LibXML::Document'); + $self = $class->_newForDOM( $args{DOM} ); + } + elsif ( defined $args{FD} ) { + my $fd = fileno($args{FD}); + $self = $class->_newForFd( $fd, $URI, $encoding, $options ); + } + else { + croak("XML::LibXML::Reader->new: specify location, string, IO, DOM, or FD"); + } + if ($args{RelaxNG}) { + if (ref($args{RelaxNG})) { + $rng_pool{$self} = \$args{RelaxNG}; + $self->_setRelaxNG($args{RelaxNG}); + } else { + $self->_setRelaxNGFile($args{RelaxNG}); + } + } + if ($args{Schema}) { + if (ref($args{Schema})) { + $xsd_pool{$self} = \$args{Schema}; + $self->_setXSD($args{Schema}); + } else { + $self->_setXSDFile($args{Schema}); + } + } + return $self; + } + sub DESTROY { + my $self = shift; + delete $string_pool{$self}; + delete $rng_pool{$self}; + delete $xsd_pool{$self}; + $self->_DESTROY; + } +} +sub close { + my ($reader) = @_; + # _close return -1 on failure, 0 on success + # perl close returns 0 on failure, 1 on success + return $reader->_close == 0 ? 1 : 0; +} + +sub preservePattern { + my $reader=shift; + my ($pattern,$ns_map)=@_; + if (ref($ns_map) eq 'HASH') { + # translate prefix=>URL hash to a (URL,prefix) list + $reader->_preservePattern($pattern,[reverse %$ns_map]); + } else { + $reader->_preservePattern(@_); + } +} + +sub nodePath { + my $reader=shift; + my $path = $reader->_nodePath; + $path=~s/\[\d+\]//g; # make /foo[1]/bar[1] just /foo/bar, since + # sibling count in the buffered fragment is + # basically random and generally misleading + return $path; +} + +1; +__END__ diff --git a/lib/XML/LibXML/Reader.pod b/lib/XML/LibXML/Reader.pod new file mode 100644 index 0000000..094e852 --- /dev/null +++ b/lib/XML/LibXML/Reader.pod @@ -0,0 +1,677 @@ +=head1 NAME + +XML::LibXML::Reader - XML::LibXML::Reader - interface to libxml2 pull parser + +=head1 SYNOPSIS + + + + use XML::LibXML::Reader; + + + + my $reader = XML::LibXML::Reader->new(location => "file.xml") + or die "cannot read file.xml\n"; + while ($reader->read) { + processNode($reader); + } + + + + sub processNode { + my $reader = shift; + printf "%d %d %s %d\n", ($reader->depth, + $reader->nodeType, + $reader->name, + $reader->isEmptyElement); + } + +or + + + + my $reader = XML::LibXML::Reader->new(location => "file.xml") + or die "cannot read file.xml\n"; + $reader->preservePattern('//table/tr'); + $reader->finish; + print $reader->document->toString(1); + + +=head1 DESCRIPTION + +This is a perl interface to libxml2's pull-parser implementation xmlTextReader I<<<<<< http://xmlsoft.org/html/libxml-xmlreader.html >>>>>>. This feature requires at least libxml2-2.6.21. Pull-parsers (such as StAX in +Java, or XmlReader in C#) use an iterator approach to parse XML documents. They +are easier to program than event-based parser (SAX) and much more lightweight +than tree-based parser (DOM), which load the complete tree into memory. + +The Reader acts as a cursor going forward on the document stream and stopping +at each node on the way. At every point, the DOM-like methods of the Reader +object allow one to examine the current node (name, namespace, attributes, +etc.) + +The user's code keeps control of the progress and simply calls the C<<<<<< read() >>>>>> function repeatedly to progress to the next node in the document order. Other +functions provide means for skipping complete sub-trees, or nodes until a +specific element, etc. + +At every time, only a very limited portion of the document is kept in the +memory, which makes the API more memory-efficient than using DOM. However, it +is also possible to mix Reader with DOM. At every point the user may copy the +current node (optionally expanded into a complete sub-tree) from the processed +document to another DOM tree, or to instruct the Reader to collect sub-document +in form of a DOM tree consisting of selected nodes. + +Reader API also supports namespaces, xml:base, entity handling, and DTD +validation. Schema and RelaxNG validation support will probably be added in +some later revision of the Perl interface. + +The naming of methods compared to libxml2 and C# XmlTextReader has been changed +slightly to match the conventions of XML::LibXML. Some functions have been +changed or added with respect to the C interface. + + +=head1 CONSTRUCTOR + +Depending on the XML source, the Reader object can be created with either of: + + + + my $reader = XML::LibXML::Reader->new( location => "file.xml", ... ); + my $reader = XML::LibXML::Reader->new( string => $xml_string, ... ); + my $reader = XML::LibXML::Reader->new( IO => $file_handle, ... ); + my $reader = XML::LibXML::Reader->new( FD => fileno(STDIN), ... ); + my $reader = XML::LibXML::Reader->new( DOM => $dom, ... ); + +where ... are (optional) reader options described below in L<<<<<< Reader options >>>>>> or various parser options described in L<<<<<< XML::LibXML::Parser >>>>>>. The constructor recognizes the following XML sources: + + +=head2 Source specification + +=over 4 + +=item location + +Read XML from a local file or (non-HTTPS) URL. + + +=item string + +Read XML from a string. + + +=item IO + +Read XML a Perl IO filehandle. + + +=item FD + +Read XML from a file descriptor (bypasses Perl I/O layer, only applicable to +filehandles for regular files or pipes). Possibly faster than IO. + + +=item DOM + +Use reader API to walk through a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>. + + + +=back + + +=head2 Reader options + +=over 4 + +=item encoding => $encoding + +override document encoding. + + +=item RelaxNG => $rng_schema + +can be used to pass either a L<<<<<< XML::LibXML::RelaxNG >>>>>> object or a filename or (non-HTTPS) URL of a RelaxNG schema to the constructor. +The schema is then used to validate the document as it is processed. + + +=item Schema => $xsd_schema + +can be used to pass either a L<<<<<< XML::LibXML::Schema >>>>>> object or a filename or (non-HTTPS) URL of a W3C XSD schema to the constructor. +The schema is then used to validate the document as it is processed. + + +=item ... + +the reader further supports various parser options described in L<<<<<< XML::LibXML::Parser >>>>>> (specifically those labeled by /reader/). + + + +=back + + +=head1 METHODS CONTROLLING PARSING PROGRESS + +=over 4 + +=item read () + +Moves the position to the next node in the stream, exposing its properties. + +Returns 1 if the node was read successfully, 0 if there is no more nodes to +read, or -1 in case of error + + +=item readAttributeValue () + +Parses an attribute value into one or more Text and EntityReference nodes. + +Returns 1 in case of success, 0 if the reader was not positioned on an +attribute node or all the attribute values have been read, or -1 in case of +error. + + +=item readState () + +Gets the read state of the reader. Returns the state value, or -1 in case of +error. The module exports constants for the Reader states, see STATES below. + + +=item depth () + +The depth of the node in the tree, starts at 0 for the root node. + + +=item next () + +Skip to the node following the current one in the document order while avoiding +the sub-tree if any. Returns 1 if the node was read successfully, 0 if there is +no more nodes to read, or -1 in case of error. + + +=item nextElement (localname?,nsURI?) + +Skip nodes following the current one in the document order until a specific +element is reached. The element's name must be equal to a given localname if +defined, and its namespace must equal to a given nsURI if defined. Either of +the arguments can be undefined (or omitted, in case of the latter or both). + +Returns 1 if the element was found, 0 if there is no more nodes to read, or -1 +in case of error. + + +=item nextPatternMatch (compiled_pattern) + +Skip nodes following the current one in the document order until an element +matching a given compiled pattern is reached. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< matchesPattern >>>>>> method. + +Returns 1 if the element was found, 0 if there is no more nodes to read, or -1 +in case of error. + + +=item skipSiblings () + +Skip all nodes on the same or lower level until the first node on a higher +level is reached. In particular, if the current node occurs in an element, the +reader stops at the end tag of the parent element, otherwise it stops at a node +immediately following the parent node. + +Returns 1 if successful, 0 if end of the document is reached, or -1 in case of +error. + + +=item nextSibling () + +It skips to the node following the current one in the document order while +avoiding the sub-tree if any. + +Returns 1 if the node was read successfully, 0 if there is no more nodes to +read, or -1 in case of error + + +=item nextSiblingElement (name?,nsURI?) + +Like nextElement but only processes sibling elements of the current node +(moving forward using C<<<<<< nextSibling () >>>>>> rather than C<<<<<< read () >>>>>>, internally). + +Returns 1 if the element was found, 0 if there is no more sibling nodes, or -1 +in case of error. + + +=item finish () + +Skip all remaining nodes in the document, reaching end of the document. + +Returns 1 if successful, 0 in case of error. + + +=item close () + +This method releases any resources allocated by the current instance and closes +any underlying input. It returns 0 on failure and 1 on success. This method is +automatically called by the destructor when the reader is forgotten, therefore +you do not have to call it directly. + + + +=back + + +=head1 METHODS EXTRACTING INFORMATION + +=over 4 + +=item name () + +Returns the qualified name of the current node, equal to (Prefix:)LocalName. + + +=item nodeType () + +Returns the type of the current node. See NODE TYPES below. + + +=item localName () + +Returns the local name of the node. + + +=item prefix () + +Returns the prefix of the namespace associated with the node. + + +=item namespaceURI () + +Returns the URI defining the namespace associated with the node. + + +=item isEmptyElement () + +Check if the current node is empty, this is a bit bizarre in the sense that + will be considered empty while will not. + + +=item hasValue () + +Returns true if the node can have a text value. + + +=item value () + +Provides the text value of the node if present or undef if not available. + + +=item readInnerXml () + +Reads the contents of the current node, including child nodes and markup. +Returns a string containing the XML of the node's content, or undef if the +current node is neither an element nor attribute, or has no child nodes. + + +=item readOuterXml () + +Reads the contents of the current node, including child nodes and markup. + +Returns a string containing the XML of the node including its content, or undef +if the current node is neither an element nor attribute. + + +=item nodePath() + +Returns a canonical location path to the current element from the root node to +the current node. Namespaced elements are matched by '*', because there is no +way to declare prefixes within XPath patterns. Unlike C<<<<<< XML::LibXML::Node::nodePath() >>>>>>, this function does not provide sibling counts (i.e. instead of e.g. '/a/b[1]' +and '/a/b[2]' you get '/a/b' for both matches). + + +=item matchesPattern(compiled_pattern) + +Returns a true value if the current node matches a compiled pattern. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< nextPatternMatch >>>>>> method. + + + +=back + + +=head1 METHODS EXTRACTING DOM NODES + +=over 4 + +=item document () + +Provides access to the document tree built by the reader. This function can be +used to collect the preserved nodes (see C<<<<<< preserveNode() >>>>>> and preservePattern). + +CAUTION: Never use this function to modify the tree unless reading of the whole +document is completed! + + +=item copyCurrentNode (deep) + +This function is similar a DOM function C<<<<<< copyNode() >>>>>>. It returns a copy of the currently processed node as a corresponding DOM +object. Use deep = 1 to obtain the full sub-tree. + + +=item preserveNode () + +This tells the XML Reader to preserve the current node in the document tree. A +document tree consisting of the preserved nodes and their content can be +obtained using the method C<<<<<< document() >>>>>> once parsing is finished. + +Returns the node or NULL in case of error. + + +=item preservePattern (pattern,\%ns_map) + +This tells the XML Reader to preserve all nodes matched by the pattern (which +is a streaming XPath subset). A document tree consisting of the preserved nodes +and their content can be obtained using the method C<<<<<< document() >>>>>> once parsing is finished. + +An optional second argument can be used to provide a HASH reference mapping +prefixes used by the XPath to namespace URIs. + +The XPath subset available with this function is described at + + + + http://www.w3.org/TR/xmlschema-1/#Selector + +and matches the production + + + + Path ::= ('.//')? ( Step '/' )* ( Step | '@' NameTest ) + +Returns a positive number in case of success and -1 in case of error + + + +=back + + +=head1 METHODS PROCESSING ATTRIBUTES + +=over 4 + +=item attributeCount () + +Provides the number of attributes of the current node. + + +=item hasAttributes () + +Whether the node has attributes. + + +=item getAttribute (name) + +Provides the value of the attribute with the specified qualified name. + +Returns a string containing the value of the specified attribute, or undef in +case of error. + + +=item getAttributeNs (localName, namespaceURI) + +Provides the value of the specified attribute. + +Returns a string containing the value of the specified attribute, or undef in +case of error. + + +=item getAttributeNo (no) + +Provides the value of the attribute with the specified index relative to the +containing element. + +Returns a string containing the value of the specified attribute, or undef in +case of error. + + +=item isDefault () + +Returns true if the current attribute node was generated from the default value +defined in the DTD. + + +=item moveToAttribute (name) + +Moves the position to the attribute with the specified local name and namespace +URI. + +Returns 1 in case of success, -1 in case of error, 0 if not found + + +=item moveToAttributeNo (no) + +Moves the position to the attribute with the specified index relative to the +containing element. + +Returns 1 in case of success, -1 in case of error, 0 if not found + + +=item moveToAttributeNs (localName,namespaceURI) + +Moves the position to the attribute with the specified local name and namespace +URI. + +Returns 1 in case of success, -1 in case of error, 0 if not found + + +=item moveToFirstAttribute () + +Moves the position to the first attribute associated with the current node. + +Returns 1 in case of success, -1 in case of error, 0 if not found + + +=item moveToNextAttribute () + +Moves the position to the next attribute associated with the current node. + +Returns 1 in case of success, -1 in case of error, 0 if not found + + +=item moveToElement () + +Moves the position to the node that contains the current attribute node. + +Returns 1 in case of success, -1 in case of error, 0 if not moved + + +=item isNamespaceDecl () + +Determine whether the current node is a namespace declaration rather than a +regular attribute. + +Returns 1 if the current node is a namespace declaration, 0 if it is a regular +attribute or other type of node, or -1 in case of error. + + + +=back + + +=head1 OTHER METHODS + +=over 4 + +=item lookupNamespace (prefix) + +Resolves a namespace prefix in the scope of the current element. + +Returns a string containing the namespace URI to which the prefix maps or undef +in case of error. + + +=item encoding () + +Returns a string containing the encoding of the document or undef in case of +error. + + +=item standalone () + +Determine the standalone status of the document being read. Returns 1 if the +document was declared to be standalone, 0 if it was declared to be not +standalone, or -1 if the document did not specify its standalone status or in +case of error. + + +=item xmlVersion () + +Determine the XML version of the document being read. Returns a string +containing the XML version of the document or undef in case of error. + + +=item baseURI () + +Returns the base URI of a given node. + + +=item isValid () + +Retrieve the validity status from the parser. + +Returns 1 if valid, 0 if no, and -1 in case of error. + + +=item xmlLang () + +The xml:lang scope within which the node resides. + + +=item lineNumber () + +Provide the line number of the current parsing point. + + +=item columnNumber () + +Provide the column number of the current parsing point. + + +=item byteConsumed () + +This function provides the current index of the parser relative to the start of +the current entity. This function is computed in bytes from the beginning +starting at zero and finishing at the size in bytes of the file if parsing a +file. The function is of constant cost if the input is UTF-8 but can be costly +if run on non-UTF-8 input. + + +=item setParserProp (prop => value, ...) + +Change the parser processing behaviour by changing some of its internal +properties. The following properties are available with this function: +``load_ext_dtd'', ``complete_attributes'', ``validation'', ``expand_entities''. + +Since some of the properties can only be changed before any read has been done, +it is best to set the parsing properties at the constructor. + +Returns 0 if the call was successful, or -1 in case of error + + +=item getParserProp (prop) + +Get value of an parser internal property. The following property names can be +used: ``load_ext_dtd'', ``complete_attributes'', ``validation'', +``expand_entities''. + +Returns the value, usually 0 or 1, or -1 in case of error. + + + +=back + + +=head1 DESTRUCTION + +XML::LibXML takes care of the reader object destruction when the last reference +to the reader object goes out of scope. The document tree is preserved, though, +if either of $reader->document or $reader->preserveNode was used and references +to the document tree exist. + + +=head1 NODE TYPES + +The reader interface provides the following constants for node types (the +constant symbols are exported by default or if tag C<<<<<< :types >>>>>> is used). + + + + XML_READER_TYPE_NONE => 0 + XML_READER_TYPE_ELEMENT => 1 + XML_READER_TYPE_ATTRIBUTE => 2 + XML_READER_TYPE_TEXT => 3 + XML_READER_TYPE_CDATA => 4 + XML_READER_TYPE_ENTITY_REFERENCE => 5 + XML_READER_TYPE_ENTITY => 6 + XML_READER_TYPE_PROCESSING_INSTRUCTION => 7 + XML_READER_TYPE_COMMENT => 8 + XML_READER_TYPE_DOCUMENT => 9 + XML_READER_TYPE_DOCUMENT_TYPE => 10 + XML_READER_TYPE_DOCUMENT_FRAGMENT => 11 + XML_READER_TYPE_NOTATION => 12 + XML_READER_TYPE_WHITESPACE => 13 + XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14 + XML_READER_TYPE_END_ELEMENT => 15 + XML_READER_TYPE_END_ENTITY => 16 + XML_READER_TYPE_XML_DECLARATION => 17 + + +=head1 STATES + +The following constants represent the values returned by C<<<<<< readState() >>>>>>. They are exported by default, or if tag C<<<<<< :states >>>>>> is used: + + + + XML_READER_NONE => -1 + XML_READER_START => 0 + XML_READER_ELEMENT => 1 + XML_READER_END => 2 + XML_READER_EMPTY => 3 + XML_READER_BACKTRACK => 4 + XML_READER_DONE => 5 + XML_READER_ERROR => 6 + + +=head1 SEE ALSO + +L<<<<<< XML::LibXML::Pattern >>>>>> for information about compiled patterns. + +http://xmlsoft.org/html/libxml-xmlreader.html + +http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html + + +=head1 ORIGINAL IMPLEMENTATION + +Heiko Klein, and Petr Pajas + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/RegExp.pod b/lib/XML/LibXML/RegExp.pod new file mode 100644 index 0000000..0ef9b6e --- /dev/null +++ b/lib/XML/LibXML/RegExp.pod @@ -0,0 +1,78 @@ +=head1 NAME + +XML::LibXML::RegExp - XML::LibXML::RegExp - interface to libxml2 regular expressions + +=head1 SYNOPSIS + + + + use XML::LibXML; + my $compiled_re = XML::LibXML::RegExp->new('[0-9]{5}(-[0-9]{4})?'); + if ($compiled_re->isDeterministic()) { ... } + if ($compiled_re->matches($string)) { ... } + + $compiled_re = XML::LibXML::RegExp->new( $regexp_str ); + $bool = $compiled_re->matches($string); + $bool = $compiled_re->isDeterministic(); + +=head1 DESCRIPTION + +This is a perl interface to libxml2's implementation of regular expressions, +which are used e.g. for validation of XML Schema simple types (pattern facet). + +=over 4 + +=item new() + + $compiled_re = XML::LibXML::RegExp->new( $regexp_str ); + +The constructor takes a string containing a regular expression and returns a +compiled regexp object. + + +=item matches($string) + + $bool = $compiled_re->matches($string); + +Given a string value, returns a true value if the value is matched by the +compiled regular expression. + + +=item isDeterministic() + + $bool = $compiled_re->isDeterministic(); + +Returns a true value if the regular expression is deterministic; returns false +otherwise. (See the definition of determinism in the XML spec (L<<<<<< http://www.w3.org/TR/REC-xml/#determinism >>>>>>)) + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/RelaxNG.pod b/lib/XML/LibXML/RelaxNG.pod new file mode 100644 index 0000000..a05d583 --- /dev/null +++ b/lib/XML/LibXML/RelaxNG.pod @@ -0,0 +1,93 @@ +=head1 NAME + +XML::LibXML::RelaxNG - RelaxNG Schema Validation + +=head1 SYNOPSIS + + + + use XML::LibXML; + $doc = XML::LibXML->new->parse_file($url); + + $rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 ); + $rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 ); + $rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 ); + eval { $rngschema->validate( $doc ); }; + +=head1 DESCRIPTION + +The XML::LibXML::RelaxNG class is a tiny frontend to libxml2's RelaxNG +implementation. Currently it supports only schema parsing and document +validation. + + +=head1 METHODS + +=over 4 + +=item new + + $rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 ); + $rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 ); + $rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 ); + +The constructor of XML::LibXML::RelaxNG needs to be called with list of +parameters. At least location, string or DOM parameter is required to specify +source of schema. Optional parameter no_network set to 1 cause that parser +would not access network and optional parameter recover set 1 cause that parser +would not call die() on errors. + +It is important, that each schema only have a single source. + +The location parameter allows one to parse a schema from the filesystem or a +(non-HTTPS) URL. + +The string parameter will parse the schema from the given XML string. + +The DOM parameter allows one to parse the schema from a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>. + +Note that the constructor will die() if the schema does not meed the +constraints of the RelaxNG specification. + + +=item validate + + eval { $rngschema->validate( $doc ); }; + +This function allows one to validate a (parsed) document against the given +RelaxNG schema. The argument of this function should be an +XML::LibXML::Document object. If this function succeeds, it will return 0, +otherwise it will die() and report the errors found. Because of this validate() +should be always evaluated. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/SAX.pm b/lib/XML/LibXML/SAX.pm new file mode 100644 index 0000000..681499e --- /dev/null +++ b/lib/XML/LibXML/SAX.pm @@ -0,0 +1,122 @@ +# $Id$ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::SAX; + +use strict; +use warnings; + +use vars qw($VERSION @ISA); + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +use XML::LibXML; +use XML::SAX::Base; + +use parent qw(XML::SAX::Base); + +use Carp; +use IO::File; + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +sub set_feature { + my ($self, $feat, $val) = @_; + + if ($feat eq 'http://xmlns.perl.org/sax/join-character-data') { + $self->{JOIN_CHARACTERS} = $val; + return 1; + } + + shift(@_); + return $self->SUPER::set_feature(@_); +} + +sub _parse_characterstream { + my ( $self, $fh ) = @_; + # this my catch the xml decl, so the parser won't get confused about + # a possibly wrong encoding. + croak( "not implemented yet" ); +} + +# See: +# https://rt.cpan.org/Public/Bug/Display.html?id=132759 +sub _calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc +{ + return XML::LibXML->new( expand_entities => 1, ); +} + +sub _parse_bytestream { + my ( $self, $fh ) = @_; + $self->{ParserOptions}{LibParser} = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc() unless defined $self->{ParserOptions}{LibParser}; + $self->{ParserOptions}{ParseFunc} = \&XML::LibXML::parse_fh; + $self->{ParserOptions}{ParseFuncParam} = $fh; + $self->_parse; + return $self->end_document({}); +} + +sub _parse_string { + my ( $self, $string ) = @_; + $self->{ParserOptions}{LibParser} = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc() unless defined $self->{ParserOptions}{LibParser}; + $self->{ParserOptions}{ParseFunc} = \&XML::LibXML::parse_string; + $self->{ParserOptions}{ParseFuncParam} = $string; + $self->_parse; + return $self->end_document({}); +} + +sub _parse_systemid { + my $self = shift; + $self->{ParserOptions}{LibParser} = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc() unless defined $self->{ParserOptions}{LibParser}; + $self->{ParserOptions}{ParseFunc} = \&XML::LibXML::parse_file; + $self->{ParserOptions}{ParseFuncParam} = shift; + $self->_parse; + return $self->end_document({}); +} + +sub parse_chunk { + my ( $self, $chunk ) = @_; + $self->{ParserOptions}{LibParser} = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc() unless defined $self->{ParserOptions}{LibParser}; + $self->{ParserOptions}{ParseFunc} = \&XML::LibXML::parse_xml_chunk; + $self->{ParserOptions}{LibParser}->{IS_FILTER}=1; # a hack to prevent parse_xml_chunk from issuing end_document + $self->{ParserOptions}{ParseFuncParam} = $chunk; + $self->_parse; + return; +} + +sub _parse { + my $self = shift; + my $args = bless $self->{ParserOptions}, ref($self); + + if (defined($self->{JOIN_CHARACTERS})) { + $args->{LibParser}->{JOIN_CHARACTERS} = $self->{JOIN_CHARACTERS}; + } else { + $args->{LibParser}->{JOIN_CHARACTERS} = 0; + } + + $args->{LibParser}->set_handler( $self ); + eval { + $args->{ParseFunc}->($args->{LibParser}, $args->{ParseFuncParam}); + }; + + if ( $args->{LibParser}->{SAX}->{State} == 1 ) { + croak( "SAX Exception not implemented, yet; Data ended before document ended\n" ); + } + + # break a possible circular reference + $args->{LibParser}->set_handler( undef ); + if ( $@ ) { + croak $@; + } + return; +} + +1; + diff --git a/lib/XML/LibXML/SAX.pod b/lib/XML/LibXML/SAX.pod new file mode 100644 index 0000000..dc736f7 --- /dev/null +++ b/lib/XML/LibXML/SAX.pod @@ -0,0 +1,67 @@ +=head1 NAME + +XML::LibXML::SAX - XML::LibXML direct SAX parser + +=head1 DESCRIPTION + +XML::LibXML provides an interface to libxml2 direct SAX interface. Through this +interface it is possible to generate SAX events directly while parsing a +document. While using the SAX parser XML::LibXML will not create a DOM Document +tree. + +Such an interface is useful if very large XML documents have to be processed +and no DOM functions are required. By using this interface it is possible to +read data stored within an XML document directly into the application data +structures without loading the document into memory. + +The SAX interface of XML::LibXML is based on the famous XML::SAX interface. It +uses the generic interface as provided by XML::SAX::Base. + +Additionally to the generic functions, which are only able to process entire +documents, XML::LibXML::SAX provides I<<<<<< parse_chunk() >>>>>>. This method generates SAX events from well balanced data such as is often +provided by databases. + + +=head1 FEATURES + +I<<<<<< NOTE: >>>>>> This feature is experimental. + +You can enable character data joining which may yield a significant speed boost +in your XML processing in lower markup ratio situations by enabling the +http://xmlns.perl.org/sax/join-character-data feature of this parser. This is +done via the set_feature method like this: + + + + $p->set_feature('http://xmlns.perl.org/sax/join-character-data', 1); + +You can also specify a 0 to disable. The default is to have this feature +disabled. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/SAX/Builder.pm b/lib/XML/LibXML/SAX/Builder.pm new file mode 100644 index 0000000..c16ee87 --- /dev/null +++ b/lib/XML/LibXML/SAX/Builder.pm @@ -0,0 +1,335 @@ +# $Id$ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::SAX::Builder; + +use strict; +use warnings; + +use XML::LibXML; +use XML::NamespaceSupport; + +use vars qw ($VERSION); + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +sub new { + my $class = shift; + return bless {@_}, $class; +} + +sub result { $_[0]->{LAST_DOM}; } + +sub done { + my ($self) = @_; + my $dom = $self->{DOM}; + $dom = $self->{Parent} unless defined $dom; # this is for parsing document chunks + + delete $self->{NamespaceStack}; + delete $self->{Parent}; + delete $self->{DOM}; + + $self->{LAST_DOM} = $dom; + + return $dom; +} + +sub set_document_locator { +} + +sub start_dtd { + my ($self, $dtd) = @_; + if (defined $dtd->{Name} and + (defined $dtd->{SystemId} or defined $dtd->{PublicId})) { + $self->{DOM}->createExternalSubset($dtd->{Name},$dtd->{PublicId},$dtd->{SystemId}); + } +} + +sub end_dtd { +} + +sub start_document { + my ($self, $doc) = @_; + $self->{DOM} = XML::LibXML::Document->createDocument(); + + if ( defined $self->{Encoding} ) { + $self->xml_decl({Version => ($self->{Version} || '1.0') , Encoding => $self->{Encoding}}); + } + + $self->{NamespaceStack} = XML::NamespaceSupport->new; + $self->{NamespaceStack}->push_context; + $self->{Parent} = undef; + return (); +} + +sub xml_decl { + my $self = shift; + my $decl = shift; + + if ( defined $decl->{Version} ) { + $self->{DOM}->setVersion( $decl->{Version} ); + } + if ( defined $decl->{Encoding} ) { + $self->{DOM}->setEncoding( $decl->{Encoding} ); + } + return (); +} + +sub end_document { + my ($self, $doc) = @_; + my $d = $self->done(); + return $d; +} + +sub start_prefix_mapping { + my $self = shift; + my $ns = shift; + + unless ( defined $self->{DOM} or defined $self->{Parent} ) { + $self->{Parent} = XML::LibXML::DocumentFragment->new(); + $self->{NamespaceStack} = XML::NamespaceSupport->new; + $self->{NamespaceStack}->push_context; + } + + $self->{USENAMESPACESTACK} = 1; + + $self->{NamespaceStack}->declare_prefix( $ns->{Prefix}, $ns->{NamespaceURI} ); + return (); +} + + +sub end_prefix_mapping { + my $self = shift; + my $ns = shift; + $self->{NamespaceStack}->undeclare_prefix( $ns->{Prefix} ); + return (); +} + + +sub start_element { + my ($self, $el) = @_; + my $node; + + unless ( defined $self->{DOM} or defined $self->{Parent} ) { + $self->{Parent} = XML::LibXML::DocumentFragment->new(); + $self->{NamespaceStack} = XML::NamespaceSupport->new; + $self->{NamespaceStack}->push_context; + } + + if ( defined $self->{Parent} ) { + $el->{NamespaceURI} ||= ""; + $node = $self->{Parent}->addNewChild( $el->{NamespaceURI}, + $el->{Name} ); + } + else { + if ($el->{NamespaceURI}) { + if ( defined $self->{DOM} ) { + $node = $self->{DOM}->createRawElementNS($el->{NamespaceURI}, + $el->{Name}); + } + else { + $node = XML::LibXML::Element->new( $el->{Name} ); + $node->setNamespace( $el->{NamespaceURI}, + $el->{Prefix} , 1 ); + } + } + else { + if ( defined $self->{DOM} ) { + $node = $self->{DOM}->createRawElement($el->{Name}); + } + else { + $node = XML::LibXML::Element->new( $el->{Name} ); + } + } + + $self->{DOM}->setDocumentElement($node); + } + + # build namespaces + my $skip_ns= 0; + foreach my $p ( $self->{NamespaceStack}->get_declared_prefixes() ) { + $skip_ns= 1; + my $uri = $self->{NamespaceStack}->get_uri($p); + my $nodeflag = 0; + if ( defined $uri + and defined $el->{NamespaceURI} + and $uri eq $el->{NamespaceURI} ) { + # $nodeflag = 1; + next; + } + $node->setNamespace($uri, $p, 0 ); + } + + $self->{Parent} = $node; + + $self->{NamespaceStack}->push_context; + + # do attributes + foreach my $key (keys %{$el->{Attributes}}) { + my $attr = $el->{Attributes}->{$key}; + if (ref($attr)) { + # catch broken name/value pairs + next unless $attr->{Name} ; + next if $self->{USENAMESPACESTACK} + and ( $attr->{Name} eq "xmlns" + or ( defined $attr->{Prefix} + and $attr->{Prefix} eq "xmlns" ) ); + + + if ( defined $attr->{Prefix} + and $attr->{Prefix} eq "xmlns" and $skip_ns == 0 ) { + # ok, the generator does not set namespaces correctly! + my $uri = $attr->{Value}; + $node->setNamespace($uri, + $attr->{LocalName}, + $uri eq $el->{NamespaceURI} ? 1 : 0 ); + } + else { + $node->setAttributeNS($attr->{NamespaceURI} || "", + $attr->{Name}, $attr->{Value}); + } + } + else { + $node->setAttribute($key => $attr); + } + } + return (); +} + +sub end_element { + my ($self, $el) = @_; + return unless $self->{Parent}; + + $self->{NamespaceStack}->pop_context; + $self->{Parent} = $self->{Parent}->parentNode(); + return (); +} + +sub start_cdata { + my $self = shift; + $self->{IN_CDATA} = 1; + return (); +} + +sub end_cdata { + my $self = shift; + $self->{IN_CDATA} = 0; + return (); +} + +sub characters { + my ($self, $chars) = @_; + if ( not defined $self->{DOM} and not defined $self->{Parent} ) { + $self->{Parent} = XML::LibXML::DocumentFragment->new(); + $self->{NamespaceStack} = XML::NamespaceSupport->new; + $self->{NamespaceStack}->push_context; + } + return unless $self->{Parent}; + my $node; + + unless ( defined $chars and defined $chars->{Data} ) { + return; + } + + if ( defined $self->{DOM} ) { + if ( defined $self->{IN_CDATA} and $self->{IN_CDATA} == 1 ) { + $node = $self->{DOM}->createCDATASection($chars->{Data}); + } + else { + $node = $self->{Parent}->appendText($chars->{Data}); + return; + } + } + elsif ( defined $self->{IN_CDATA} and $self->{IN_CDATA} == 1 ) { + $node = XML::LibXML::CDATASection->new($chars->{Data}); + } + else { + $node = XML::LibXML::Text->new($chars->{Data}); + } + + $self->{Parent}->addChild($node); + return (); +} + +sub comment { + my ($self, $chars) = @_; + my $comment; + if ( not defined $self->{DOM} and not defined $self->{Parent} ) { + $self->{Parent} = XML::LibXML::DocumentFragment->new(); + $self->{NamespaceStack} = XML::NamespaceSupport->new; + $self->{NamespaceStack}->push_context; + } + + unless ( defined $chars and defined $chars->{Data} ) { + return; + } + + if ( defined $self->{DOM} ) { + $comment = $self->{DOM}->createComment( $chars->{Data} ); + } + else { + $comment = XML::LibXML::Comment->new( $chars->{Data} ); + } + + if ( defined $self->{Parent} ) { + $self->{Parent}->addChild($comment); + } + else { + $self->{DOM}->addChild($comment); + } + return (); +} + +sub processing_instruction { + my ( $self, $pi ) = @_; + my $PI; + return unless defined $self->{DOM}; + $PI = $self->{DOM}->createPI( $pi->{Target}, $pi->{Data} ); + + if ( defined $self->{Parent} ) { + $self->{Parent}->addChild( $PI ); + } + else { + $self->{DOM}->addChild( $PI ); + } + return (); +} + +sub warning { + my $self = shift; + my $error = shift; + # fill $@ but do not die seriously + eval { $error->throw; }; +} + +sub error { + my $self = shift; + my $error = shift; + delete $self->{NamespaceStack}; + delete $self->{Parent}; + delete $self->{DOM}; + $error->throw; +} + +sub fatal_error { + my $self = shift; + my $error = shift; + delete $self->{NamespaceStack}; + delete $self->{Parent}; + delete $self->{DOM}; + $error->throw; +} + +1; + +__END__ diff --git a/lib/XML/LibXML/SAX/Builder.pod b/lib/XML/LibXML/SAX/Builder.pod new file mode 100644 index 0000000..c199c2f --- /dev/null +++ b/lib/XML/LibXML/SAX/Builder.pod @@ -0,0 +1,58 @@ +=head1 NAME + +XML::LibXML::SAX::Builder - Building DOM trees from SAX events. + +=head1 SYNOPSIS + + + + use XML::LibXML::SAX::Builder; + my $builder = XML::LibXML::SAX::Builder->new(); + + my $gen = XML::Generator::DBI->new(Handler => $builder, dbh => $dbh); + $gen->execute("SELECT * FROM Users"); + + my $doc = $builder->result(); + + +=head1 DESCRIPTION + +This is a SAX handler that generates a DOM tree from SAX events. Usage is as +above. Input is accepted from any SAX1 or SAX2 event generator. + +Building DOM trees from SAX events is quite easy with +XML::LibXML::SAX::Builder. The class is designed as a SAX2 final handler not as +a filter! + +Since SAX is strictly stream oriented, you should not expect anything to return +from a generator. Instead you have to ask the builder instance directly to get +the document built. XML::LibXML::SAX::Builder's result() function holds the +document generated from the last SAX stream. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/SAX/Generator.pm b/lib/XML/LibXML/SAX/Generator.pm new file mode 100644 index 0000000..2ae881f --- /dev/null +++ b/lib/XML/LibXML/SAX/Generator.pm @@ -0,0 +1,158 @@ +# $Id: Generator.pm 772 2009-01-23 21:42:09Z pajas +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::SAX::Generator; + +use strict; +use warnings; + +use XML::LibXML; +use vars qw ($VERSION); + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +warn("This class (", __PACKAGE__, ") is deprecated!"); + +sub new { + my $class = shift; + unshift @_, 'Handler' unless @_ != 1; + my %p = @_; + return bless \%p, $class; +} + +sub generate { + my $self = shift; + my ($node) = @_; + + my $document = { Parent => undef }; + $self->{Handler}->start_document($document); + + process_node($self->{Handler}, $node); + + $self->{Handler}->end_document($document); +} + +sub process_node { + my ($handler, $node) = @_; + + my $node_type = $node->getType(); + if ($node_type == XML_COMMENT_NODE) { + $handler->comment( { Data => $node->getData } ); + } + elsif ($node_type == XML_TEXT_NODE || $node_type == XML_CDATA_SECTION_NODE) { + # warn($node->getData . "\n"); + $handler->characters( { Data => $node->getData } ); + } + elsif ($node_type == XML_ELEMENT_NODE) { + # warn("<" . $node->getName . ">\n"); + process_element($handler, $node); + # warn("getName . ">\n"); + } + elsif ($node_type == XML_ENTITY_REF_NODE) { + foreach my $kid ($node->getChildnodes) { + # warn("child of entity ref: " . $kid->getType() . " called: " . $kid->getName . "\n"); + process_node($handler, $kid); + } + } + elsif ($node_type == XML_DOCUMENT_NODE) { + # just get root element. Ignore other cruft. + foreach my $kid ($node->getChildnodes) { + if ($kid->getType() == XML_ELEMENT_NODE) { + process_element($handler, $kid); + last; + } + } + } + else { + warn("unknown node type: $node_type"); + } +} + +sub process_element { + my ($handler, $element) = @_; + + my @attr; + + foreach my $attr ($element->getAttributes) { + push @attr, XML::LibXML::SAX::AttributeNode->new( + Name => $attr->getName, + Value => $attr->getData, + NamespaceURI => $attr->getNamespaceURI, + Prefix => $attr->getPrefix, + LocalName => $attr->getLocalName, + ); + } + + my $node = { + Name => $element->getName, + Attributes => { map { $_->{Name} => $_ } @attr }, + NamespaceURI => $element->getNamespaceURI, + Prefix => $element->getPrefix, + LocalName => $element->getLocalName, + }; + + $handler->start_element($node); + + foreach my $child ($element->getChildnodes) { + process_node($handler, $child); + } + + $handler->end_element($node); +} + +package XML::LibXML::SAX::AttributeNode; + +use overload '""' => "stringify"; + +sub new { + my $class = shift; + my %p = @_; + return bless \%p, $class; +} + +sub stringify { + my $self = shift; + return $self->{Value}; +} + +1; + +__END__ + +=head1 NAME + +XML::LibXML::SAX::Generator - Generate SAX events from a LibXML tree + +=head1 SYNOPSIS + + my $handler = MySAXHandler->new(); + my $generator = XML::LibXML::SAX::Generator->new(Handler => $handler); + my $dom = XML::LibXML->new->parse_file("foo.xml"); + + $generator->generate($dom); + +=head1 DESCRIPTION + +THIS CLASS IS DEPRECATED! Use XML::LibXML::SAX::Parser instead! + +This helper class allows you to generate SAX events from any XML::LibXML +node, and all it's sub-nodes. This basically gives you interop from +XML::LibXML to other modules that may implement SAX. + +It uses SAX2 style, but should be compatible with anything SAX1, by use +of stringification overloading. + +There is nothing to really know about, beyond the synopsis above, and +a general knowledge of how to use SAX, which is beyond the scope here. + +=cut diff --git a/lib/XML/LibXML/SAX/Parser.pm b/lib/XML/LibXML/SAX/Parser.pm new file mode 100644 index 0000000..0b2ba85 --- /dev/null +++ b/lib/XML/LibXML/SAX/Parser.pm @@ -0,0 +1,266 @@ +# $Id$ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::SAX::Parser; + +use strict; +use warnings; +use vars qw($VERSION @ISA); + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); +use XML::SAX::Base; +use XML::SAX::DocumentLocator; + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE +@ISA = ('XML::SAX::Base'); + +sub CLONE_SKIP { + return $XML::LibXML::__threads_shared ? 0 : 1; +} + +sub _parse_characterstream { + my ($self, $fh, $options) = @_; + die "parsing a characterstream is not supported at this time"; +} + +sub _parse_bytestream { + my ($self, $fh, $options) = @_; + my $parser = XML::LibXML->new(); + my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_fh($fh, $options->{Source}{SystemId}) : $parser->parse_fh($fh); + $self->generate($doc); +} + +sub _parse_string { + my ($self, $str, $options) = @_; + my $parser = XML::LibXML->new(); + my $doc = exists($options->{Source}{SystemId}) ? $parser->parse_string($str, $options->{Source}{SystemId}) : $parser->parse_string($str); + $self->generate($doc); +} + +sub _parse_systemid { + my ($self, $sysid, $options) = @_; + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_file($sysid); + $self->generate($doc); +} + +sub generate { + my $self = shift; + my ($node) = @_; + + my $doc = $node->ownerDocument(); + { + # precompute some DocumentLocator values + my %locator = ( + PublicId => undef, + SystemId => undef, + Encoding => undef, + XMLVersion => undef, + ); + my $dtd = defined $doc ? $doc->externalSubset() : undef; + if (defined $dtd) { + $locator{PublicId} = $dtd->publicId(); + $locator{SystemId} = $dtd->systemId(); + } + if (defined $doc) { + $locator{Encoding} = $doc->encoding(); + $locator{XMLVersion} = $doc->version(); + } + $self->set_document_locator( + XML::SAX::DocumentLocator->new( + sub { $locator{PublicId} }, + sub { $locator{SystemId} }, + sub { defined($self->{current_node}) ? $self->{current_node}->line_number() : undef }, + sub { 1 }, + sub { $locator{Encoding} }, + sub { $locator{XMLVersion} }, + ), + ); + } + + if ( $node->nodeType() == XML_DOCUMENT_NODE + || $node->nodeType == XML_HTML_DOCUMENT_NODE ) { + $self->start_document({}); + $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding}); + $self->process_node($node); + $self->end_document({}); + } +} + +sub process_node { + my ($self, $node) = @_; + + local $self->{current_node} = $node; + + my $node_type = $node->nodeType(); + if ($node_type == XML_COMMENT_NODE) { + $self->comment( { Data => $node->getData } ); + } + elsif ($node_type == XML_TEXT_NODE + || $node_type == XML_CDATA_SECTION_NODE) { + # warn($node->getData . "\n"); + $self->characters( { Data => $node->nodeValue } ); + } + elsif ($node_type == XML_ELEMENT_NODE) { + # warn("<" . $node->getName . ">\n"); + $self->process_element($node); + # warn("getName . ">\n"); + } + elsif ($node_type == XML_ENTITY_REF_NODE) { + foreach my $kid ($node->childNodes) { + # warn("child of entity ref: " . $kid->getType() . " called: " . $kid->getName . "\n"); + $self->process_node($kid); + } + } + elsif ($node_type == XML_DOCUMENT_NODE + || $node_type == XML_HTML_DOCUMENT_NODE + || $node_type == XML_DOCUMENT_FRAG_NODE) { + # sometimes it is just useful to generate SAX events from + # a document fragment (very good with filters). + foreach my $kid ($node->childNodes) { + $self->process_node($kid); + } + } + elsif ($node_type == XML_PI_NODE) { + $self->processing_instruction( { Target => $node->getName, Data => $node->getData } ); + } + elsif ($node_type == XML_COMMENT_NODE) { + $self->comment( { Data => $node->getData } ); + } + elsif ( $node_type == XML_XINCLUDE_START + || $node_type == XML_XINCLUDE_END ) { + # ignore! + # i may want to handle this one day, dunno yet + } + elsif ($node_type == XML_DTD_NODE ) { + # ignore! + # i will support DTDs, but had no time yet. + } + else { + # warn("unsupported node type: $node_type"); + } + +} + +sub process_element { + my ($self, $element) = @_; + + my $attribs = {}; + my @ns_maps = $element->getNamespaces; + + foreach my $ns (@ns_maps) { + $self->start_prefix_mapping( + { + NamespaceURI => $ns->href, + Prefix => ( defined $ns->localname ? $ns->localname : ''), + } + ); + } + + foreach my $attr ($element->attributes) { + my $key; + # warn("Attr: $attr -> ", $attr->getName, " = ", $attr->getData, "\n"); + # this isa dump thing... + if ($attr->isa('XML::LibXML::Namespace')) { + # TODO This needs fixing modulo agreeing on what + # is the right thing to do here. + unless ( defined $attr->name ) { + ## It's an atter like "xmlns='foo'" + $attribs->{"{}xmlns"} = + { + Name => "xmlns", + LocalName => "xmlns", + Prefix => "", + Value => $attr->href, + NamespaceURI => "", + }; + } + else { + my $prefix = "xmlns"; + my $localname = $attr->localname; + my $key = "{http://www.w3.org/2000/xmlns/}"; + my $name = "xmlns"; + + if ( defined $localname ) { + $key .= $localname; + $name.= ":".$localname; + } + + $attribs->{$key} = + { + Name => $name, + Value => $attr->href, + NamespaceURI => "http://www.w3.org/2000/xmlns/", + Prefix => $prefix, + LocalName => $localname, + }; + } + } + else { + my $ns = $attr->namespaceURI; + + $ns = '' unless defined $ns; + $key = "{$ns}".$attr->localname; + ## Not sure why, but $attr->name is coming through stripped + ## of its prefix, so we need to hand-assemble a real name. + my $name = $attr->name; + $name = "" unless defined $name; + + my $prefix = $attr->prefix; + $prefix = "" unless defined $prefix; + $name = "$prefix:$name" + if index( $name, ":" ) < 0 && length $prefix; + + $attribs->{$key} = + { + Name => $name, + Value => $attr->value, + NamespaceURI => $ns, + Prefix => $prefix, + LocalName => $attr->localname, + }; + } + # use Data::Dumper; + # warn("Attr made: ", Dumper($attribs->{$key}), "\n"); + } + + my $node = { + Name => $element->nodeName, + Attributes => $attribs, + NamespaceURI => $element->namespaceURI, + Prefix => $element->prefix || "", + LocalName => $element->localname, + }; + + $self->start_element($node); + + foreach my $child ($element->childNodes) { + $self->process_node($child); + } + + my $end_node = { %$node }; + + delete $end_node->{Attributes}; + + $self->end_element($end_node); + + foreach my $ns (@ns_maps) { + $self->end_prefix_mapping( + { + NamespaceURI => $ns->href, + Prefix => ( defined $ns->localname ? $ns->localname : ''), + } + ); + } +} + +1; + +__END__ diff --git a/lib/XML/LibXML/Schema.pod b/lib/XML/LibXML/Schema.pod new file mode 100644 index 0000000..5bfc958 --- /dev/null +++ b/lib/XML/LibXML/Schema.pod @@ -0,0 +1,89 @@ +=head1 NAME + +XML::LibXML::Schema - XML Schema Validation + +=head1 SYNOPSIS + + + + use XML::LibXML; + $doc = XML::LibXML->new->parse_file($url); + + $xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 ); + $xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 ); + eval { $xmlschema->validate( $doc ); }; + +=head1 DESCRIPTION + +The XML::LibXML::Schema class is a tiny frontend to libxml2's XML Schema +implementation. Currently it supports only schema parsing and document +validation. As of 2.6.32, libxml2 only supports decimal types up to 24 digits +(the standard requires at least 18). + + +=head1 METHODS + +=over 4 + +=item new + + $xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 ); + $xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 ); + +The constructor of XML::LibXML::Schema needs to be called with list of +parameters. At least location or string parameter is required to specify source +of schema. Optional parameter no_network set to 1 cause that parser would not +access network and optional parameter recover set 1 cause that parser would not +call die() on errors. + +It is important, that each schema only have a single source. + +The location parameter allows one to parse a schema from the filesystem or a +(non-HTTPS) URL. + +The string parameter will parse the schema from the given XML string. + +Note that the constructor will die() if the schema does not meed the +constraints of the XML Schema specification. + + +=item validate + + eval { $xmlschema->validate( $doc ); }; + +This function allows one to validate a (parsed) document against the given XML +Schema. The argument of this function should be a L<<<<<< XML::LibXML::Document >>>>>> object. If this function succeeds, it will return 0, otherwise it will die() +and report the errors found. Because of this validate() should be always +evaluated. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/Text.pod b/lib/XML/LibXML/Text.pod new file mode 100644 index 0000000..be65280 --- /dev/null +++ b/lib/XML/LibXML/Text.pod @@ -0,0 +1,190 @@ +=head1 NAME + +XML::LibXML::Text - XML::LibXML Class for Text Nodes + +=head1 SYNOPSIS + + + + use XML::LibXML; + # Only methods specific to Text nodes are listed here, + # see the XML::LibXML::Node manpage for other methods + + $text = XML::LibXML::Text->new( $content ); + $nodedata = $text->data; + $text->setData( $text_content ); + $text->substringData($offset, $length); + $text->appendData( $somedata ); + $text->insertData($offset, $string); + $text->deleteData($offset, $length); + $text->deleteDataString($remstring, $all); + $text->replaceData($offset, $length, $string); + $text->replaceDataString($old, $new, $flag); + $text->replaceDataRegEx( $search_cond, $replace_cond, $reflags ); + +=head1 DESCRIPTION + +Unlike the DOM specification, XML::LibXML implements the text node as the base +class of all character data node. Therefore there exists no CharacterData +class. This allows one to apply methods of text nodes also to Comments and +CDATA-sections. + + +=head1 METHODS + +The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here. + +Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation. + +=over 4 + +=item new + + $text = XML::LibXML::Text->new( $content ); + +The constructor of the class. It creates an unbound text node. + + +=item data + + $nodedata = $text->data; + +Although there exists the C<<<<<< nodeValue >>>>>> attribute in the Node class, the DOM specification defines data as a separate +attribute. C<<<<<< XML::LibXML >>>>>> implements these two attributes not as different attributes, but as aliases, +such as C<<<<<< libxml2 >>>>>> does. Therefore + + + + $text->data; + +and + + + + $text->nodeValue; + +will have the same result and are not different entities. + + +=item setData($string) + + $text->setData( $text_content ); + +This function sets or replaces text content to a node. The node has to be of +the type "text", "cdata" or "comment". + + +=item substringData($offset,$length) + + $text->substringData($offset, $length); + +Extracts a range of data from the node. (DOM Spec) This function takes the two +parameters $offset and $length and returns the sub-string, if available. + +If the node contains no data or $offset refers to an non-existing string index, +this function will return I<<<<<< undef >>>>>>. If $length is out of range C<<<<<< substringData >>>>>> will return the data starting at $offset instead of causing an error. + + +=item appendData($string) + + $text->appendData( $somedata ); + +Appends a string to the end of the existing data. If the current text node +contains no data, this function has the same effect as C<<<<<< setData >>>>>>. + + +=item insertData($offset,$string) + + $text->insertData($offset, $string); + +Inserts the parameter $string at the given $offset of the existing data of the +node. This operation will not remove existing data, but change the order of the +existing data. + +The $offset has to be a positive value. If $offset is out of range, C<<<<<< insertData >>>>>> will have the same behaviour as C<<<<<< appendData >>>>>>. + + +=item deleteData($offset, $length) + + $text->deleteData($offset, $length); + +This method removes a chunk from the existing node data at the given offset. +The $length parameter tells, how many characters should be removed from the +string. + + +=item deleteDataString($string, [$all]) + + $text->deleteDataString($remstring, $all); + +This method removes a chunk from the existing node data. Since the DOM spec is +quite unhandy if you already know C<<<<<< which >>>>>> string to remove from a text node, this method allows more perlish code :) + +The functions takes two parameters: I<<<<<< $string >>>>>> and optional the I<<<<<< $all >>>>>> flag. If $all is not set, I<<<<<< undef >>>>>> or I<<<<<< 0 >>>>>>, C<<<<<< deleteDataString >>>>>> will remove only the first occurrence of $string. If $all is I<<<<<< TRUE >>>>>>C<<<<<< deleteDataString >>>>>> will remove all occurrences of I<<<<<< $string >>>>>> from the node data. + + +=item replaceData($offset, $length, $string) + + $text->replaceData($offset, $length, $string); + +The DOM style version to replace node data. + + +=item replaceDataString($oldstring, $newstring, [$all]) + + $text->replaceDataString($old, $new, $flag); + +The more programmer friendly version of replaceData() :) + +Instead of giving offsets and length one can specify the exact string (I<<<<<< $oldstring >>>>>>) to be replaced. Additionally the I<<<<<< $all >>>>>> flag allows one to replace all occurrences of I<<<<<< $oldstring >>>>>>. + + +=item replaceDataRegEx( $search_cond, $replace_cond, $reflags ) + + $text->replaceDataRegEx( $search_cond, $replace_cond, $reflags ); + +This method replaces the node's data by a C<<<<<< simple >>>>>> regular expression. Optional, this function allows one to pass some flags that +will be added as flag to the replace statement. + +I<<<<<< NOTE: >>>>>> This is a shortcut for + + + + my $datastr = $node->getData(); + $datastr =~ s/somecond/replacement/g; # 'g' is just an example for any flag + $node->setData( $datastr ); + +This function can make things easier to read for simple replacements. For more +complex variants it is recommended to use the code snippet above. + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/XPathContext.pm b/lib/XML/LibXML/XPathContext.pm new file mode 100644 index 0000000..10e62ed --- /dev/null +++ b/lib/XML/LibXML/XPathContext.pm @@ -0,0 +1,147 @@ +# $Id: XPathContext.pm 422 2002-11-08 17:10:30Z phish $ +# +# This is free software, you may use it and distribute it under the same terms as +# Perl itself. +# +# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +# +# + +package XML::LibXML::XPathContext; + +use strict; +use warnings; +use vars qw($VERSION @ISA $USE_LIBXML_DATA_TYPES); + +use Carp; +use XML::LibXML; +use XML::LibXML::NodeList; + +$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE + +# should LibXML XPath data types be used for simple objects +# when passing parameters to extension functions (default: no) +$USE_LIBXML_DATA_TYPES = 0; + +sub CLONE_SKIP { 1 } + +sub findnodes { + my ($self, $xpath, $node) = @_; + + my @nodes = $self->_guarded_find_call('_findnodes', $node, $xpath); + + if (wantarray) { + return @nodes; + } + else { + return XML::LibXML::NodeList->new(@nodes); + } +} + +sub find { + my ($self, $xpath, $node) = @_; + + my ($type, @params) = $self->_guarded_find_call('_find', $node, $xpath,0); + + if ($type) { + return $type->new(@params); + } + return undef; +} + +sub exists { + my ($self, $xpath, $node) = @_; + my (undef, $value) = $self->_guarded_find_call('_find', $node, $xpath,1); + return $value; +} + +sub findvalue { + my $self = shift; + return $self->find(@_)->to_literal->value; +} + +sub _guarded_find_call { + my ($self, $method, $node)=(shift,shift,shift); + + my $prev_node; + if (ref($node)) { + $prev_node = $self->getContextNode(); + $self->setContextNode($node); + } + my @ret; + eval { + @ret = $self->$method(@_); + }; + $self->_free_node_pool; + $self->setContextNode($prev_node) if ref($node); + + if ($@) { + my $err = $@; + chomp $err; + croak $err; + } + + return @ret; +} + +sub registerFunction { + my ($self, $name, $sub) = @_; + $self->registerFunctionNS($name, undef, $sub); + return; +} + +sub unregisterNs { + my ($self, $prefix) = @_; + $self->registerNs($prefix, undef); + return; +} + +sub unregisterFunction { + my ($self, $name) = @_; + $self->registerFunctionNS($name, undef, undef); + return; +} + +sub unregisterFunctionNS { + my ($self, $name, $ns) = @_; + $self->registerFunctionNS($name, $ns, undef); + return; +} + +sub unregisterVarLookupFunc { + my ($self) = @_; + $self->registerVarLookupFunc(undef, undef); + return; +} + +# extension function perl dispatcher +# borrowed from XML::LibXSLT + +sub _perl_dispatcher { + my $func = shift; + my @params = @_; + my @perlParams; + + my $i = 0; + while (@params) { + my $type = shift(@params); + if ($type eq 'XML::LibXML::Literal' or + $type eq 'XML::LibXML::Number' or + $type eq 'XML::LibXML::Boolean') + { + my $val = shift(@params); + unshift(@perlParams, $USE_LIBXML_DATA_TYPES ? $type->new($val) : $val); + } + elsif ($type eq 'XML::LibXML::NodeList') { + my $node_count = shift(@params); + unshift(@perlParams, $type->new(splice(@params, 0, $node_count))); + } + } + + $func = "main::$func" unless ref($func) || $func =~ /(.+)::/; + no strict 'refs'; + my $res = $func->(@perlParams); + return $res; +} + +1; diff --git a/lib/XML/LibXML/XPathContext.pod b/lib/XML/LibXML/XPathContext.pod new file mode 100644 index 0000000..0b8f81c --- /dev/null +++ b/lib/XML/LibXML/XPathContext.pod @@ -0,0 +1,382 @@ +=head1 NAME + +XML::LibXML::XPathContext - XPath Evaluation + +=head1 SYNOPSIS + + my $xpc = XML::LibXML::XPathContext->new(); + my $xpc = XML::LibXML::XPathContext->new($node); + $xpc->registerNs($prefix, $namespace_uri) + $xpc->unregisterNs($prefix) + $uri = $xpc->lookupNs($prefix) + $xpc->registerVarLookupFunc($callback, $data) + $data = $xpc->getVarLookupData(); + $callback = $xpc->getVarLookupFunc(); + $xpc->unregisterVarLookupFunc($name); + $xpc->registerFunctionNS($name, $uri, $callback) + $xpc->unregisterFunctionNS($name, $uri) + $xpc->registerFunction($name, $callback) + $xpc->unregisterFunction($name) + @nodes = $xpc->findnodes($xpath) + @nodes = $xpc->findnodes($xpath, $context_node ) + $nodelist = $xpc->findnodes($xpath, $context_node ) + $object = $xpc->find($xpath ) + $object = $xpc->find($xpath, $context_node ) + $value = $xpc->findvalue($xpath ) + $value = $xpc->findvalue($xpath, $context_node ) + $bool = $xpc->exists( $xpath_expression, $context_node ); + $xpc->setContextNode($node) + my $node = $xpc->getContextNode; + $xpc->setContextPosition($position) + my $position = $xpc->getContextPosition; + $xpc->setContextSize($size) + my $size = $xpc->getContextSize; + $xpc->setContextNode($node) + +=head1 DESCRIPTION + +The XML::LibXML::XPathContext class provides an almost complete interface to +libxml2's XPath implementation. With XML::LibXML::XPathContext, it is possible +to evaluate XPath expressions in the context of arbitrary node, context size, +and context position, with a user-defined namespace-prefix mapping, custom +XPath functions written in Perl, and even a custom XPath variable resolver. + + +=head1 EXAMPLES + + +=head2 Namespaces + +This example demonstrates C<<<<<< registerNs() >>>>>> method. It finds all paragraph nodes in an XHTML document. + + + + my $xc = XML::LibXML::XPathContext->new($xhtml_doc); + $xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml'); + my @nodes = $xc->findnodes('//xhtml:p'); + + +=head2 Custom XPath functions + +This example demonstrates C<<<<<< registerFunction() >>>>>> method by defining a function filtering nodes based on a Perl regular +expression: + + + + sub grep_nodes { + my ($nodelist,$regexp) = @_; + my $result = XML::LibXML::NodeList->new; + for my $node ($nodelist->get_nodelist()) { + $result->push($node) if $node->textContent =~ $regexp; + } + return $result; + }; + + my $xc = XML::LibXML::XPathContext->new($node); + $xc->registerFunction('grep_nodes', \&grep_nodes); + my @nodes = $xc->findnodes('//section[grep_nodes(para,"\bsearch(ing|es)?\b")]'); + + +=head2 Variables + +This example demonstrates C<<<<<< registerVarLookup() >>>>>> method. We use XPath variables to recycle results of previous evaluations: + + + + sub var_lookup { + my ($varname,$ns,$data)=@_; + return $data->{$varname}; + } + + my $areas = XML::LibXML->new->parse_file('areas.xml'); + my $empl = XML::LibXML->new->parse_file('employees.xml'); + + my $xc = XML::LibXML::XPathContext->new($empl); + + my %variables = ( + A => $xc->find('/employees/employee[@salary>10000]'), + B => $areas->find('/areas/area[district='Brooklyn']/street'), + ); + + # get names of employees from $A working in an area listed in $B + $xc->registerVarLookupFunc(\&var_lookup, \%variables); + my @nodes = $xc->findnodes('$A[work_area/street = $B]/name'); + + +=head1 METHODS + +=over 4 + +=item new + + my $xpc = XML::LibXML::XPathContext->new(); + +Creates a new XML::LibXML::XPathContext object without a context node. + + my $xpc = XML::LibXML::XPathContext->new($node); + +Creates a new XML::LibXML::XPathContext object with the context node set to C<<<<<< $node >>>>>>. + + +=item registerNs + + $xpc->registerNs($prefix, $namespace_uri) + +Registers namespace C<<<<<< $prefix >>>>>> to C<<<<<< $namespace_uri >>>>>>. + + +=item unregisterNs + + $xpc->unregisterNs($prefix) + +Unregisters namespace C<<<<<< $prefix >>>>>>. + + +=item lookupNs + + $uri = $xpc->lookupNs($prefix) + +Returns namespace URI registered with C<<<<<< $prefix >>>>>>. If C<<<<<< $prefix >>>>>> is not registered to any namespace URI returns C<<<<<< undef >>>>>>. + + +=item registerVarLookupFunc + + $xpc->registerVarLookupFunc($callback, $data) + +Registers variable lookup function C<<<<<< $callback >>>>>>. The registered function is executed by the XPath engine each time an XPath +variable is evaluated. It takes three arguments: C<<<<<< $data >>>>>>, variable name, and variable ns-URI and must return one value: a number or +string or any C<<<<<< XML::LibXML:: >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g. +Document, Element, etc.), or NodeList. For convenience, simple (non-blessed) +array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of an L<<<<<< XML::LibXML::NodeList >>>>>>. + + +=item getVarLookupData + + $data = $xpc->getVarLookupData(); + +Returns the data that have been associated with a variable lookup function +during a previous call to C<<<<<< registerVarLookupFunc >>>>>>. + + +=item getVarLookupFunc + + $callback = $xpc->getVarLookupFunc(); + +Returns the variable lookup function previously registered with C<<<<<< registerVarLookupFunc >>>>>>. + + +=item unregisterVarLookupFunc + + $xpc->unregisterVarLookupFunc($name); + +Unregisters variable lookup function and the associated lookup data. + + +=item registerFunctionNS + + $xpc->registerFunctionNS($name, $uri, $callback) + +Registers an extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. C<<<<<< $callback >>>>>> must be a CODE reference. The arguments of the callback function are either +simple scalars or C<<<<<< XML::LibXML::* >>>>>> objects depending on the XPath argument types. The function is responsible for +checking the argument number and types. Result of the callback code must be a +single value of the following types: a simple scalar (number, string) or an +arbitrary C<<<<<< XML::LibXML::* >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g. +Document, Element, etc.), or NodeList. For convenience, simple (non-blessed) +array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of a L<<<<<< XML::LibXML::NodeList >>>>>>. + + +=item unregisterFunctionNS + + $xpc->unregisterFunctionNS($name, $uri) + +Unregisters extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. Has the same effect as passing C<<<<<< undef >>>>>> as C<<<<<< $callback >>>>>> to registerFunctionNS. + + +=item registerFunction + + $xpc->registerFunction($name, $callback) + +Same as C<<<<<< registerFunctionNS >>>>>> but without a namespace. + + +=item unregisterFunction + + $xpc->unregisterFunction($name) + +Same as C<<<<<< unregisterFunctionNS >>>>>> but without a namespace. + + +=item findnodes + + @nodes = $xpc->findnodes($xpath) + + @nodes = $xpc->findnodes($xpath, $context_node ) + + $nodelist = $xpc->findnodes($xpath, $context_node ) + +Performs the xpath statement on the current node and returns the result as an +array. In scalar context, returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. Optionally, a node may be passed as a second argument to set the +context node for the query. + +The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object. + + +=item find + + $object = $xpc->find($xpath ) + + $object = $xpc->find($xpath, $context_node ) + +Performs the xpath expression using the current node as the context of the +expression, and returns the result depending on what type of result the XPath +expression had. For example, the XPath C<<<<<< 1 * 3 + 52 >>>>>> results in an L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return a L<<<<<< XML::LibXML::Boolean >>>>>> object, or a L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to ``do +the right thing'' in different contexts. Optionally, a node may be passed as a +second argument to set the context node for the query. + +The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object. + + +=item findvalue + + $value = $xpc->findvalue($xpath ) + + $value = $xpc->findvalue($xpath, $context_node ) + +Is exactly equivalent to: + + + + $xpc->find( $xpath, $context_node )->to_literal; + +That is, it returns the literal value of the results. This enables you to +ensure that you get a string back from your search, allowing certain shortcuts. +This could be used as the equivalent of . +Optionally, a node may be passed in the second argument to set the context node +for the query. + +The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object. + + +=item exists + + $bool = $xpc->exists( $xpath_expression, $context_node ); + +This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a +node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true +for libxml2 >= 2.6.27). + +For XPath expressions that do not return node-set, the method returns true if +the returned value is a non-zero number or a non-empty string. + + +=item setContextNode + + $xpc->setContextNode($node) + +Set the current context node. + + +=item getContextNode + + my $node = $xpc->getContextNode; + +Get the current context node. + + +=item setContextPosition + + $xpc->setContextPosition($position) + +Set the current context position. By default, this value is -1 (and evaluating +XPath function C<<<<<< position() >>>>>> in the initial context raises an XPath error), but can be set to any value up +to context size. This usually only serves to cheat the XPath engine to return +given position when C<<<<<< position() >>>>>> XPath function is called. Setting this value to -1 restores the default +behavior. + + +=item getContextPosition + + my $position = $xpc->getContextPosition; + +Get the current context position. + + +=item setContextSize + + $xpc->setContextSize($size) + +Set the current context size. By default, this value is -1 (and evaluating +XPath function C<<<<<< last() >>>>>> in the initial context raises an XPath error), but can be set to any +non-negative value. This usually only serves to cheat the XPath engine to +return the given value when C<<<<<< last() >>>>>> XPath function is called. If context size is set to 0, position is +automatically also set to 0. If context size is positive, position is +automatically set to 1. Setting context size to -1 restores the default +behavior. + + +=item getContextSize + + my $size = $xpc->getContextSize; + +Get the current context size. + + +=item setContextNode + + $xpc->setContextNode($node) + +Set the current context node. + + + +=back + + +=head1 BUGS AND CAVEATS + +XML::LibXML::XPathContext objects I<<<<<< are >>>>>> reentrant, meaning that you can call methods of an XML::LibXML::XPathContext +even from XPath extension functions registered with the same object or from a +variable lookup function. On the other hand, you should rather avoid +registering new extension functions, namespaces and a variable lookup function +from within extension functions and a variable lookup function, unless you want +to experience untested behavior. + + +=head1 AUTHORS + +Ilya Martynov and Petr Pajas, based on XML::LibXML and XML::LibXSLT code by +Matt Sergeant and Christian Glahn. + + +=head1 HISTORICAL REMARK + +Prior to XML::LibXML 1.61 this module was distributed separately for +maintenance reasons. + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/XML/LibXML/XPathExpression.pod b/lib/XML/LibXML/XPathExpression.pod new file mode 100644 index 0000000..5ff7b0d --- /dev/null +++ b/lib/XML/LibXML/XPathExpression.pod @@ -0,0 +1,72 @@ +=head1 NAME + +XML::LibXML::XPathExpression - XML::LibXML::XPathExpression - interface to libxml2 pre-compiled XPath expressions + +=head1 SYNOPSIS + + + + use XML::LibXML; + my $compiled_xpath = XML::LibXML::XPathExpression->new('//foo[@bar="baz"][position()<4]'); + + # interface from XML::LibXML::Node + + my $result = $node->find($compiled_xpath); + my @nodes = $node->findnodes($compiled_xpath); + my $value = $node->findvalue($compiled_xpath); + + # interface from XML::LibXML::XPathContext + + my $result = $xpc->find($compiled_xpath,$node); + my @nodes = $xpc->findnodes($compiled_xpath,$node); + my $value = $xpc->findvalue($compiled_xpath,$node); + + $compiled = XML::LibXML::XPathExpression->new( xpath_string ); + +=head1 DESCRIPTION + +This is a perl interface to libxml2's pre-compiled XPath expressions. +Pre-compiling an XPath expression can give in some performance benefit if the +same XPath query is evaluated many times. C<<<<<< XML::LibXML::XPathExpression >>>>>> objects can be passed to all C<<<<<< find... >>>>>> functions C<<<<<< XML::LibXML >>>>>> that expect an XPath expression. + +=over 4 + +=item new() + + $compiled = XML::LibXML::XPathExpression->new( xpath_string ); + +The constructor takes an XPath 1.0 expression as a string and returns an object +representing the pre-compiled expressions (the actual data structure is +internal to libxml2). + + + +=back + +=head1 AUTHORS + +Matt Sergeant, +Christian Glahn, +Petr Pajas + + +=head1 VERSION + +2.0207 + +=head1 COPYRIGHT + +2001-2007, AxKit.com Ltd. + +2002-2006, Christian Glahn. + +2006-2009, Petr Pajas. + +=cut + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/perl-libxml-mm.c b/perl-libxml-mm.c new file mode 100644 index 0000000..b433c5c --- /dev/null +++ b/perl-libxml-mm.c @@ -0,0 +1,1324 @@ +/** + * perl-libxml-mm.c + * $Id$ + * + * This is free software, you may use it and distribute it under the same terms as + * Perl itself. + * + * Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +*/ + +/* + * + * Basic concept: + * perl varies in the implementation of UTF8 handling. this header (together + * with the c source) implements a few functions, that can be used from within + * the core module inorder to avoid cascades of c pragmas + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "perl-libxml-mm.h" + +#include "XSUB.h" +#include "ppport.h" +#include + +#ifdef XML_LIBXML_GDOME_SUPPORT + +#include +#include + +#endif + +#include "perl-libxml-sax.h" + +#ifdef __cplusplus +} +#endif + +/** + * this is a wrapper function that does the type evaluation for the + * node. this makes the code a little more readable in the .XS + * + * the code is not really portable, but i think we'll avoid some + * memory leak problems that way. + **/ +const char* +PmmNodeTypeName( xmlNodePtr elem ){ + const char *name = "XML::LibXML::Node"; + + if ( elem != NULL ) { + switch ( elem->type ) { + case XML_ELEMENT_NODE: + name = "XML::LibXML::Element"; + break; + case XML_TEXT_NODE: + name = "XML::LibXML::Text"; + break; + case XML_COMMENT_NODE: + name = "XML::LibXML::Comment"; + break; + case XML_CDATA_SECTION_NODE: + name = "XML::LibXML::CDATASection"; + break; + case XML_ATTRIBUTE_NODE: + name = "XML::LibXML::Attr"; + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + name = "XML::LibXML::Document"; + break; + case XML_DOCUMENT_FRAG_NODE: + name = "XML::LibXML::DocumentFragment"; + break; + case XML_NAMESPACE_DECL: + name = "XML::LibXML::Namespace"; + break; + case XML_DTD_NODE: + name = "XML::LibXML::Dtd"; + break; + case XML_PI_NODE: + name = "XML::LibXML::PI"; + break; + default: + name = "XML::LibXML::Node"; + break; + }; + return name; + } + return ""; +} + +/* + * free a hash table + */ +void +PmmFreeHashTable(xmlHashTablePtr table) +{ + if( xmlHashSize(table) > 0 ) { + warn("PmmFreeHashTable: not empty\n"); + /* PmmDumpRegistry(table); */ + } + /* warn("Freeing table %p with %d elements in\n", table, xmlHashSize(table)); */ + xmlHashFree(table, NULL); +} + +#ifdef XML_LIBXML_THREADS + +/* + * registry of all current proxy nodes + * + * other classes like XML::LibXSLT must get a pointer + * to this registry via XML::LibXML::__proxy_registry + * + */ +extern SV* PROXY_NODE_REGISTRY_MUTEX; + +/* Utility method used by PmmDumpRegistry */ +void PmmRegistryDumpHashScanner(void * payload, void * data, xmlChar * name) +{ + LocalProxyNodePtr lp = (LocalProxyNodePtr) payload; + ProxyNodePtr node = (ProxyNodePtr) lp->proxy; + const char * CLASS = PmmNodeTypeName( PmmNODE(node) ); + warn("%s=%p with %d references (%d perl)\n",CLASS,node,PmmREFCNT(node),lp->count); +} + +/* + * dump the current thread's node registry to STDERR + */ +void +PmmDumpRegistry(xmlHashTablePtr r) +{ + if( r ) + { + SvLOCK(PROXY_NODE_REGISTRY_MUTEX); + warn("%d total nodes\n", xmlHashSize(r)); + xmlHashScan(r, PmmRegistryDumpHashScanner, NULL); + SvUNLOCK(PROXY_NODE_REGISTRY_MUTEX); + } +} + +/* + * returns the address of the proxy registry + */ +xmlHashTablePtr* +PmmProxyNodeRegistryPtr(ProxyNodePtr proxy) +{ + croak("PmmProxyNodeRegistryPtr: TODO!\n"); + return NULL; + /* return &PmmREGISTRY; */ +} + +/* + * efficiently generate a string representation of the given pointer + */ +#define _PMM_HASH_NAME_SIZE(n) n+(n>>3)+(n%8>0 ? 1 : 0) +xmlChar * +PmmRegistryName(void * ptr) +{ + unsigned long int v = (unsigned long int) ptr; + int HASH_NAME_SIZE = _PMM_HASH_NAME_SIZE(sizeof(void*)); + xmlChar * name; + int i; + + name = (xmlChar *) safemalloc(HASH_NAME_SIZE+1); + + for(i = 0; i < HASH_NAME_SIZE; ++i) + { + name[i] = (xmlChar) (128 | v); + v >>= 7; + } + name[HASH_NAME_SIZE] = '\0'; + + return name; +} + +/* + * allocate and return a new LocalProxyNode structure + */ +LocalProxyNodePtr +PmmNewLocalProxyNode(ProxyNodePtr proxy) +{ + LocalProxyNodePtr lp; + Newc(0, lp, 1, LocalProxyNode, LocalProxyNode); + lp->proxy = proxy; + lp->count = 0; + return lp; +} + +/* + * @proxy: proxy node to register + * + * adds a proxy node to the proxy node registry + */ +LocalProxyNodePtr +PmmRegisterProxyNode(ProxyNodePtr proxy) +{ + xmlChar * name = PmmRegistryName( proxy ); + LocalProxyNodePtr lp = PmmNewLocalProxyNode( proxy ); + /* warn("LibXML registers proxy node with %p\n",PmmREGISTRY); */ + SvLOCK(PROXY_NODE_REGISTRY_MUTEX); + if( xmlHashAddEntry(PmmREGISTRY, name, lp) ) + croak("PmmRegisterProxyNode: error adding node to hash, hash size is %d\n",xmlHashSize(PmmREGISTRY)); + SvUNLOCK(PROXY_NODE_REGISTRY_MUTEX); + Safefree(name); + return lp; +} + +/* utility method for PmmUnregisterProxyNode */ +/* PP: originally this was static inline void, but on AIX the compiler + did not chew it, so I'm removing the inline */ +static void +PmmRegistryHashDeallocator(void *payload, xmlChar *name) +{ + Safefree((LocalProxyNodePtr) payload); +} + +/* + * @proxy: proxy node to remove + * + * removes a proxy node from the proxy node registry + */ +void +PmmUnregisterProxyNode(ProxyNodePtr proxy) +{ + xmlChar * name = PmmRegistryName( proxy ); + /* warn("LibXML unregistering proxy node with %p\n",PmmREGISTRY); */ + SvLOCK(PROXY_NODE_REGISTRY_MUTEX); + if( xmlHashRemoveEntry(PmmREGISTRY, name, PmmRegistryHashDeallocator) ) + croak("PmmUnregisterProxyNode: error removing node from hash\n"); + Safefree(name); + SvUNLOCK(PROXY_NODE_REGISTRY_MUTEX); +} + +/* + * lookup a LocalProxyNode in the registry + */ +LocalProxyNodePtr +PmmRegistryLookup(ProxyNodePtr proxy) +{ + xmlChar * name = PmmRegistryName( proxy ); + LocalProxyNodePtr lp = xmlHashLookup(PmmREGISTRY, name); + Safefree(name); + return lp; +} + +/* + * increment the local refcount for proxy + */ +void +PmmRegistryREFCNT_inc(ProxyNodePtr proxy) +{ + /* warn("Registry inc\n"); */ + LocalProxyNodePtr lp = PmmRegistryLookup( proxy ); + if( lp ) + lp->count++; + else + PmmRegisterProxyNode( proxy )->count++; +} + +/* + * decrement the local refcount for proxy and remove the local pointer if zero + */ +void +PmmRegistryREFCNT_dec(ProxyNodePtr proxy) +{ + /* warn("Registry dec\n"); */ + LocalProxyNodePtr lp = PmmRegistryLookup(proxy); + if( lp && --(lp->count) == 0 ) + PmmUnregisterProxyNode(proxy); +} + +/* + * internal, used by PmmCloneProxyNodes + */ +void * +PmmRegistryHashCopier(void *payload, xmlChar *name) +{ + ProxyNodePtr proxy = ((LocalProxyNodePtr) payload)->proxy; + LocalProxyNodePtr lp; + Newc(0, lp, 1, LocalProxyNode, LocalProxyNode); + memcpy(lp, payload, sizeof(LocalProxyNode)); + PmmREFCNT_inc(proxy); + return lp; +} + +/* + * increments all proxy node counters by one (called on thread spawn) + */ +void +PmmCloneProxyNodes() +{ + SV *sv_reg = get_sv("XML::LibXML::__PROXY_NODE_REGISTRY",0); + xmlHashTablePtr reg_copy; + SvLOCK(PROXY_NODE_REGISTRY_MUTEX); + reg_copy = xmlHashCopy(PmmREGISTRY, PmmRegistryHashCopier); + SvIV_set(SvRV(sv_reg), PTR2IV(reg_copy)); + SvUNLOCK(PROXY_NODE_REGISTRY_MUTEX); +} + +/* + * returns the current number of proxy nodes in the registry + */ +int +PmmProxyNodeRegistrySize() +{ + return xmlHashSize(PmmREGISTRY); +} + +#endif /* end of XML_LIBXML_THREADS */ + +/* creates a new proxy node from a given node. this function is aware + * about the fact that a node may already has a proxy structure. + */ +ProxyNodePtr +PmmNewNode(xmlNodePtr node) +{ + ProxyNodePtr proxy = NULL; + + if ( node == NULL ) { + xs_warn( "PmmNewNode: no node found\n" ); + return NULL; + } + + if ( node->_private == NULL ) { + switch ( node->type ) { + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DOCB_DOCUMENT_NODE: + proxy = (ProxyNodePtr)xmlMalloc(sizeof(struct _DocProxyNode)); + if (proxy != NULL) { + ((DocProxyNodePtr)proxy)->psvi_status = Pmm_NO_PSVI; + SetPmmENCODING(proxy, XML_CHAR_ENCODING_NONE); + } + break; + default: + proxy = (ProxyNodePtr)xmlMalloc(sizeof(struct _ProxyNode)); + break; + } + if (proxy != NULL) { + proxy->node = node; + proxy->owner = NULL; + proxy->count = 0; + node->_private = (void*) proxy; + } + } + else { + proxy = (ProxyNodePtr)node->_private; + } + + return proxy; +} + +ProxyNodePtr +PmmNewFragment(xmlDocPtr doc) +{ + ProxyNodePtr retval = NULL; + xmlNodePtr frag = NULL; + + xs_warn("PmmNewFragment: new frag\n"); + frag = xmlNewDocFragment( doc ); + retval = PmmNewNode(frag); + /* fprintf(stderr, "REFCNT NOT incremented on frag: 0x%08.8X\n", retval); */ + + if ( doc != NULL ) { + xs_warn("PmmNewFragment: inc document\n"); + /* under rare circumstances _private is not set correctly? */ + if ( doc->_private != NULL ) { + xs_warn("PmmNewFragment: doc->_private being incremented!\n"); + PmmREFCNT_inc(((ProxyNodePtr)doc->_private)); + /* fprintf(stderr, "REFCNT incremented on doc: 0x%08.8X\n", doc->_private); */ + } + retval->owner = (xmlNodePtr)doc; + } + + return retval; +} + +/* frees the node if necessary. this method is aware that libxml2 + * has several different nodetypes. + */ +void +PmmFreeNode( xmlNodePtr node ) +{ + switch( node->type ) { + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + xs_warn("PmmFreeNode: XML_DOCUMENT_NODE\n"); + xmlFreeDoc( (xmlDocPtr) node ); + break; + case XML_ATTRIBUTE_NODE: + xs_warn("PmmFreeNode: XML_ATTRIBUTE_NODE\n"); + if ( node->parent == NULL ) { + xs_warn( "PmmFreeNode: free node!\n"); + node->ns = NULL; + xmlFreeProp( (xmlAttrPtr) node ); + } + break; + case XML_DTD_NODE: + if ( node->doc != NULL ) { + if ( node->doc->extSubset != (xmlDtdPtr)node + && node->doc->intSubset != (xmlDtdPtr)node ) { + xs_warn( "PmmFreeNode: XML_DTD_NODE\n"); + node->doc = NULL; + xmlFreeDtd( (xmlDtdPtr)node ); + } + } else { + xs_warn( "PmmFreeNode: XML_DTD_NODE (no doc)\n"); + xmlFreeDtd( (xmlDtdPtr)node ); + } + break; + case XML_DOCUMENT_FRAG_NODE: + xs_warn("PmmFreeNode: XML_DOCUMENT_FRAG_NODE\n"); + default: + xs_warn( "PmmFreeNode: normal node\n" ); + xmlFreeNode( node); + break; + } +} + +/* decrements the proxy counter. if the counter becomes zero or less, + this method will free the proxy node. If the node is part of a + subtree, PmmREFCNT_dec will fix the reference counts and delete + the subtree if it is not required any more. + */ +int +PmmREFCNT_dec( ProxyNodePtr node ) +{ + xmlNodePtr libnode = NULL; + ProxyNodePtr owner = NULL; + int retval = 0; + + if ( node != NULL ) { + retval = PmmREFCNT(node)--; + /* fprintf(stderr, "REFCNT on 0x%08.8X decremented to %d\n", node, PmmREFCNT(node)); */ + if ( PmmREFCNT(node) < 0 ) + warn( "PmmREFCNT_dec: REFCNT decremented below 0 for %p!", node ); + if ( PmmREFCNT(node) <= 0 ) { + xs_warn( "PmmREFCNT_dec: NODE DELETION\n" ); + + libnode = PmmNODE( node ); + if ( libnode != NULL ) { + if ( libnode->_private != node ) { + xs_warn( "PmmREFCNT_dec: lost node\n" ); + libnode = NULL; + } + else { + libnode->_private = NULL; + } + } + + PmmNODE( node ) = NULL; + if ( PmmOWNER(node) && PmmOWNERPO(node) ) { + xs_warn( "PmmREFCNT_dec: DOC NODE!\n" ); + owner = PmmOWNERPO(node); + PmmOWNER( node ) = NULL; + if( libnode != NULL && libnode->parent == NULL ) { + /* this is required if the node does not directly + * belong to the document tree + */ + xs_warn( "PmmREFCNT_dec: REAL DELETE\n" ); + PmmFreeNode( libnode ); + } + xs_warn( "PmmREFCNT_dec: decrease owner\n" ); + PmmREFCNT_dec( owner ); + } + else if ( libnode != NULL ) { + xs_warn( "PmmREFCNT_dec: STANDALONE REAL DELETE\n" ); + + PmmFreeNode( libnode ); + } + else { + xs_warn( "PmmREFCNT_dec: NO OWNER\n" ); + } + xmlFree( node ); + } + } + else { + xs_warn("PmmREFCNT_dec: lost node\n" ); + } + return retval; +} + +/* @node: the node that should be wrapped into a SV + * @owner: perl instance of the owner node (may be NULL) + * + * This function will create a real perl instance of a given node. + * the function is called directly by the XS layer, to generate a perl + * instance of the node. All node reference counts are updated within + * this function. Therefore this function returns a node that can + * directly be used as output. + * + * if @ower is NULL or undefined, the node is ment to be the root node + * of the tree. this node will later be used as an owner of other + * nodes. + */ +SV* +PmmNodeToSv( xmlNodePtr node, ProxyNodePtr owner ) +{ + ProxyNodePtr dfProxy= NULL; + dTHX; + SV * retval = &PL_sv_undef; + const char * CLASS = "XML::LibXML::Node"; + + if ( node != NULL ) { +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + SvLOCK(PROXY_NODE_REGISTRY_MUTEX); +#endif + /* find out about the class */ + CLASS = PmmNodeTypeName( node ); + xs_warn("PmmNodeToSv: return new perl node of class:\n"); + xs_warn( CLASS ); + + if ( node->_private != NULL ) { + dfProxy = PmmNewNode(node); + /* fprintf(stderr, " at 0x%08.8X\n", dfProxy); */ + } + else { + dfProxy = PmmNewNode(node); + /* fprintf(stderr, " at 0x%08.8X\n", dfProxy); */ + if ( dfProxy != NULL ) { + if ( owner != NULL ) { + dfProxy->owner = PmmNODE( owner ); + PmmREFCNT_inc( owner ); + /* fprintf(stderr, "REFCNT incremented on owner: 0x%08.8X\n", owner); */ + } + else { + xs_warn("PmmNodeToSv: node contains itself (owner==NULL)\n"); + } + } + else { + croak("XML::LibXML: failed to create a proxy node (out of memory?)\n"); + } + } + + retval = NEWSV(0,0); + sv_setref_pv( retval, CLASS, (void*)dfProxy ); +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + PmmRegistryREFCNT_inc(dfProxy); +#endif + PmmREFCNT_inc(dfProxy); + /* fprintf(stderr, "REFCNT incremented on node: 0x%08.8X\n", dfProxy); */ + + switch ( node->type ) { + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DOCB_DOCUMENT_NODE: + if ( ((xmlDocPtr)node)->encoding != NULL ) { + SetPmmENCODING(dfProxy, (int)xmlParseCharEncoding( (const char*)((xmlDocPtr)node)->encoding )); + } + break; + default: + break; + } +#ifdef XML_LIBXML_THREADS + if( PmmUSEREGISTRY ) + SvUNLOCK(PROXY_NODE_REGISTRY_MUTEX); +#endif + } + else { + xs_warn( "PmmNodeToSv: no node found!\n" ); + } + + return retval; +} + + +xmlNodePtr +PmmCloneNode( xmlNodePtr node, int recursive ) +{ + xmlNodePtr retval = NULL; + + if ( node != NULL ) { + switch ( node->type ) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_ENTITY_DECL: + retval = xmlCopyNode( node, recursive ? 1 : 2 ); + break; + case XML_ATTRIBUTE_NODE: + retval = (xmlNodePtr) xmlCopyProp( NULL, (xmlAttrPtr) node ); + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + retval = (xmlNodePtr) xmlCopyDoc( (xmlDocPtr)node, recursive ); + break; + case XML_DOCUMENT_TYPE_NODE: + case XML_DTD_NODE: + retval = (xmlNodePtr) xmlCopyDtd( (xmlDtdPtr)node ); + break; + case XML_NAMESPACE_DECL: + retval = ( xmlNodePtr ) xmlCopyNamespace( (xmlNsPtr) node ); + break; + default: + break; + } + } + + return retval; +} + +/* extracts the libxml2 node from a perl reference + */ + +xmlNodePtr +PmmSvNodeExt( SV* perlnode, int copy ) +{ + xmlNodePtr retval = NULL; + ProxyNodePtr proxy = NULL; + dTHX; + + if ( perlnode != NULL && perlnode != &PL_sv_undef ) { +/* if ( sv_derived_from(perlnode, "XML::LibXML::Node") */ +/* && SvPROXYNODE(perlnode) != NULL ) { */ +/* retval = PmmNODE( SvPROXYNODE(perlnode) ) ; */ +/* } */ + xs_warn("PmmSvNodeExt: perlnode found\n" ); + if ( sv_derived_from(perlnode, "XML::LibXML::Node") ) { + proxy = SvPROXYNODE(perlnode); + if ( proxy != NULL ) { + xs_warn( "PmmSvNodeExt: is a xmlNodePtr structure\n" ); + retval = PmmNODE( proxy ) ; + } + + if ( retval != NULL + && ((ProxyNodePtr)retval->_private) != proxy ) { + xs_warn( "PmmSvNodeExt: no node in proxy node\n" ); + PmmNODE( proxy ) = NULL; + retval = NULL; + } + } +#ifdef XML_LIBXML_GDOME_SUPPORT + else if ( sv_derived_from( perlnode, "XML::GDOME::Node" ) ) { + GdomeNode* gnode = (GdomeNode*)SvIV((SV*)SvRV( perlnode )); + if ( gnode == NULL ) { + warn( "no XML::GDOME data found (datastructure empty)" ); + } + else { + retval = gdome_xml_n_get_xmlNode( gnode ); + if ( retval == NULL ) { + xs_warn( "PmmSvNodeExt: no XML::LibXML node found in GDOME object\n" ); + } + else if ( copy == 1 ) { + retval = PmmCloneNode( retval, 1 ); + } + } + } +#endif + } + + return retval; +} + +/* extracts the libxml2 owner node from a perl reference + */ +xmlNodePtr +PmmSvOwner( SV* perlnode ) +{ + dTHX; + xmlNodePtr retval = NULL; + if ( perlnode != NULL + && perlnode != &PL_sv_undef + && SvPROXYNODE(perlnode) != NULL ) { + retval = PmmOWNER( SvPROXYNODE(perlnode) ); + } + return retval; +} + +/* reverse to PmmSvOwner(). sets the owner of the current node. this + * will increase the proxy count of the owner. + */ +SV* +PmmSetSvOwner( SV* perlnode, SV* extra ) +{ + dTHX; + if ( perlnode != NULL && perlnode != &PL_sv_undef ) { + PmmOWNER( SvPROXYNODE(perlnode)) = PmmNODE( SvPROXYNODE(extra) ); + PmmREFCNT_inc( SvPROXYNODE(extra) ); + /* fprintf(stderr, "REFCNT incremented on new owner: 0x%08.8X\n", SvPROXYNODE(extra)); */ + } + return perlnode; +} + +void PmmFixOwnerList( xmlNodePtr list, ProxyNodePtr parent ); + +/** + * this functions fixes the reference counts for an entire subtree. + * it is very important to fix an entire subtree after node operations + * where the documents or the owner node may get changed. this method is + * aware about nodes that already belong to a certain owner node. + * + * the method uses the internal methods PmmFixNode and PmmChildNodes to + * do the real updates. + * + * in the worst case this traverses the subtree twice during a node + * operation. this case is only given when the node has to be + * adopted by the document. Since the ownerdocument and the effective + * owner may differ this double traversing makes sense. + */ +int +PmmFixOwner( ProxyNodePtr nodetofix, ProxyNodePtr parent ) +{ + ProxyNodePtr oldParent = NULL; + + if ( nodetofix != NULL ) { + switch ( PmmNODE(nodetofix)->type ) { + case XML_ENTITY_DECL: + case XML_ATTRIBUTE_DECL: + case XML_NAMESPACE_DECL: + case XML_ELEMENT_DECL: + case XML_DOCUMENT_NODE: + xs_warn( "PmmFixOwner: don't need to fix this type of node\n" ); + return(0); + default: + break; + } + + if ( PmmOWNER(nodetofix) != NULL ) { + oldParent = PmmOWNERPO(nodetofix); + } + + /* The owner data is only fixed if the node is neither a + * fragment nor a document. Also no update will happen if + * the node is already his owner or the owner has not + * changed during previous operations. + */ + if( oldParent != parent ) { + xs_warn( "PmmFixOwner: re-parenting node\n" ); + /* fprintf(stderr, " 0x%08.8X (%s)\n", nodetofix, PmmNODE(nodetofix)->name); */ + if ( parent && parent != nodetofix ){ + PmmOWNER(nodetofix) = PmmNODE(parent); + PmmREFCNT_inc( parent ); + /* fprintf(stderr, "REFCNT incremented on new parent: 0x%08.8X\n", parent); */ + } + else { + PmmOWNER(nodetofix) = NULL; + } + + if ( oldParent != NULL && oldParent != nodetofix ) + PmmREFCNT_dec(oldParent); + + if ( PmmNODE(nodetofix)->type != XML_ATTRIBUTE_NODE + && PmmNODE(nodetofix)->type != XML_DTD_NODE + && PmmNODE(nodetofix)->properties != NULL ) { + PmmFixOwnerList( (xmlNodePtr)PmmNODE(nodetofix)->properties, + parent ); + } + + if ( parent == NULL || PmmNODE(nodetofix)->parent == NULL ) { + /* fix to self */ + parent = nodetofix; + } + + PmmFixOwnerList(PmmNODE(nodetofix)->children, parent); + } + else { + xs_warn( "PmmFixOwner: node doesn't need to get fixed\n" ); + } + return(1); + } + return(0); +} + +void +PmmFixOwnerList( xmlNodePtr list, ProxyNodePtr parent ) +{ + if ( list != NULL ) { + xmlNodePtr iterator = list; + while ( iterator != NULL ) { + switch ( iterator->type ) { + case XML_ENTITY_DECL: + case XML_ATTRIBUTE_DECL: + case XML_NAMESPACE_DECL: + case XML_ELEMENT_DECL: + xs_warn( "PmmFixOwnerList: don't need to fix this type of node\n" ); + iterator = iterator->next; + continue; + break; + default: + break; + } + + if ( iterator->_private != NULL ) { + PmmFixOwner( (ProxyNodePtr)iterator->_private, parent ); + } + else { + if ( iterator->type != XML_ATTRIBUTE_NODE + && iterator->properties != NULL ){ + PmmFixOwnerList( (xmlNodePtr)iterator->properties, parent ); + } + PmmFixOwnerList(iterator->children, parent); + } + iterator = iterator->next; + } + } +} + +void +PmmFixOwnerNode( xmlNodePtr node, ProxyNodePtr parent ) +{ + if ( node != NULL && parent != NULL ) { + if ( node->_private != NULL ) { + xs_warn( "PmmFixOwnerNode: calling PmmFixOwner\n" ); + PmmFixOwner( node->_private, parent ); + } + else { + xs_warn( "PmmFixOwnerNode: calling PmmFixOwnerList\n" ); + PmmFixOwnerList(node->children, parent ); + } + } +} + +ProxyNodePtr +PmmNewContext(xmlParserCtxtPtr node) +{ + ProxyNodePtr proxy = NULL; + + proxy = (ProxyNodePtr)xmlMalloc(sizeof(ProxyNode)); + if (proxy != NULL) { + proxy->node = (xmlNodePtr)node; + proxy->owner = NULL; + proxy->count = 0; + } + else { + warn( "empty context" ); + } + return proxy; +} + +int +PmmContextREFCNT_dec( ProxyNodePtr node ) +{ + xmlParserCtxtPtr libnode = NULL; + int retval = 0; + if ( node != NULL ) { + retval = PmmREFCNT(node)--; + /* fprintf(stderr, "REFCNT on context %p decremented to %d\n", node, PmmREFCNT(node)); */ + if ( PmmREFCNT(node) <= 0 ) { + xs_warn( "PmmContextREFCNT_dec: NODE DELETION\n" ); + libnode = (xmlParserCtxtPtr)PmmNODE( node ); + if ( libnode != NULL ) { + if (libnode->_private != NULL ) { + if ( libnode->_private != (void*)node ) { + PmmSAXCloseContext( libnode ); + } + else { + xmlFree( libnode->_private ); + } + libnode->_private = NULL; + } + PmmNODE( node ) = NULL; + xmlFreeParserCtxt(libnode); + } + } + xmlFree( node ); + } + return retval; +} + +SV* +PmmContextSv( xmlParserCtxtPtr ctxt ) +{ + ProxyNodePtr dfProxy= NULL; + dTHX; + SV * retval = &PL_sv_undef; + const char * CLASS = "XML::LibXML::ParserContext"; + + if ( ctxt != NULL ) { + dfProxy = PmmNewContext(ctxt); + + retval = NEWSV(0,0); + sv_setref_pv( retval, CLASS, (void*)dfProxy ); + PmmREFCNT_inc(dfProxy); + /* fprintf(stderr, "REFCNT incremented on new context: 0x%08.8X\n", dfProxy); */ + } + else { + xs_warn( "PmmContextSv: no node found!\n" ); + } + + return retval; +} + +xmlParserCtxtPtr +PmmSvContext( SV * scalar ) +{ + xmlParserCtxtPtr retval = NULL; + dTHX; + + if ( scalar != NULL + && scalar != &PL_sv_undef + && sv_isa( scalar, "XML::LibXML::ParserContext" ) + && SvPROXYNODE(scalar) != NULL ) { + retval = (xmlParserCtxtPtr)PmmNODE( SvPROXYNODE(scalar) ); + } + else { + if ( scalar == NULL + && scalar == &PL_sv_undef ) { + xs_warn( "PmmSvContext: no scalar!\n" ); + } + else if ( ! sv_isa( scalar, "XML::LibXML::ParserContext" ) ) { + xs_warn( "PmmSvContext: bad object\n" ); + } + else if (SvPROXYNODE(scalar) == NULL) { + xs_warn( "PmmSvContext: empty object\n" ); + } + else { + xs_warn( "PmmSvContext: nothing was wrong!\n"); + } + } + return retval; +} + +xmlChar* +PmmFastEncodeString( int charset, + const xmlChar *string, + const xmlChar *encoding, + STRLEN len ) +{ + xmlCharEncodingHandlerPtr coder = NULL; + xmlChar *retval = NULL; + xmlBufferPtr in = NULL, out = NULL; + + int i; + /* first check that the input is not ascii */ + /* since we do not want to recode ascii as, say, UTF-16 */ + if (len == 0) + len=xmlStrlen(string); + for (i=0; i=len) return xmlStrdup( string ); + xs_warn("PmmFastEncodeString: string is non-ascii\n"); + + if ( charset == XML_CHAR_ENCODING_ERROR){ + if (xmlStrcmp(encoding,(const xmlChar*)"UTF-16LE")==0) { + charset = XML_CHAR_ENCODING_UTF16LE; + } else if (xmlStrcmp(encoding,(const xmlChar*) "UTF-16BE")==0) { + charset = XML_CHAR_ENCODING_UTF16BE; + } + } + if ( charset == XML_CHAR_ENCODING_UTF8 ) { + /* warn("use UTF8 for encoding ... %s ", string); */ + return xmlStrdup( string ); + } + else if ( charset == XML_CHAR_ENCODING_UTF16LE || charset == XML_CHAR_ENCODING_UTF16BE ){ + /* detect and strip BOM, if any */ + if (len>=2 && (char)string[0]=='\xFE' && (char)string[1]=='\xFF') { + xs_warn("detected BE BOM\n"); + string += 2; + len -= 2; + coder = xmlGetCharEncodingHandler( XML_CHAR_ENCODING_UTF16BE ); + } else if (len>=2 && (char)string[0]=='\xFF' && (char)string[1]=='\xFE') { + xs_warn("detected LE BOM\n"); + string += 2; + len -= 2; + coder = xmlGetCharEncodingHandler( XML_CHAR_ENCODING_UTF16LE ); + } else { + coder= xmlGetCharEncodingHandler( charset ); + } + } + else if ( charset == XML_CHAR_ENCODING_ERROR ){ + /* warn("no standard encoding %s\n", encoding); */ + coder =xmlFindCharEncodingHandler( (const char *)encoding ); + } + else if ( charset == XML_CHAR_ENCODING_NONE ){ + xs_warn("PmmFastEncodeString: no encoding found\n"); + } + else { + /* warn( "use document encoding %s (%d)", encoding, charset ); */ + coder= xmlGetCharEncodingHandler( charset ); + } + + if ( coder != NULL ) { + xs_warn("PmmFastEncodeString: coding machine found \n"); + in = xmlBufferCreateStatic((void*)string, len); + out = xmlBufferCreate(); + if ( xmlCharEncInFunc( coder, out, in ) >= 0 ) { + retval = xmlStrdup( out->content ); + /* warn( "encoded string is %s" , retval); */ + } + else { + /* warn( "b0rked encoiding!\n"); */ + } + + xmlBufferFree( in ); + xmlBufferFree( out ); + xmlCharEncCloseFunc( coder ); + } + return retval; +} + +xmlChar* +PmmFastDecodeString( int charset, + const xmlChar *string, + const xmlChar *encoding, + STRLEN* len ) +{ + xmlCharEncodingHandlerPtr coder = NULL; + xmlChar *retval = NULL; + xmlBufferPtr in = NULL, out = NULL; + if (len==NULL) return NULL; + *len = 0; + if ( charset == XML_CHAR_ENCODING_ERROR){ + if (xmlStrcmp(encoding,(const xmlChar*)"UTF-16LE")==0) { + charset = XML_CHAR_ENCODING_UTF16LE; + } else if (xmlStrcmp(encoding,(const xmlChar*) "UTF-16BE")==0) { + charset = XML_CHAR_ENCODING_UTF16BE; + } + } + + if ( charset == XML_CHAR_ENCODING_UTF8 ) { + retval = xmlStrdup( string ); + *len = xmlStrlen(retval); + } + else if ( charset == XML_CHAR_ENCODING_ERROR ){ + coder = xmlFindCharEncodingHandler( (const char *) encoding ); + } + else if ( charset == XML_CHAR_ENCODING_NONE ){ + warn("PmmFastDecodeString: no encoding found\n"); + } + else { + coder= xmlGetCharEncodingHandler( charset ); + } + + if ( coder != NULL ) { + /* warn( "do encoding %s", string ); */ + in = xmlBufferCreateStatic((void*)string,xmlStrlen(string)); + out = xmlBufferCreate(); + if ( xmlCharEncOutFunc( coder, out, in ) >= 0 ) { + *len = xmlBufferLength(out); + retval = xmlStrndup(xmlBufferContent(out), *len); + } + else { + /* xs_warn("PmmFastEncodeString: decoding error\n"); */ + } + + xmlBufferFree( in ); + xmlBufferFree( out ); + xmlCharEncCloseFunc( coder ); + } + return retval; +} + +/** + * encodeString returns an UTF-8 encoded String + * while the encodig has the name of the encoding of string + **/ +xmlChar* +PmmEncodeString( const char *encoding, const xmlChar *string, STRLEN len ){ + xmlCharEncoding enc; + xmlChar *ret = NULL; + + if ( string != NULL ) { + if( encoding != NULL ) { + xs_warn("PmmEncodeString: encoding to UTF-8 from:\n"); + xs_warn( encoding ); + enc = xmlParseCharEncoding( encoding ); + ret = PmmFastEncodeString( enc, string, (const xmlChar *)encoding,len); + } + else { + /* if utf-8 is requested we do nothing */ + ret = xmlStrdup( string ); + } + } + return ret; +} + +SV* +C2Sv( const xmlChar *string, const xmlChar *encoding ) +{ + SV *retval = &PL_sv_undef; + xmlCharEncoding enc; + + if ( string != NULL ) { + if ( encoding != NULL ) { + enc = xmlParseCharEncoding( (const char*)encoding ); + } + else { + enc = 0; + } + if ( enc == 0 ) { + /* this happens if the encoding is "" or NULL */ + enc = XML_CHAR_ENCODING_UTF8; + } + + retval = newSVpvn( (const char *)string, (STRLEN) xmlStrlen(string) ); + + if ( enc == XML_CHAR_ENCODING_UTF8 ) { + /* create an UTF8 string. */ +#ifdef HAVE_UTF8 + xs_warn("C2Sv: set UTF8-SV-flag\n"); + SvUTF8_on(retval); +#endif + } + } + + return retval; +} + +xmlChar * +Sv2C( SV* scalar, const xmlChar *encoding ) +{ + xmlChar *retval = NULL; + dTHX; + + xs_warn("SV2C: start!\n"); + if ( scalar != NULL && SvOK(scalar) ) { + STRLEN len = 0; + char * t_pv =SvPV(scalar, len); + xmlChar* ts = NULL; + xmlChar* string = xmlStrdup((xmlChar*)t_pv); + if ( xmlStrlen(string) > 0 ) { + xs_warn( "SV2C: no undefs\n" ); +#ifdef HAVE_UTF8 + xs_warn( "SV2C: use UTF8\n" ); + if( !DO_UTF8(scalar) && encoding != NULL ) { +#else + if ( encoding != NULL ) { +#endif + xs_warn( "SV2C: domEncodeString!\n" ); + ts= PmmEncodeString( (const char *)encoding, string, len ); + xs_warn( "SV2C: done encoding!\n" ); + if ( string != NULL ) { + xmlFree(string); + } + string=ts; + } + } + + retval = xmlStrdup(string); + if (string != NULL ) { + xmlFree(string); + } + } + xs_warn("SV2C: end!\n"); + return retval; +} + +SV* +nodeC2Sv( const xmlChar * string, xmlNodePtr refnode ) +{ + /* this is a little helper function to avoid to much redundand + code in LibXML.xs */ + dTHX; + SV* retval = &PL_sv_undef; + STRLEN len = 0; + xmlChar * decoded = NULL; + + if ( refnode != NULL ) { + xmlDocPtr real_doc = refnode->doc; + if ( real_doc != NULL && real_doc->encoding != NULL ) { + xs_warn( " encode node !!" ); + /* The following statement is to handle bad + values set by XML::LibXSLT */ + + if ( PmmNodeEncoding(real_doc) == XML_CHAR_ENCODING_NONE ) { + SetPmmNodeEncoding(real_doc, XML_CHAR_ENCODING_UTF8); + } + + decoded = PmmFastDecodeString( PmmNodeEncoding(real_doc), + (const xmlChar *)string, + (const xmlChar *)real_doc->encoding, + &len ); + + xs_warn( "push decoded string into SV" ); + retval = newSVpvn( (const char *)decoded, len ); + xmlFree( decoded ); + + if ( PmmNodeEncoding( real_doc ) == XML_CHAR_ENCODING_UTF8 ) { +#ifdef HAVE_UTF8 + xs_warn("nodeC2Sv: set UTF8-SV-flag\n"); + SvUTF8_on(retval); +#endif + } + + return retval; + } + } + + return C2Sv(string, NULL ); +} + +xmlChar * +nodeSv2C( SV * scalar, xmlNodePtr refnode ) +{ + /* this function requires conditionized compiling, because we + request a function, that does not exists in earlier versions of + perl. in this cases the library assumes, all strings are in + UTF8. if a programmer likes to have the intelligent code, he + needs to upgrade perl */ + + if ( refnode != NULL ) { + xmlDocPtr real_dom = refnode->doc; + xs_warn("nodeSv2C: have node!\n"); + if (real_dom != NULL && real_dom->encoding != NULL ) { + dTHX; + xs_warn("nodeSv2C: encode string!\n"); + /* speed things a bit up.... */ + if ( scalar != NULL && scalar != &PL_sv_undef ) { + STRLEN len = 0; + char * t_pv =SvPV(scalar, len); + xmlChar* string = NULL; + if ( t_pv && len > 0 ) { + xs_warn( "nodeSv2C: no undefs\n" ); +#ifdef HAVE_UTF8 + xs_warn( "nodeSv2C: use UTF8\n" ); + if( !DO_UTF8(scalar) ) { +#endif + xs_warn( "nodeSv2C: domEncodeString!\n" ); + /* The following statement is to handle bad + values set by XML::LibXSLT */ + if ( PmmNodeEncoding(real_dom) == XML_CHAR_ENCODING_NONE ) { + SetPmmNodeEncoding(real_dom, XML_CHAR_ENCODING_UTF8); + } + /* the following allocates a new string (by xmlStrdup if no conversion is done) */ + string= PmmFastEncodeString( PmmNodeEncoding(real_dom), + (xmlChar*) t_pv, + (const xmlChar*)real_dom->encoding, + len); + xs_warn( "nodeSv2C: done!\n" ); +#ifdef HAVE_UTF8 + } else { + xs_warn( "nodeSv2C: no encoding set, use UTF8!\n" ); + } +#endif + } + if (string==NULL) { + return xmlStrndup((xmlChar*)t_pv,len); + } else { + return string; + } + /* if ( string == NULL ) warn( "nodeSv2C: string is NULL\n" ); */ + } + else { + xs_warn( "nodeSv2C: return NULL\n" ); + return NULL; + } + } + else { + xs_warn( "nodeSv2C: document has no encoding defined! use simple SV extraction\n" ); + } + } + xs_warn("nodeSv2C: no encoding !!\n"); + + return Sv2C( scalar, NULL ); +} + +SV * +PmmNodeToGdomeSv( xmlNodePtr node ) +{ + dTHX; + SV * retval = &PL_sv_undef; + +#ifdef XML_LIBXML_GDOME_SUPPORT + GdomeNode * gnode = NULL; + GdomeException exc; + const char * CLASS = ""; + + if ( node != NULL ) { + gnode = gdome_xml_n_mkref( node ); + if ( gnode != NULL ) { + switch (gdome_n_nodeType(gnode, &exc)) { + case GDOME_ELEMENT_NODE: + CLASS = "XML::GDOME::Element"; + break; + case GDOME_ATTRIBUTE_NODE: + CLASS = "XML::GDOME::Attr"; + break; + case GDOME_TEXT_NODE: + CLASS = "XML::GDOME::Text"; + break; + case GDOME_CDATA_SECTION_NODE: + CLASS = "XML::GDOME::CDATASection"; + break; + case GDOME_ENTITY_REFERENCE_NODE: + CLASS = "XML::GDOME::EntityReference"; + break; + case GDOME_ENTITY_NODE: + CLASS = "XML::GDOME::Entity"; + break; + case GDOME_PROCESSING_INSTRUCTION_NODE: + CLASS = "XML::GDOME::ProcessingInstruction"; + break; + case GDOME_COMMENT_NODE: + CLASS = "XML::GDOME::Comment"; + break; + case GDOME_DOCUMENT_TYPE_NODE: + CLASS = "XML::GDOME::DocumentType"; + break; + case GDOME_DOCUMENT_FRAGMENT_NODE: + CLASS = "XML::GDOME::DocumentFragment"; + break; + case GDOME_NOTATION_NODE: + CLASS = "XML::GDOME::Notation"; + break; + case GDOME_DOCUMENT_NODE: + CLASS = "XML::GDOME::Document"; + break; + default: + break; + } + + retval = NEWSV(0,0); + sv_setref_pv( retval, CLASS, gnode); + } + } +#endif + + return retval; +} diff --git a/perl-libxml-mm.h b/perl-libxml-mm.h new file mode 100644 index 0000000..aaeb719 --- /dev/null +++ b/perl-libxml-mm.h @@ -0,0 +1,372 @@ +/** + * perl-libxml-mm.h + * $Id$ + * + * Basic concept: + * perl varies in the implementation of UTF8 handling. this header (together + * with the c source) implements a few functions, that can be used from within + * the core module in order to avoid cascades of c pragmas + */ + +#ifndef __PERL_LIBXML_MM_H__ +#define __PERL_LIBXML_MM_H__ + +#ifdef __cplusplus +extern "C" { +#endif +#include "EXTERN.h" +#include "perl.h" + +#include + +#ifdef __cplusplus +} +#endif + +/* + * NAME xs_warn + * TYPE MACRO + * + * this makro is for XML::LibXML development and debugging. + * + * SYNOPSIS + * xs_warn("my warning") + * + * this makro takes only a single string(!) and passes it to perls + * warn function if the XS_WARNRINGS pragma is used at compile time + * otherwise any xs_warn call is ignored. + * + * pay attention, that xs_warn does not implement a complete wrapper + * for warn!! + */ +#ifdef XS_WARNINGS +#define xs_warn(string) warn("%s",string) +#else +#define xs_warn(string) +#endif + +/* + * @node: Reference to the node the structure proxies + * @owner: libxml defines only the document, but not the node owner + * (in case of document fragments, they are not the same!) + * @count: this is the internal reference count! + * @encoding: this value is missing in libxml2's doc structure + * + * Since XML::LibXML will not know, is a certain node is already + * defined in the perl layer, it can't surely tell when a node can be + * safely be removed from the memory. This structure helps to keep + * track how intense the nodes of a document are used and will not + * delete the nodes unless they are not referred from somewhere else. + */ +struct _ProxyNode { + xmlNodePtr node; + xmlNodePtr owner; + int count; +}; + +struct _DocProxyNode { + xmlNodePtr node; + xmlNodePtr owner; + int count; + int encoding; /* only used for proxies of xmlDocPtr */ + int psvi_status; /* see below ... */ +}; + +/* the psvi_status flag requires some explanation: + + each time libxml2 validates a document (using DTD, Schema or + RelaxNG) it stores a pointer to a last successfully applied grammar + rule in node->psvi. Upon next validation, if libxml2 wants to check + that node matches some grammar rule, it first compares the rule + pointer and node->psvi. If these are equal, the validation of the + node's subtree is skipped and the node is assumed to match the + rule. + + This causes problems when the tree is modified and then + re-validated or when the schema is freed and the document is + revalidated using a different schema and by bad chance a rule + tested against some node got allocated to the exact same location + as the rule from the schema used for the prior validation, already + freed, but still pointed to by node->psvi). + + Thus, the node->psvi values can't be trusted at all and we want to + make sure all psvi slots are NULL before each validation. To aviod + traversing the tree in the most common case, when each document is + validated just once, we maintain the psvi_status flag. + + Validating a document triggers this flag (sets it to 1). The + document with psvi_status==1 is traversed and psvi slots are nulled + prior to any validation. When the flag is triggered, it remains + triggered for the rest of the document's life, there is no way to + null it (even nulling up the psvi's does not null the flag, because + there may be unlinked parts of the document floating around which + we don't know about and thus cannot null their psvi pointers; these + unlinked document parts would cause inconsistency when re-attached + to the document tree). + + Also, importing a node from a document with psvi_status==1 to a + document with psvi_status==0 automatically triggers psvi_status on + the target document. + + NOTE: We could alternatively just null psvis from any imported + subtrees, but that would add an O(n) cleanup operation (n the size + of the imported subtree) on every importNode (possibly needlessly + since the target document may not ever be revalidated) whereas + triggering the flag is O(1) and possibly adds one O(N) cleanup + operation (N the size of the document) to the first validation of + the target document (any subsequent re-validation of the document + would have to perform the operation anyway). The sum of all n's may + be less then N, but OTH, there is a great chance that the O(N) + cleanup will never be performed. (BTW, validation is at least + O(N), probably O(Nlog N) anyway, so the cleanup has little impact; + similarly, importNode does xmlSetTreeDoc which is also O(n). So in + fact, neither solution should have significant performance impact + overall....). + +*/ + +#define Pmm_NO_PSVI 0 +#define Pmm_PSVI_TAINTED 1 + +/* helper type for the proxy structure */ +typedef struct _DocProxyNode DocProxyNode; +typedef struct _ProxyNode ProxyNode; + +/* pointer to the proxy structure */ +typedef ProxyNode* ProxyNodePtr; +typedef DocProxyNode* DocProxyNodePtr; + +/* this my go only into the header used by the xs */ +#define SvPROXYNODE(x) (INT2PTR(ProxyNodePtr,SvIV(SvRV(x)))) +#define PmmPROXYNODE(x) (INT2PTR(ProxyNodePtr,x->_private)) +#define SvNAMESPACE(x) (INT2PTR(xmlNsPtr,SvIV(SvRV(x)))) + +#define PmmREFCNT(node) node->count +#define PmmREFCNT_inc(node) node->count++ +#define PmmNODE(xnode) xnode->node +#define PmmOWNER(node) node->owner +#define PmmOWNERPO(node) ((node && PmmOWNER(node)) ? (ProxyNodePtr)PmmOWNER(node)->_private : node) + +#define PmmENCODING(node) ((DocProxyNodePtr)(node))->encoding +#define PmmNodeEncoding(node) ((DocProxyNodePtr)(node->_private))->encoding + +#define SetPmmENCODING(node,code) PmmENCODING(node)=(code) +#define SetPmmNodeEncoding(node,code) PmmNodeEncoding(node)=(code) + +#define PmmInvalidatePSVI(doc) if (doc && doc->_private) ((DocProxyNodePtr)(doc->_private))->psvi_status = Pmm_PSVI_TAINTED; +#define PmmIsPSVITainted(doc) (doc && doc->_private && (((DocProxyNodePtr)(doc->_private))->psvi_status == Pmm_PSVI_TAINTED)) + +#define PmmClearPSVI(node) if (node && node->doc && node->doc->_private && \ + ((DocProxyNodePtr)(node->doc->_private))->psvi_status == Pmm_PSVI_TAINTED) \ + domClearPSVI((xmlNodePtr) node) + +#ifndef NO_XML_LIBXML_THREADS +#ifdef USE_ITHREADS +#define XML_LIBXML_THREADS +#endif +#endif + +#ifdef XML_LIBXML_THREADS + +/* structure for storing thread-local refcount */ +struct _LocalProxyNode { + ProxyNodePtr proxy; + int count; +}; +typedef struct _LocalProxyNode LocalProxyNode; +typedef LocalProxyNode* LocalProxyNodePtr; + +#define PmmUSEREGISTRY (PROXY_NODE_REGISTRY_MUTEX != NULL) +#define PmmREGISTRY (INT2PTR(xmlHashTablePtr,SvIV(SvRV(get_sv("XML::LibXML::__PROXY_NODE_REGISTRY",0))))) +/* #define PmmREGISTRY (INT2PTR(xmlHashTablePtr,SvIV(SvRV(PROXY_NODE_REGISTRY)))) */ + +void +PmmCloneProxyNodes(); +int +PmmProxyNodeRegistrySize(); +void +PmmDumpRegistry(xmlHashTablePtr r); +void +PmmRegistryREFCNT_dec(ProxyNodePtr proxy); + +#endif + +void +PmmFreeHashTable(xmlHashTablePtr table); + +ProxyNodePtr +PmmNewNode(xmlNodePtr node); + +ProxyNodePtr +PmmNewFragment(xmlDocPtr document); + +SV* +PmmCreateDocNode( unsigned int type, ProxyNodePtr pdoc, ...); + +int +PmmREFCNT_dec( ProxyNodePtr node ); + +SV* +PmmNodeToSv( xmlNodePtr node, ProxyNodePtr owner ); + +/* PmmFixProxyEncoding + * TYPE + * Method + * PARAMETER + * @dfProxy: The proxystructure to fix. + * + * DESCRIPTION + * + * This little helper allows to fix the proxied encoding information + * after a not standard operation was done. This is required for + * XML::LibXSLT + */ +void +PmmFixProxyEncoding( ProxyNodePtr dfProxy ); + +/* PmmSvNodeExt + * TYPE + * Function + * PARAMETER + * @perlnode: the perl reference that holds the scalar. + * @copy : copy flag + * + * DESCRIPTION + * + * The function recognizes XML::LibXML and XML::GDOME + * nodes as valid input data. The second parameter 'copy' + * indicates if in case of GDOME nodes the libxml2 node + * should be copied. In some cases, where the node is + * cloned anyways, this flag has to be set to '0', while + * the default value should be allways '1'. + */ +xmlNodePtr +PmmSvNodeExt( SV * perlnode, int copy ); + +/* PmmSvNode + * TYPE + * Macro + * PARAMETER + * @perlnode: a perl reference that holds a libxml node + * + * DESCRIPTION + * + * PmmSvNode fetches the libxml node such as PmmSvNodeExt does. It is + * a wrapper, that sets the copy always to 1, which is good for all + * cases XML::LibXML uses. + */ +#define PmmSvNode(n) PmmSvNodeExt(n,1) + + +xmlNodePtr +PmmSvOwner( SV * perlnode ); + +SV* +PmmSetSvOwner(SV * perlnode, SV * owner ); + +int +PmmFixOwner(ProxyNodePtr node, ProxyNodePtr newOwner ); + +void +PmmFixOwnerNode(xmlNodePtr node, ProxyNodePtr newOwner ); + +int +PmmContextREFCNT_dec( ProxyNodePtr node ); + +SV* +PmmContextSv( xmlParserCtxtPtr ctxt ); + +xmlParserCtxtPtr +PmmSvContext( SV * perlctxt ); + +/** + * NAME PmmCopyNode + * TYPE function + * + * returns libxml2 node + * + * DESCRIPTION + * This function implements a nodetype independent node cloning. + * + * Note that this function has to stay in this module, since + * XML::LibXSLT reuses it. + */ +xmlNodePtr +PmmCloneNode( xmlNodePtr node , int deep ); + +/** + * NAME PmmNodeToGdomeSv + * TYPE function + * + * returns XML::GDOME node + * + * DESCRIPTION + * creates an Gdome node from our XML::LibXML node. + * this function is very useful for the parser. + * + * the function will only work, if XML::LibXML is compiled with + * XML::GDOME support. + * + */ +SV * +PmmNodeToGdomeSv( xmlNodePtr node ); + +/** + * NAME PmmNodeTypeName + * TYPE function + * + * returns the perl class name for the given node + * + * SYNOPSIS + * CLASS = PmmNodeTypeName( node ); + */ +const char* +PmmNodeTypeName( xmlNodePtr elem ); + +xmlChar* +PmmEncodeString( const char *encoding, const xmlChar *string, STRLEN len ); + +char* +PmmDecodeString( const char *encoding, const xmlChar *string, STRLEN* len); + +/* string manipulation will go elsewhere! */ + +/* + * NAME c_string_to_sv + * TYPE function + * SYNOPSIS + * SV *my_sv = c_string_to_sv( "my string", encoding ); + * + * this function converts a libxml2 string to a SV*. although the + * string is copied, the func does not free the c-string for you! + * + * encoding is either NULL or a encoding string such as provided by + * the documents encoding. if encoding is NULL UTF8 is assumed. + * + */ +SV* +C2Sv( const xmlChar *string, const xmlChar *encoding ); + +/* + * NAME sv_to_c_string + * TYPE function + * SYNOPSIS + * SV *my_sv = sv_to_c_string( my_sv, encoding ); + * + * this function converts a SV* to a libxml string. the SV-value will + * be copied into a *newly* allocated string. (don't forget to free it!) + * + * encoding is either NULL or a encoding string such as provided by + * the documents encoding. if encoding is NULL UTF8 is assumed. + * + */ +xmlChar * +Sv2C( SV* scalar, const xmlChar *encoding ); + +SV* +nodeC2Sv( const xmlChar * string, xmlNodePtr refnode ); + +xmlChar * +nodeSv2C( SV * scalar, xmlNodePtr refnode ); + +#endif diff --git a/perl-libxml-sax.c b/perl-libxml-sax.c new file mode 100644 index 0000000..b949d3c --- /dev/null +++ b/perl-libxml-sax.c @@ -0,0 +1,1685 @@ +/** + * perl-libxml-sax.c + * $Id$ + * + * This is free software, you may use it and distribute it under the same terms as + * Perl itself. + * + * Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +*/ + +#ifdef __cplusplus +extern "C" { +#endif +/* Disable this and use a threaded perl to test MSVC compilation errors */ +#define PERL_NO_GET_CONTEXT /* we want efficiency */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "ppport.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "perl-libxml-sax.h" + +#ifdef __cplusplus +} +#endif + + +/* + we must call CLEAR_SERROR_HANDLER upon each excurse from + perl +*/ +#define WITH_SERRORS + +#ifdef WITH_SERRORS +#define CLEAR_SERROR_HANDLER /*xmlSetStructuredErrorFunc(NULL,NULL);*/ +#else +#define CLEAR_SERROR_HANDLER +#endif + +#define NSDELIM ':' +/* #define NSDEFAULTURI "http://www.w3.org/XML/1998/namespace" */ +#define NSDEFAULTURI "http://www.w3.org/2000/xmlns/" +typedef struct { + SV * parser; + xmlNodePtr ns_stack; + HV * locator; + xmlDocPtr ns_stack_root; + SV * handler; + SV * saved_error; + struct CBuffer *charbuf; + int joinchars; +} PmmSAXVector; + +typedef PmmSAXVector* PmmSAXVectorPtr; + +struct CBufferChunk { + struct CBufferChunk *next; + xmlChar *data; + int len; +}; + +struct CBuffer { + struct CBufferChunk *head; + struct CBufferChunk *tail; +}; + +static U32 PrefixHash; /* pre-computed */ +static U32 NsURIHash; +static U32 NameHash; +static U32 LocalNameHash; +static U32 AttributesHash; +static U32 ValueHash; +static U32 DataHash; +static U32 TargetHash; +static U32 VersionHash; +static U32 EncodingHash; +static U32 PublicIdHash; +static U32 SystemIdHash; + +/* helper function C2Sv is ment to work faster than the perl-libxml-mm + version. this shortcut is useful, because SAX handles only UTF8 + strings, so there is no conversion logic required. +*/ +SV* +_C2Sv( const xmlChar *string, const xmlChar *dummy ) +{ + + dTHX; + SV *retval = &PL_sv_undef; + STRLEN len; + + if ( string != NULL ) { + len = xmlStrlen( string ); + retval = NEWSV(0, len+1); + sv_setpvn(retval, (const char *)string, len ); +#ifdef HAVE_UTF8 + SvUTF8_on( retval ); +#endif + } + + return retval; +} + +SV* +_C2Sv_len( const xmlChar *string, int len ) +{ + + dTHX; + SV *retval = &PL_sv_undef; + + if ( string != NULL ) { + retval = NEWSV(0, len+1); + sv_setpvn(retval, (const char *)string, (STRLEN) len ); +#ifdef HAVE_UTF8 + SvUTF8_on( retval ); +#endif + } + + return retval; +} + +void +PmmSAXInitialize(pTHX) +{ + PERL_HASH(PrefixHash, "Prefix", 6); + PERL_HASH(NsURIHash, "NamespaceURI", 12); + PERL_HASH(NameHash, "Name", 4); + PERL_HASH(LocalNameHash, "LocalName", 9); + PERL_HASH(AttributesHash, "Attributes", 10); + PERL_HASH(ValueHash, "Value", 5); + PERL_HASH(DataHash, "Data", 4); + PERL_HASH(TargetHash, "Target", 6); + PERL_HASH(VersionHash, "Version", 7); + PERL_HASH(EncodingHash, "Encoding", 8); + PERL_HASH(PublicIdHash, "PublicId", 8); + PERL_HASH(SystemIdHash, "SystemId", 8); +} + +xmlSAXHandlerPtr PSaxGetHandler(); +int PSaxCharactersFlush(void *, struct CBuffer *); + + +/* Character buffering functions */ + +struct CBufferChunk * CBufferChunkNew(void) { + struct CBufferChunk *newchunk = xmlMalloc(sizeof(struct CBufferChunk)); + memset(newchunk, 0, sizeof(struct CBufferChunk)); + return newchunk; +} + +struct CBuffer * CBufferNew(void) { + struct CBuffer *new = xmlMalloc(sizeof(struct CBuffer)); + struct CBufferChunk *newchunk = CBufferChunkNew(); + + memset(new, 0, sizeof(struct CBuffer)); + + new->head = newchunk; + new->tail = newchunk; + + return new; +} + +void CBufferPurge(struct CBuffer *buffer) { + struct CBufferChunk *p1; + struct CBufferChunk *p2; + + if (buffer == NULL || buffer->head->data == NULL) { + return; + } + + if ((p1 = buffer->head)) { + + while(p1) { + p2 = p1->next; + + if (p1->data) { + xmlFree(p1->data); + } + + xmlFree(p1); + + p1 = p2; + } + } + + buffer->head = CBufferChunkNew(); + buffer->tail = buffer->head; +} + +void CBufferFree(struct CBuffer *buffer) { + struct CBufferChunk *p1; + struct CBufferChunk *p2; + + if (buffer == NULL) { + return; + } + + if ((p1 = buffer->head)) { + + while(p1) { + p2 = p1->next; + + if (p1->data) { + xmlFree(p1->data); + } + + xmlFree(p1); + + p1 = p2; + } + } + + xmlFree(buffer); + + return; +} + +int CBufferLength(struct CBuffer *buffer) { + int length = 0; + struct CBufferChunk *cur; + + for(cur = buffer->head; cur; cur = cur->next) { + length += cur->len; + } + + return length; +} + +void CBufferAppend(struct CBuffer *buffer, const xmlChar *newstring, int len) { + xmlChar *copy = xmlMalloc(len); + + memcpy(copy, newstring, len); + + buffer->tail->data = copy; + buffer->tail->len = len; + buffer->tail->next = CBufferChunkNew(); + buffer->tail = buffer->tail->next; +} + +xmlChar * CBufferCharacters(struct CBuffer *buffer) { + int length = CBufferLength(buffer); + xmlChar *new = xmlMalloc(length + 1); + xmlChar *p = new; + int copied = 0; + struct CBufferChunk *cur; + + /* We need this because stderr on some perls requires + * my_perl. See: + * + * https://rt.cpan.org/Public/Bug/Display.html?id=69082 + * + * */ + dTHX; + + if (buffer->head->data == NULL) { + return NULL; + } + + for(cur = buffer->head;cur;cur = cur->next) { + if (! cur->data) { + continue; + } + + if ((copied = copied + cur->len) > length) { + fprintf(stderr, "string overflow\n"); + abort(); + } + + memcpy(p, cur->data, cur->len); + p += cur->len; + } + + new[length] = '\0'; + + return new; +} + +/* end character buffering functions */ + + +void +PmmSAXInitContext( xmlParserCtxtPtr ctxt, SV * parser, SV * saved_error ) +{ + PmmSAXVectorPtr vec = NULL; + SV ** th; + SV ** joinchars; + + dTHX; + + CLEAR_SERROR_HANDLER + vec = (PmmSAXVector*) xmlMalloc( sizeof(PmmSAXVector) ); + + vec->ns_stack_root = xmlNewDoc(NULL); + vec->ns_stack = xmlNewDocNode(vec->ns_stack_root, + NULL, + (const xmlChar*)"stack", + NULL ); + + xmlAddChild((xmlNodePtr)vec->ns_stack_root, vec->ns_stack); + + vec->locator = NULL; + + vec->saved_error = saved_error; + + vec->parser = SvREFCNT_inc( parser ); + th = hv_fetch( (HV*)SvRV(parser), "HANDLER", 7, 0 ); + if ( th != NULL && SvTRUE(*th) ) { + vec->handler = SvREFCNT_inc(*th) ; + } + else { + vec->handler = NULL; + } + + joinchars = hv_fetch((HV*)SvRV(parser), "JOIN_CHARACTERS", 15, 0); + + if (joinchars != NULL) { + vec->joinchars = (SvIV(*joinchars)); + } else { + vec->joinchars = 0; + } + + if (vec->joinchars) { + vec->charbuf = CBufferNew(); + } else { + vec->charbuf = NULL; + } + + if ( ctxt->sax ) { + xmlFree( ctxt->sax ); + } + ctxt->sax = PSaxGetHandler(); + + ctxt->_private = (void*)vec; +} + +void +PmmSAXCloseContext( xmlParserCtxtPtr ctxt ) +{ + PmmSAXVector * vec = (PmmSAXVectorPtr) ctxt->_private; + dTHX; + + if ( vec->handler != NULL ) { + SvREFCNT_dec( vec->handler ); + vec->handler = NULL; + } + + CBufferFree(vec->charbuf); + vec->charbuf = NULL; + + xmlFree( ctxt->sax ); + ctxt->sax = NULL; + + SvREFCNT_dec( vec->parser ); + vec->parser = NULL; + + xmlFreeDoc( vec->ns_stack_root ); + vec->ns_stack_root = NULL; + + if ( vec->locator != NULL ) { + SvREFCNT_dec( vec->locator ); + vec->locator = NULL; + } + + xmlFree( vec ); + ctxt->_private = NULL; +} + + +xmlNsPtr +PmmGetNsMapping( xmlNodePtr ns_stack, const xmlChar * prefix ) +{ + if ( ns_stack != NULL ) { + return xmlSearchNs( ns_stack->doc, ns_stack, prefix ); + } + + return NULL; +} + + +void +PSaxStartPrefix( PmmSAXVectorPtr sax, const xmlChar * prefix, + const xmlChar * uri, SV * handler ) +{ + dTHX; + HV * param; + SV * rv; + + dSP; + + ENTER; + SAVETMPS; + + param = newHV(); + + (void) hv_store(param, "NamespaceURI", 12, + _C2Sv(uri, NULL), NsURIHash); + + if ( prefix != NULL ) { + (void) hv_store(param, "Prefix", 6, + _C2Sv(prefix, NULL), PrefixHash); + } + else { + (void) hv_store(param, "Prefix", 6, + _C2Sv((const xmlChar*)"", NULL), PrefixHash); + } + + PUSHMARK(SP) ; + XPUSHs(handler); + + rv = newRV_noinc((SV*)param); + + XPUSHs(rv); + PUTBACK; + + call_method( "start_prefix_mapping", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv); + if (SvTRUE(ERRSV)) { + croak_obj; + } + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER +} + +void +PSaxEndPrefix( PmmSAXVectorPtr sax, const xmlChar * prefix, + const xmlChar * uri, SV * handler ) +{ + dTHX; + HV * param; + SV * rv; + + dSP; + + ENTER; + SAVETMPS; + param = newHV(); + (void) hv_store(param, "NamespaceURI", 12, + _C2Sv(uri, NULL), NsURIHash); + + if ( prefix != NULL ) { + (void) hv_store(param, "Prefix", 6, + _C2Sv(prefix, NULL), PrefixHash); + } + else { + (void) hv_store(param, "Prefix", 6, + _C2Sv((const xmlChar *)"", NULL), PrefixHash); + } + + PUSHMARK(SP) ; + XPUSHs(handler); + + + rv = newRV_noinc((SV*)param); + + XPUSHs(rv); + PUTBACK; + + call_method( "end_prefix_mapping", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER +} + +void +PmmExtendNsStack( PmmSAXVectorPtr sax , const xmlChar * name) { + xmlNodePtr newNS = NULL; + xmlChar * localname = NULL; + xmlChar * prefix = NULL; + + localname = xmlSplitQName( NULL, name, &prefix ); + if ( prefix != NULL ) { + /* check if we can find a namespace with that prefix... */ + xmlNsPtr ns = xmlSearchNs( sax->ns_stack->doc, sax->ns_stack, prefix ); + + if ( ns != NULL ) { + newNS = xmlNewDocNode( sax->ns_stack_root, ns, localname, NULL ); + } + else { + newNS = xmlNewDocNode( sax->ns_stack_root, NULL, name, NULL ); + } + } + else { + newNS = xmlNewDocNode( sax->ns_stack_root, NULL, name, NULL ); + } + + if ( newNS != NULL ) { + xmlAddChild(sax->ns_stack, newNS); + sax->ns_stack = newNS; + } + + if ( localname != NULL ) { + xmlFree( localname ) ; + } + if ( prefix != NULL ) { + xmlFree( prefix ); + } +} + +void +PmmNarrowNsStack( PmmSAXVectorPtr sax, SV *handler ) +{ + xmlNodePtr parent = sax->ns_stack->parent; + xmlNsPtr list = sax->ns_stack->nsDef; + + while ( list ) { + if ( !xmlStrEqual(list->prefix, (const xmlChar*)"xml") ) { + PSaxEndPrefix( sax, list->prefix, list->href, handler ); + } + list = list->next; + } + xmlUnlinkNode(sax->ns_stack); + xmlFreeNode(sax->ns_stack); + sax->ns_stack = parent; +} + +void +PmmAddNamespace( PmmSAXVectorPtr sax, const xmlChar * name, + const xmlChar * href, SV *handler) +{ + xmlNsPtr ns = NULL; + xmlChar * prefix = NULL; + xmlChar * localname = NULL; + + + if ( sax->ns_stack == NULL ) { + return; + } + + ns = xmlNewNs( sax->ns_stack, href, name ); + + if ( sax->ns_stack->ns == NULL ) { + localname = xmlSplitQName( NULL, sax->ns_stack->name, &prefix ); + + if ( name != NULL ) { + if ( xmlStrEqual( prefix , name ) ) { + xmlChar * oname = (xmlChar*)(sax->ns_stack->name); + sax->ns_stack->ns = ns; + xmlFree( oname ); + sax->ns_stack->name = (const xmlChar*) xmlStrdup( localname ); + } + } + else if ( prefix == NULL ) { + sax->ns_stack->ns = ns; + } + } + + if ( prefix ) { + xmlFree( prefix ); + } + if ( localname ) { + xmlFree( localname ); + } + + PSaxStartPrefix( sax, name, href, handler ); +} + +#define XML_STR_NOT_EMPTY(s) ((s)[0] != 0) + +HV * +PmmGenElementSV( pTHX_ PmmSAXVectorPtr sax, const xmlChar * name ) +{ + HV * retval = newHV(); + xmlChar * localname = NULL; + xmlChar * prefix = NULL; + + xmlNsPtr ns = NULL; + + if ( name != NULL && XML_STR_NOT_EMPTY( name ) ) { + (void) hv_store(retval, "Name", 4, + _C2Sv(name, NULL), NameHash); + + localname = xmlSplitQName(NULL, name, &prefix); + if (localname != NULL) xmlFree(localname); + ns = PmmGetNsMapping( sax->ns_stack, prefix ); + if (prefix != NULL) xmlFree(prefix); + + if ( ns != NULL ) { + (void) hv_store(retval, "NamespaceURI", 12, + _C2Sv(ns->href, NULL), NsURIHash); + if ( ns->prefix ) { + (void) hv_store(retval, "Prefix", 6, + _C2Sv(ns->prefix, NULL), PrefixHash); + } + else { + (void) hv_store(retval, "Prefix", 6, + _C2Sv((const xmlChar *)"",NULL), PrefixHash); + } + + (void) hv_store(retval, "LocalName", 9, + _C2Sv(sax->ns_stack->name, NULL), LocalNameHash); + } + else { + (void) hv_store(retval, "NamespaceURI", 12, + _C2Sv((const xmlChar *)"",NULL), NsURIHash); + (void) hv_store(retval, "Prefix", 6, + _C2Sv((const xmlChar *)"",NULL), PrefixHash); + (void) hv_store(retval, "LocalName", 9, + _C2Sv(name, NULL), LocalNameHash); + } + } + + return retval; +} + +xmlChar * +PmmGenNsName( const xmlChar * name, const xmlChar * nsURI ) +{ + int namelen = 0; + int urilen = 0; + xmlChar * retval = NULL; + + if ( name == NULL ) { + return NULL; + } + namelen = xmlStrlen( name ); + + retval = xmlStrncat( retval, (const xmlChar *)"{", 1 ); + if ( nsURI != NULL ) { + urilen = xmlStrlen( nsURI ); + retval = xmlStrncat( retval, nsURI, urilen ); + } + retval = xmlStrncat( retval, (const xmlChar *)"}", 1 ); + retval = xmlStrncat( retval, name, namelen ); + return retval; +} + +HV * +PmmGenAttributeHashSV( pTHX_ PmmSAXVectorPtr sax, + const xmlChar **attr, SV * handler ) +{ + HV * retval = NULL; + HV * atV = NULL; + xmlNsPtr ns = NULL; + + U32 atnameHash = 0; + int len = 0; + + const xmlChar * nsURI = NULL; + const xmlChar **ta = attr; + const xmlChar * name = NULL; + const xmlChar * value = NULL; + + xmlChar * keyname = NULL; + xmlChar * localname = NULL; + xmlChar * prefix = NULL; + + retval = newHV(); + + if ( ta != NULL ) { + while ( *ta != NULL ) { + atV = newHV(); + name = *ta; ta++; + value = *ta; ta++; + + if ( name != NULL && XML_STR_NOT_EMPTY( name ) ) { + localname = xmlSplitQName(NULL, name, &prefix); + + (void) hv_store(atV, "Name", 4, + _C2Sv(name, NULL), NameHash); + if ( value != NULL ) { + (void) hv_store(atV, "Value", 5, + _C2Sv(value, NULL), ValueHash); + } + + if ( xmlStrEqual( (const xmlChar *)"xmlns", name ) ) { + /* a default namespace */ + PmmAddNamespace( sax, NULL, value, handler); + /* nsURI = (const xmlChar*)NSDEFAULTURI; */ + nsURI = NULL; + (void) hv_store(atV, "Name", 4, + _C2Sv(name, NULL), NameHash); + + (void) hv_store(atV, "Prefix", 6, + _C2Sv((const xmlChar *)"", NULL), PrefixHash); + (void) hv_store(atV, "LocalName", 9, + _C2Sv(name,NULL), LocalNameHash); + (void) hv_store(atV, "NamespaceURI", 12, + _C2Sv((const xmlChar *)"", NULL), NsURIHash); + + } + else if (xmlStrncmp((const xmlChar *)"xmlns:", name, 6 ) == 0 ) { + PmmAddNamespace( sax, + localname, + value, + handler); + + nsURI = (const xmlChar*)NSDEFAULTURI; + + (void) hv_store(atV, "Prefix", 6, + _C2Sv(prefix, NULL), PrefixHash); + (void) hv_store(atV, "LocalName", 9, + _C2Sv(localname, NULL), LocalNameHash); + (void) hv_store(atV, "NamespaceURI", 12, + _C2Sv((const xmlChar *)NSDEFAULTURI,NULL), + NsURIHash); + } + else if ( prefix != NULL + && (ns = PmmGetNsMapping( sax->ns_stack, prefix ) ) ) { + nsURI = ns->href; + + (void) hv_store(atV, "NamespaceURI", 12, + _C2Sv(ns->href, NULL), NsURIHash); + (void) hv_store(atV, "Prefix", 6, + _C2Sv(ns->prefix, NULL), PrefixHash); + (void) hv_store(atV, "LocalName", 9, + _C2Sv(localname, NULL), LocalNameHash); + } + else { + nsURI = NULL; + (void) hv_store(atV, "NamespaceURI", 12, + _C2Sv((const xmlChar *)"", NULL), NsURIHash); + (void) hv_store(atV, "Prefix", 6, + _C2Sv((const xmlChar *)"", NULL), PrefixHash); + (void) hv_store(atV, "LocalName", 9, + _C2Sv(name, NULL), LocalNameHash); + } + + keyname = PmmGenNsName( localname != NULL ? localname : name, + nsURI ); + + len = xmlStrlen( keyname ); + PERL_HASH( atnameHash, (const char *)keyname, len ); + (void) hv_store(retval, + (const char *)keyname, + len, + newRV_noinc((SV*)atV), + atnameHash ); + + if ( keyname != NULL ) { + xmlFree( keyname ); + } + if ( localname != NULL ) { + xmlFree(localname); + } + localname = NULL; + if ( prefix != NULL ) { + xmlFree( prefix ); + } + prefix = NULL; + + } + } + } + + return retval; +} + +HV * +PmmGenCharDataSV( pTHX_ PmmSAXVectorPtr sax, const xmlChar * data, int len ) +{ + HV * retval = newHV(); + + if ( data != NULL && XML_STR_NOT_EMPTY( data ) ) { + (void) hv_store(retval, "Data", 4, + _C2Sv_len(data, len), DataHash); + } + + return retval; +} + +HV * +PmmGenPISV( pTHX_ PmmSAXVectorPtr sax, + const xmlChar * target, + const xmlChar * data ) +{ + HV * retval = newHV(); + + if ( target != NULL && XML_STR_NOT_EMPTY( target ) ) { + (void) hv_store(retval, "Target", 6, + _C2Sv(target, NULL), TargetHash); + + if ( data != NULL && XML_STR_NOT_EMPTY( data ) ) { + (void) hv_store(retval, "Data", 4, + _C2Sv(data, NULL), DataHash); + } + else { + (void) hv_store(retval, "Data", 4, + _C2Sv((const xmlChar *)"", NULL), DataHash); + } + } + + return retval; +} + +HV * +PmmGenDTDSV( pTHX_ PmmSAXVectorPtr sax, + const xmlChar * name, + const xmlChar * publicId, + const xmlChar * systemId ) +{ + HV * retval = newHV(); + if ( name != NULL && XML_STR_NOT_EMPTY( name ) ) { + (void) hv_store(retval, "Name", 4, + _C2Sv(name, NULL), NameHash); + } + if ( publicId != NULL && XML_STR_NOT_EMPTY( publicId ) ) { + (void) hv_store(retval, "PublicId", 8, + _C2Sv(publicId, NULL), PublicIdHash); + } + if ( systemId != NULL && XML_STR_NOT_EMPTY( systemId ) ) { + (void) hv_store(retval, "SystemId", 8, + _C2Sv(systemId, NULL), SystemIdHash); + } + return retval; +} + +HV * +PmmGenLocator( xmlSAXLocatorPtr loc) +{ + dTHX; + HV * locator = newHV(); + + const xmlChar * PublicId = loc->getPublicId(NULL); + const xmlChar * SystemId = loc->getSystemId(NULL); + + if ( PublicId != NULL && XML_STR_NOT_EMPTY( PublicId ) ) { + (void) hv_store(locator, "PublicId", 8, + newSVpv((char *)PublicId, 0), 0); + } + + if ( SystemId != NULL && XML_STR_NOT_EMPTY( SystemId ) ) { + (void) hv_store(locator, "SystemId", 8, + newSVpv((char *)SystemId, 0), 0); + } + + return locator; +} + + +void +PmmUpdateLocator( xmlParserCtxtPtr ctxt ) +{ + dTHX; + const xmlChar * encoding; + const xmlChar * version; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + if (sax->locator == NULL) { + return; + } + + (void) hv_store(sax->locator, "LineNumber", 10, + newSViv(ctxt->input->line), 0); + + (void) hv_store(sax->locator, "ColumnNumber", 12, + newSViv(ctxt->input->col), 0); + + encoding = ctxt->input->encoding; + version = ctxt->input->version; + + if ( encoding != NULL && XML_STR_NOT_EMPTY( encoding ) ) { + (void) hv_store(sax->locator, "Encoding", 8, + newSVpv((char *)encoding, 0), 0); + } + + if ( version != NULL && XML_STR_NOT_EMPTY( version ) ) { + (void) hv_store(sax->locator, "XMLVersion", 10, + newSVpv((char *)version, 0), 0); + } +} + +int +PSaxSetDocumentLocator(void *ctx, xmlSAXLocatorPtr loc) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + HV* empty; + SV * handler = sax->handler; + SV * rv; + + dSP; + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + + XPUSHs(handler); + + sax->locator = PmmGenLocator(loc); + + rv = newRV_inc((SV*)sax->locator); + XPUSHs( rv); + + PUTBACK; + + call_method( "set_document_locator", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv) ; + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER + return 1; +} + +int +PSaxStartDocument(void * ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + HV* empty; + SV * handler = sax->handler; + + SV * rv; + if ( handler != NULL ) { + + dSP; + PmmUpdateLocator(ctx); + + ENTER; + SAVETMPS; + + empty = newHV(); + PUSHMARK(SP) ; + XPUSHs(handler); + XPUSHs(sv_2mortal(newRV_noinc((SV*)empty))); + PUTBACK; + + call_method( "start_document", G_SCALAR | G_EVAL | G_DISCARD ); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + SPAGAIN; + + PUSHMARK(SP) ; + + + XPUSHs(handler); + + empty = newHV(); + if ( ctxt->version != NULL ) { + (void) hv_store(empty, "Version", 7, + _C2Sv(ctxt->version, NULL), VersionHash); + } + else { + (void) hv_store(empty, "Version", 7, + _C2Sv((const xmlChar *)"1.0", NULL), VersionHash); + } + + if ( ctxt->input->encoding != NULL ) { + (void) hv_store(empty, "Encoding", 8, + _C2Sv(ctxt->input->encoding, NULL), EncodingHash); + } + + rv = newRV_noinc((SV*)empty); + XPUSHs( rv); + + PUTBACK; + + call_method( "xml_decl", G_SCALAR | G_EVAL | G_DISCARD ); + CLEAR_SERROR_HANDLER + sv_2mortal(rv); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + } + CLEAR_SERROR_HANDLER + return 1; +} + +int +PSaxEndDocument(void * ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + dTHX; + dSP; + + PmmUpdateLocator(ctx); + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(sax->parser); + PUTBACK; + + call_pv( "XML::LibXML::_SAXParser::end_document", G_SCALAR | G_EVAL | G_DISCARD ); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER + return 1; +} + +int +PSaxStartElement(void *ctx, const xmlChar * name, const xmlChar** attr) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + HV * attrhash = NULL; + HV * element = NULL; + SV * handler = sax->handler; + SV * rv; + SV * arv; + + dSP; + + PmmUpdateLocator(ctx); + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + ENTER; + SAVETMPS; + + PmmExtendNsStack(sax, name); + + attrhash = PmmGenAttributeHashSV(aTHX_ sax, attr, handler ); + element = PmmGenElementSV(aTHX_ sax, name); + + arv = newRV_noinc((SV*)attrhash); + (void) hv_store( element, + "Attributes", + 10, + arv, + AttributesHash ); + + PUSHMARK(SP) ; + + XPUSHs(handler); + rv = newRV_noinc((SV*)element); + XPUSHs(rv); + PUTBACK; + + call_method( "start_element", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv) ; + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER + return 1; +} + +int +PSaxEndElement(void *ctx, const xmlChar * name) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + SV * handler = sax->handler; + SV * rv; + HV * element; + + dSP; + + PmmUpdateLocator(ctx); + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(handler); + + element = PmmGenElementSV(aTHX_ sax, name); + rv = newRV_noinc((SV*)element); + + XPUSHs(rv); + PUTBACK; + + call_method( "end_element", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + + PmmNarrowNsStack(sax, handler); + CLEAR_SERROR_HANDLER + return 1; +} + +int +PSaxCharactersDispatch(void *ctx, const xmlChar * ch, int len) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + HV* element; + SV * handler; + SV * rv = NULL; + + if ( sax == NULL ) { +/* warn( "lost my sax context!? ( %s, %d )\n", ch, len ); */ + return 0; + } + + handler = sax->handler; + + if ( ch != NULL && handler != NULL ) { + + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + + XPUSHs(handler); + element = PmmGenCharDataSV(aTHX_ sax, ch, len ); + + rv = newRV_noinc((SV*)element); + XPUSHs(rv); + sv_2mortal(rv); + + PUTBACK; + + call_method( "characters", G_SCALAR | G_EVAL | G_DISCARD ); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + FREETMPS ; + LEAVE ; + + } + CLEAR_SERROR_HANDLER; + return 1; +} + +int PSaxCharactersFlush (void *ctx, struct CBuffer *buffer) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + xmlChar *ch; + int len; + + if (buffer->head->data == NULL) { + return 1; + } + + ch = CBufferCharacters(sax->charbuf); + len = CBufferLength(sax->charbuf); + + CBufferPurge(buffer); + + return PSaxCharactersDispatch(ctx, ch, len); +} + +int PSaxCharacters (void *ctx, const xmlChar * ch, int len) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + PmmUpdateLocator(ctx); + + if (sax->joinchars) { + struct CBuffer *buffer = sax->charbuf; + CBufferAppend(buffer, ch, len); + return 1; + } + + return PSaxCharactersDispatch(ctx, ch, len); +} + +int +PSaxComment(void *ctx, const xmlChar * ch) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + HV* element; + SV * handler = sax->handler; + SV * rv = NULL; + + PmmUpdateLocator(ctx); + + if ( ch != NULL && handler != NULL ) { + dSP; + + int len = xmlStrlen( ch ); + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(handler); + element = PmmGenCharDataSV(aTHX_ sax, ch, len); + + rv = newRV_noinc((SV*)element); + XPUSHs(rv); + PUTBACK; + + call_method( "comment", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + } + CLEAR_SERROR_HANDLER + return 1; +} + +int +PSaxCDATABlock(void *ctx, const xmlChar * ch, int len) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + HV* element; + SV * handler = sax->handler; + SV * rv = NULL; + + PmmUpdateLocator(ctx); + + if ( ch != NULL && handler != NULL ) { + dSP; + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(handler); + PUTBACK; + call_method( "start_cdata", G_SCALAR | G_EVAL | G_DISCARD ); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + SPAGAIN; + PUSHMARK(SP) ; + + XPUSHs(handler); + element = PmmGenCharDataSV(aTHX_ sax, ch, len); + + rv = newRV_noinc((SV*)element); + XPUSHs(rv); + PUTBACK; + + call_method( "characters", G_SCALAR | G_EVAL | G_DISCARD); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + SPAGAIN; + PUSHMARK(SP) ; + + XPUSHs(handler); + PUTBACK; + + call_method( "end_cdata", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + + } + CLEAR_SERROR_HANDLER + return 1; + +} + +int +PSaxProcessingInstruction( void * ctx, const xmlChar * target, const xmlChar * data ) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + dTHX; + SV * handler = sax->handler; + SV * element; + SV * rv = NULL; + + PmmUpdateLocator(ctx); + + if ( handler != NULL ) { + dSP; + + if (sax->joinchars) + { + PSaxCharactersFlush(ctxt, sax->charbuf); + } + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(handler); + element = (SV*)PmmGenPISV(aTHX_ sax, (const xmlChar *)target, data); + rv = newRV_noinc((SV*)element); + XPUSHs(rv); + + PUTBACK; + + call_method( "processing_instruction", G_SCALAR | G_EVAL | G_DISCARD ); + + sv_2mortal(rv); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + } + CLEAR_SERROR_HANDLER + return 1; +} + +void PSaxExternalSubset (void * ctx, + const xmlChar * name, + const xmlChar * ExternalID, + const xmlChar * SystemID) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + dTHX; + SV * handler = sax->handler; + SV * element; + SV * rv = NULL; + + PmmUpdateLocator(ctx); + + if ( handler != NULL ) { + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(handler); + element = (SV*)PmmGenDTDSV(aTHX_ sax, + name, + ExternalID, + SystemID); + rv = newRV_noinc((SV*)element); + XPUSHs(rv); + + PUTBACK; + + call_method( "start_dtd", G_SCALAR | G_EVAL | G_DISCARD ); + sv_2mortal(rv); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + PUSHMARK(SP) ; + XPUSHs(handler); + rv = newRV_noinc((SV*)newHV()); /* empty */ + XPUSHs(rv); + + PUTBACK; + + call_method( "end_dtd", G_SCALAR | G_EVAL | G_DISCARD ); + + FREETMPS ; + LEAVE ; + } + CLEAR_SERROR_HANDLER + return; +} + + +/* + +void PSaxInternalSubset (void * ctx, + const xmlChar * name, + const xmlChar * ExternalID, + const xmlChar * SystemID) +{ + // called before ExternalSubset + // if used, how do we generate the correct start_dtd ? +} + +void PSaxElementDecl (void *ctx, const xmlChar *name, + int type, + xmlElementContentPtr content) { + // this one is not easy to implement + // since libxml2 has no (reliable) public method + // for dumping xmlElementContent :-( +} + +void +PSaxAttributeDecl (void * ctx, + const xmlChar * elem, + const xmlChar * fullname, + int type, + int def, + const xmlChar * defaultValue, + xmlEnumerationPtr tree) +{ +} + +void +PSaxEntityDecl (void * ctx, + const xmlChar * name, + int type, + const xmlChar * publicId, + const xmlChar * systemId, + xmlChar * content) +{ +} + +void +PSaxNotationDecl (void * ctx, + const xmlChar * name, + const xmlChar * publicId, + const xmlChar * systemId) +{ +} + +void +PSaxUnparsedEntityDecl (void * ctx, + const xmlChar * name, + const xmlChar * publicId, + const xmlChar * systemId, + const xmlChar * notationName) +{ +} +*/ + +int +PmmSaxWarning(void * ctx, const char * msg, ...) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + va_list args; + SV * svMessage; + + dTHX; + dSP; + svMessage = NEWSV(0,512); + + va_start(args, msg); + sv_vsetpvfn(svMessage, + msg, + xmlStrlen((const xmlChar *)msg), + &args, + NULL, + 0, + NULL); + va_end(args); + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(sax->parser); + + XPUSHs(sv_2mortal(svMessage)); + XPUSHs(sv_2mortal(newSViv(ctxt->input->line))); + XPUSHs(sv_2mortal(newSViv(ctxt->input->col))); + + PUTBACK; + + call_pv( "XML::LibXML::_SAXParser::warning", G_SCALAR | G_EVAL | G_DISCARD ); + + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER + return 1; +} + + +int +PmmSaxError(void * ctx, const char * msg, ...) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + va_list args; + SV * svMessage; + +#if LIBXML_VERSION > 20600 + xmlErrorPtr last_err = xmlCtxtGetLastError( ctxt ); +#endif + dTHX; + dSP; + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + + XPUSHs(sax->parser); + + svMessage = NEWSV(0,512); + + va_start(args, msg); + sv_vsetpvfn(svMessage, msg, xmlStrlen((const xmlChar *)msg), &args, NULL, 0, NULL); + va_end(args); + if (SvOK(sax->saved_error)) { + sv_catsv( sax->saved_error, svMessage ); + } else { + sv_setsv( sax->saved_error, svMessage ); + } + XPUSHs(sv_2mortal(svMessage)); + XPUSHs(sv_2mortal(newSViv(ctxt->input->line))); + XPUSHs(sv_2mortal(newSViv(ctxt->input->col))); + + PUTBACK; +#if LIBXML_VERSION > 20600 + /* + this is a workaround: at least some versions of libxml2 didn't not call + the fatalError callback at all + */ + if (last_err && last_err->level == XML_ERR_FATAL) { + call_pv( "XML::LibXML::_SAXParser::fatal_error", G_SCALAR | G_EVAL | G_DISCARD ); + } else { + call_pv( "XML::LibXML::_SAXParser::error", G_SCALAR | G_EVAL | G_DISCARD ); + } +#else + /* actually, we do not know if it is a fatal error or not */ + call_pv( "XML::LibXML::_SAXParser::fatal_error", G_SCALAR | G_EVAL | G_DISCARD ); +#endif + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER + return 1; +} + + +int +PmmSaxFatalError(void * ctx, const char * msg, ...) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx; + PmmSAXVectorPtr sax = (PmmSAXVectorPtr)ctxt->_private; + + va_list args; + SV * svMessage; + + dTHX; + dSP; + + svMessage = NEWSV(0,512); + + va_start(args, msg); + sv_vsetpvfn(svMessage, msg, xmlStrlen((const xmlChar *)msg), &args, NULL, 0, NULL); + va_end(args); + + ENTER; + SAVETMPS; + + PUSHMARK(SP) ; + XPUSHs(sax->parser); + + if (SvOK(sax->saved_error)) { + sv_catsv( sax->saved_error, svMessage ); + } else { + sv_setsv( sax->saved_error, svMessage ); + } + + XPUSHs(sv_2mortal(svMessage)); + XPUSHs(sv_2mortal(newSViv(ctxt->input->line))); + XPUSHs(sv_2mortal(newSViv(ctxt->input->col))); + + PUTBACK; + call_pv( "XML::LibXML::_SAXParser::fatal_error", G_SCALAR | G_EVAL | G_DISCARD ); + if (SvTRUE(ERRSV)) { + croak_obj; + } + + FREETMPS ; + LEAVE ; + CLEAR_SERROR_HANDLER + return 1; +} + +/* NOTE: + * end document is not handled by the parser itself! use + * XML::LibXML::SAX instead! + */ +xmlSAXHandlerPtr +PSaxGetHandler() +{ + xmlSAXHandlerPtr retval = (xmlSAXHandlerPtr)xmlMalloc(sizeof(xmlSAXHandler)); + memset(retval, 0, sizeof(xmlSAXHandler)); + + retval->setDocumentLocator = (setDocumentLocatorSAXFunc)&PSaxSetDocumentLocator; + + retval->startDocument = (startDocumentSAXFunc)&PSaxStartDocument; + + /* libxml2 will not handle perls returnvalue correctly, so we have + * to end the document ourselfes + */ + retval->endDocument = NULL; /* (endDocumentSAXFunc)&PSaxEndDocument; */ + + retval->startElement = (startElementSAXFunc)&PSaxStartElement; + retval->endElement = (endElementSAXFunc)&PSaxEndElement; + + retval->characters = (charactersSAXFunc)&PSaxCharacters; + retval->ignorableWhitespace = (ignorableWhitespaceSAXFunc)&PSaxCharacters; + + retval->comment = (commentSAXFunc)&PSaxComment; + retval->cdataBlock = (cdataBlockSAXFunc)&PSaxCDATABlock; + + retval->processingInstruction = (processingInstructionSAXFunc)&PSaxProcessingInstruction; + + /* warning functions should be internal */ + retval->warning = (warningSAXFunc)&PmmSaxWarning; + retval->error = (errorSAXFunc)&PmmSaxError; + retval->fatalError = (fatalErrorSAXFunc)&PmmSaxFatalError; + + retval->externalSubset = (externalSubsetSAXFunc)&PSaxExternalSubset; + + /* + retval->internalSubset = (internalSubsetSAXFunc)&PSaxInternalSubset; + retval->elementDecl = (elementDeclSAXFunc)&PSaxElementDecl; + retval->entityDecl = (entityDeclSAXFunc)&PSaxEntityDecl; + retval->notationDecl = (notationDeclSAXFunc)&PSaxNotationDecl; + retval->attributeDecl = (attributeDeclSAXFunc)&PSaxAttributeDecl; + retval->unparsedEntityDecl = (unparsedEntityDeclSAXFunc)&PSaxUnparsedEntityDecl; + */ + + return retval; +} + diff --git a/perl-libxml-sax.h b/perl-libxml-sax.h new file mode 100644 index 0000000..f2fa1df --- /dev/null +++ b/perl-libxml-sax.h @@ -0,0 +1,43 @@ +/** + * perl-libxml-sax.h + * $Id$ + */ + +#ifndef __PERL_LIBXML_SAX_H__ +#define __PERL_LIBXML_SAX_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef __cplusplus +} +#endif + + +/* + * auxiliary macro to serve as an croak(NULL) + * unlike croak(NULL), this version does not produce + * a warning (see the perlapi for the meaning of croak(NULL)) + * + */ + +#define croak_obj Perl_croak(aTHX_ NULL) + + +/* has to be called in BOOT sequence */ +void +PmmSAXInitialize(pTHX); + +void +PmmSAXInitContext( xmlParserCtxtPtr ctxt, SV * parser, SV * saved_error ); + +void +PmmSAXCloseContext( xmlParserCtxtPtr ctxt ); + +xmlSAXHandlerPtr +PSaxGetHandler(); + +#endif /* __PERL_LIBXML_SAX_H__ */ diff --git a/ppport.h b/ppport.h new file mode 100644 index 0000000..f8117f4 --- /dev/null +++ b/ppport.h @@ -0,0 +1,6376 @@ +#if 0 +<<'SKIP'; +#endif +/* +---------------------------------------------------------------------- + + ppport.h -- Perl/Pollution/Portability Version 3.13 + + Automatically created by Devel::PPPort running under perl 5.010000. + + Do NOT edit this file directly! -- Edit PPPort_pm.PL and the + includes in parts/inc/ instead. + + Use 'perldoc ppport.h' to view the documentation below. + +---------------------------------------------------------------------- + +SKIP + +=pod + +=head1 NAME + +ppport.h - Perl/Pollution/Portability version 3.13 + +=head1 SYNOPSIS + + perl ppport.h [options] [source files] + + Searches current directory for files if no [source files] are given + + --help show short help + + --version show version + + --patch=file write one patch file with changes + --copy=suffix write changed copies with suffix + --diff=program use diff program and options + + --compat-version=version provide compatibility with Perl version + --cplusplus accept C++ comments + + --quiet don't output anything except fatal errors + --nodiag don't show diagnostics + --nohints don't show hints + --nochanges don't suggest changes + --nofilter don't filter input files + + --strip strip all script and doc functionality from + ppport.h + + --list-provided list provided API + --list-unsupported list unsupported API + --api-info=name show Perl API portability information + +=head1 COMPATIBILITY + +This version of F is designed to support operation with Perl +installations back to 5.003, and has been tested up to 5.10.0. + +=head1 OPTIONS + +=head2 --help + +Display a brief usage summary. + +=head2 --version + +Display the version of F. + +=head2 --patch=I + +If this option is given, a single patch file will be created if +any changes are suggested. This requires a working diff program +to be installed on your system. + +=head2 --copy=I + +If this option is given, a copy of each file will be saved with +the given suffix that contains the suggested changes. This does +not require any external programs. Note that this does not +automagially add a dot between the original filename and the +suffix. If you want the dot, you have to include it in the option +argument. + +If neither C<--patch> or C<--copy> are given, the default is to +simply print the diffs for each file. This requires either +C or a C program to be installed. + +=head2 --diff=I + +Manually set the diff program and options to use. The default +is to use C, when installed, and output unified +context diffs. + +=head2 --compat-version=I + +Tell F to check for compatibility with the given +Perl version. The default is to check for compatibility with Perl +version 5.003. You can use this option to reduce the output +of F if you intend to be backward compatible only +down to a certain Perl version. + +=head2 --cplusplus + +Usually, F will detect C++ style comments and +replace them with C style comments for portability reasons. +Using this option instructs F to leave C++ +comments untouched. + +=head2 --quiet + +Be quiet. Don't print anything except fatal errors. + +=head2 --nodiag + +Don't output any diagnostic messages. Only portability +alerts will be printed. + +=head2 --nohints + +Don't output any hints. Hints often contain useful portability +notes. Warnings will still be displayed. + +=head2 --nochanges + +Don't suggest any changes. Only give diagnostic output and hints +unless these are also deactivated. + +=head2 --nofilter + +Don't filter the list of input files. By default, files not looking +like source code (i.e. not *.xs, *.c, *.cc, *.cpp or *.h) are skipped. + +=head2 --strip + +Strip all script and documentation functionality from F. +This reduces the size of F dramatically and may be useful +if you want to include F in smaller modules without +increasing their distribution size too much. + +The stripped F will have a C<--unstrip> option that allows +you to undo the stripping, but only if an appropriate C +module is installed. + +=head2 --list-provided + +Lists the API elements for which compatibility is provided by +F. Also lists if it must be explicitly requested, +if it has dependencies, and if there are hints or warnings for it. + +=head2 --list-unsupported + +Lists the API elements that are known not to be supported by +F and below which version of Perl they probably +won't be available or work. + +=head2 --api-info=I + +Show portability information for API elements matching I. +If I is surrounded by slashes, it is interpreted as a regular +expression. + +=head1 DESCRIPTION + +In order for a Perl extension (XS) module to be as portable as possible +across differing versions of Perl itself, certain steps need to be taken. + +=over 4 + +=item * + +Including this header is the first major one. This alone will give you +access to a large part of the Perl API that hasn't been available in +earlier Perl releases. Use + + perl ppport.h --list-provided + +to see which API elements are provided by ppport.h. + +=item * + +You should avoid using deprecated parts of the API. For example, using +global Perl variables without the C prefix is deprecated. Also, +some API functions used to have a C prefix. Using this form is +also deprecated. You can safely use the supported API, as F +will provide wrappers for older Perl versions. + +=item * + +If you use one of a few functions or variables that were not present in +earlier versions of Perl, and that can't be provided using a macro, you +have to explicitly request support for these functions by adding one or +more C<#define>s in your source code before the inclusion of F. + +These functions or variables will be marked C in the list shown +by C<--list-provided>. + +Depending on whether you module has a single or multiple files that +use such functions or variables, you want either C or global +variants. + +For a C function or variable (used only in a single source +file), use: + + #define NEED_function + #define NEED_variable + +For a global function or variable (used in multiple source files), +use: + + #define NEED_function_GLOBAL + #define NEED_variable_GLOBAL + +Note that you mustn't have more than one global request for the +same function or variable in your project. + + Function / Variable Static Request Global Request + ----------------------------------------------------------------------------------------- + PL_signals NEED_PL_signals NEED_PL_signals_GLOBAL + eval_pv() NEED_eval_pv NEED_eval_pv_GLOBAL + grok_bin() NEED_grok_bin NEED_grok_bin_GLOBAL + grok_hex() NEED_grok_hex NEED_grok_hex_GLOBAL + grok_number() NEED_grok_number NEED_grok_number_GLOBAL + grok_numeric_radix() NEED_grok_numeric_radix NEED_grok_numeric_radix_GLOBAL + grok_oct() NEED_grok_oct NEED_grok_oct_GLOBAL + load_module() NEED_load_module NEED_load_module_GLOBAL + my_snprintf() NEED_my_snprintf NEED_my_snprintf_GLOBAL + my_strlcat() NEED_my_strlcat NEED_my_strlcat_GLOBAL + my_strlcpy() NEED_my_strlcpy NEED_my_strlcpy_GLOBAL + newCONSTSUB() NEED_newCONSTSUB NEED_newCONSTSUB_GLOBAL + newRV_noinc() NEED_newRV_noinc NEED_newRV_noinc_GLOBAL + newSVpvn_share() NEED_newSVpvn_share NEED_newSVpvn_share_GLOBAL + sv_2pv_flags() NEED_sv_2pv_flags NEED_sv_2pv_flags_GLOBAL + sv_2pvbyte() NEED_sv_2pvbyte NEED_sv_2pvbyte_GLOBAL + sv_catpvf_mg() NEED_sv_catpvf_mg NEED_sv_catpvf_mg_GLOBAL + sv_catpvf_mg_nocontext() NEED_sv_catpvf_mg_nocontext NEED_sv_catpvf_mg_nocontext_GLOBAL + sv_pvn_force_flags() NEED_sv_pvn_force_flags NEED_sv_pvn_force_flags_GLOBAL + sv_setpvf_mg() NEED_sv_setpvf_mg NEED_sv_setpvf_mg_GLOBAL + sv_setpvf_mg_nocontext() NEED_sv_setpvf_mg_nocontext NEED_sv_setpvf_mg_nocontext_GLOBAL + vload_module() NEED_vload_module NEED_vload_module_GLOBAL + vnewSVpvf() NEED_vnewSVpvf NEED_vnewSVpvf_GLOBAL + warner() NEED_warner NEED_warner_GLOBAL + +To avoid namespace conflicts, you can change the namespace of the +explicitly exported functions / variables using the C +macro. Just C<#define> the macro before including C: + + #define DPPP_NAMESPACE MyOwnNamespace_ + #include "ppport.h" + +The default namespace is C. + +=back + +The good thing is that most of the above can be checked by running +F on your source code. See the next section for +details. + +=head1 EXAMPLES + +To verify whether F is needed for your module, whether you +should make any changes to your code, and whether any special defines +should be used, F can be run as a Perl script to check your +source code. Simply say: + + perl ppport.h + +The result will usually be a list of patches suggesting changes +that should at least be acceptable, if not necessarily the most +efficient solution, or a fix for all possible problems. + +If you know that your XS module uses features only available in +newer Perl releases, if you're aware that it uses C++ comments, +and if you want all suggestions as a single patch file, you could +use something like this: + + perl ppport.h --compat-version=5.6.0 --cplusplus --patch=test.diff + +If you only want your code to be scanned without any suggestions +for changes, use: + + perl ppport.h --nochanges + +You can specify a different C program or options, using +the C<--diff> option: + + perl ppport.h --diff='diff -C 10' + +This would output context diffs with 10 lines of context. + +If you want to create patched copies of your files instead, use: + + perl ppport.h --copy=.new + +To display portability information for the C function, +use: + + perl ppport.h --api-info=newSVpvn + +Since the argument to C<--api-info> can be a regular expression, +you can use + + perl ppport.h --api-info=/_nomg$/ + +to display portability information for all C<_nomg> functions or + + perl ppport.h --api-info=/./ + +to display information for all known API elements. + +=head1 BUGS + +If this version of F is causing failure during +the compilation of this module, please check if newer versions +of either this module or C are available on CPAN +before sending a bug report. + +If F was generated using the latest version of +C and is causing failure of this module, please +file a bug report using the CPAN Request Tracker at L. + +Please include the following information: + +=over 4 + +=item 1. + +The complete output from running "perl -V" + +=item 2. + +This file. + +=item 3. + +The name and version of the module you were trying to build. + +=item 4. + +A full log of the build that failed. + +=item 5. + +Any other information that you think could be relevant. + +=back + +For the latest version of this code, please get the C +module from CPAN. + +=head1 COPYRIGHT + +Version 3.x, Copyright (c) 2004-2007, Marcus Holland-Moritz. + +Version 2.x, Copyright (C) 2001, Paul Marquess. + +Version 1.x, Copyright (C) 1999, Kenneth Albanowski. + +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +See L. + +=cut + +use strict; + +# Disable broken TRIE-optimization +BEGIN { eval '${^RE_TRIE_MAXBUF} = -1' if $] >= 5.009004 && $] <= 5.009005 } + +my $VERSION = 3.13; + +my %opt = ( + quiet => 0, + diag => 1, + hints => 1, + changes => 1, + cplusplus => 0, + filter => 1, + strip => 0, + version => 0, +); + +my($ppport) = $0 =~ /([\w.]+)$/; +my $LF = '(?:\r\n|[\r\n])'; # line feed +my $HS = "[ \t]"; # horizontal whitespace + +# Never use C comments in this file! +my $ccs = '/'.'*'; +my $cce = '*'.'/'; +my $rccs = quotemeta $ccs; +my $rcce = quotemeta $cce; + +eval { + require Getopt::Long; + Getopt::Long::GetOptions(\%opt, qw( + help quiet diag! filter! hints! changes! cplusplus strip version + patch=s copy=s diff=s compat-version=s + list-provided list-unsupported api-info=s + )) or usage(); +}; + +if ($@ and grep /^-/, @ARGV) { + usage() if "@ARGV" =~ /^--?h(?:elp)?$/; + die "Getopt::Long not found. Please don't use any options.\n"; +} + +if ($opt{version}) { + print "This is $0 $VERSION.\n"; + exit 0; +} + +usage() if $opt{help}; +strip() if $opt{strip}; + +if (exists $opt{'compat-version'}) { + my($r,$v,$s) = eval { parse_version($opt{'compat-version'}) }; + if ($@) { + die "Invalid version number format: '$opt{'compat-version'}'\n"; + } + die "Only Perl 5 is supported\n" if $r != 5; + die "Invalid version number: $opt{'compat-version'}\n" if $v >= 1000 || $s >= 1000; + $opt{'compat-version'} = sprintf "%d.%03d%03d", $r, $v, $s; +} +else { + $opt{'compat-version'} = 5; +} + +my %API = map { /^(\w+)\|([^|]*)\|([^|]*)\|(\w*)$/ + ? ( $1 => { + ($2 ? ( base => $2 ) : ()), + ($3 ? ( todo => $3 ) : ()), + (index($4, 'v') >= 0 ? ( varargs => 1 ) : ()), + (index($4, 'p') >= 0 ? ( provided => 1 ) : ()), + (index($4, 'n') >= 0 ? ( nothxarg => 1 ) : ()), + } ) + : die "invalid spec: $_" } qw( +AvFILLp|5.004050||p +AvFILL||| +CLASS|||n +CX_CURPAD_SAVE||| +CX_CURPAD_SV||| +CopFILEAV|5.006000||p +CopFILEGV_set|5.006000||p +CopFILEGV|5.006000||p +CopFILESV|5.006000||p +CopFILE_set|5.006000||p +CopFILE|5.006000||p +CopSTASHPV_set|5.006000||p +CopSTASHPV|5.006000||p +CopSTASH_eq|5.006000||p +CopSTASH_set|5.006000||p +CopSTASH|5.006000||p +CopyD|5.009002||p +Copy||| +CvPADLIST||| +CvSTASH||| +CvWEAKOUTSIDE||| +DEFSV|5.004050||p +END_EXTERN_C|5.005000||p +ENTER||| +ERRSV|5.004050||p +EXTEND||| +EXTERN_C|5.005000||p +F0convert|||n +FREETMPS||| +GIMME_V||5.004000|n +GIMME|||n +GROK_NUMERIC_RADIX|5.007002||p +G_ARRAY||| +G_DISCARD||| +G_EVAL||| +G_NOARGS||| +G_SCALAR||| +G_VOID||5.004000| +GetVars||| +GvSV||| +Gv_AMupdate||| +HEf_SVKEY||5.004000| +HeHASH||5.004000| +HeKEY||5.004000| +HeKLEN||5.004000| +HePV||5.004000| +HeSVKEY_force||5.004000| +HeSVKEY_set||5.004000| +HeSVKEY||5.004000| +HeVAL||5.004000| +HvNAME||| +INT2PTR|5.006000||p +IN_LOCALE_COMPILETIME|5.007002||p +IN_LOCALE_RUNTIME|5.007002||p +IN_LOCALE|5.007002||p +IN_PERL_COMPILETIME|5.008001||p +IS_NUMBER_GREATER_THAN_UV_MAX|5.007002||p +IS_NUMBER_INFINITY|5.007002||p +IS_NUMBER_IN_UV|5.007002||p +IS_NUMBER_NAN|5.007003||p +IS_NUMBER_NEG|5.007002||p +IS_NUMBER_NOT_INT|5.007002||p +IVSIZE|5.006000||p +IVTYPE|5.006000||p +IVdf|5.006000||p +LEAVE||| +LVRET||| +MARK||| +MULTICALL||5.009005| +MY_CXT_CLONE|5.009002||p +MY_CXT_INIT|5.007003||p +MY_CXT|5.007003||p +MoveD|5.009002||p +Move||| +NOOP|5.005000||p +NUM2PTR|5.006000||p +NVTYPE|5.006000||p +NVef|5.006001||p +NVff|5.006001||p +NVgf|5.006001||p +Newxc|5.009003||p +Newxz|5.009003||p +Newx|5.009003||p +Nullav||| +Nullch||| +Nullcv||| +Nullhv||| +Nullsv||| +ORIGMARK||| +PAD_BASE_SV||| +PAD_CLONE_VARS||| +PAD_COMPNAME_FLAGS||| +PAD_COMPNAME_GEN_set||| +PAD_COMPNAME_GEN||| +PAD_COMPNAME_OURSTASH||| +PAD_COMPNAME_PV||| +PAD_COMPNAME_TYPE||| +PAD_RESTORE_LOCAL||| +PAD_SAVE_LOCAL||| +PAD_SAVE_SETNULLPAD||| +PAD_SETSV||| +PAD_SET_CUR_NOSAVE||| +PAD_SET_CUR||| +PAD_SVl||| +PAD_SV||| +PERL_ABS|5.008001||p +PERL_BCDVERSION|5.009005||p +PERL_GCC_BRACE_GROUPS_FORBIDDEN|5.008001||p +PERL_HASH|5.004000||p +PERL_INT_MAX|5.004000||p +PERL_INT_MIN|5.004000||p +PERL_LONG_MAX|5.004000||p +PERL_LONG_MIN|5.004000||p +PERL_MAGIC_arylen|5.007002||p +PERL_MAGIC_backref|5.007002||p +PERL_MAGIC_bm|5.007002||p +PERL_MAGIC_collxfrm|5.007002||p +PERL_MAGIC_dbfile|5.007002||p +PERL_MAGIC_dbline|5.007002||p +PERL_MAGIC_defelem|5.007002||p +PERL_MAGIC_envelem|5.007002||p +PERL_MAGIC_env|5.007002||p +PERL_MAGIC_ext|5.007002||p +PERL_MAGIC_fm|5.007002||p +PERL_MAGIC_glob|5.009005||p +PERL_MAGIC_isaelem|5.007002||p +PERL_MAGIC_isa|5.007002||p +PERL_MAGIC_mutex|5.009005||p +PERL_MAGIC_nkeys|5.007002||p +PERL_MAGIC_overload_elem|5.007002||p +PERL_MAGIC_overload_table|5.007002||p +PERL_MAGIC_overload|5.007002||p +PERL_MAGIC_pos|5.007002||p +PERL_MAGIC_qr|5.007002||p +PERL_MAGIC_regdata|5.007002||p +PERL_MAGIC_regdatum|5.007002||p +PERL_MAGIC_regex_global|5.007002||p +PERL_MAGIC_shared_scalar|5.007003||p +PERL_MAGIC_shared|5.007003||p +PERL_MAGIC_sigelem|5.007002||p +PERL_MAGIC_sig|5.007002||p +PERL_MAGIC_substr|5.007002||p +PERL_MAGIC_sv|5.007002||p +PERL_MAGIC_taint|5.007002||p +PERL_MAGIC_tiedelem|5.007002||p +PERL_MAGIC_tiedscalar|5.007002||p +PERL_MAGIC_tied|5.007002||p +PERL_MAGIC_utf8|5.008001||p +PERL_MAGIC_uvar_elem|5.007003||p +PERL_MAGIC_uvar|5.007002||p +PERL_MAGIC_vec|5.007002||p +PERL_MAGIC_vstring|5.008001||p +PERL_QUAD_MAX|5.004000||p +PERL_QUAD_MIN|5.004000||p +PERL_REVISION|5.006000||p +PERL_SCAN_ALLOW_UNDERSCORES|5.007003||p +PERL_SCAN_DISALLOW_PREFIX|5.007003||p +PERL_SCAN_GREATER_THAN_UV_MAX|5.007003||p +PERL_SCAN_SILENT_ILLDIGIT|5.008001||p +PERL_SHORT_MAX|5.004000||p +PERL_SHORT_MIN|5.004000||p +PERL_SIGNALS_UNSAFE_FLAG|5.008001||p +PERL_SUBVERSION|5.006000||p +PERL_UCHAR_MAX|5.004000||p +PERL_UCHAR_MIN|5.004000||p +PERL_UINT_MAX|5.004000||p +PERL_UINT_MIN|5.004000||p +PERL_ULONG_MAX|5.004000||p +PERL_ULONG_MIN|5.004000||p +PERL_UNUSED_ARG|5.009003||p +PERL_UNUSED_CONTEXT|5.009004||p +PERL_UNUSED_DECL|5.007002||p +PERL_UNUSED_VAR|5.007002||p +PERL_UQUAD_MAX|5.004000||p +PERL_UQUAD_MIN|5.004000||p +PERL_USE_GCC_BRACE_GROUPS|5.009004||p +PERL_USHORT_MAX|5.004000||p +PERL_USHORT_MIN|5.004000||p +PERL_VERSION|5.006000||p +PL_DBsignal|5.005000||p +PL_DBsingle|||pn +PL_DBsub|||pn +PL_DBtrace|||pn +PL_Sv|5.005000||p +PL_compiling|5.004050||p +PL_copline|5.009005||p +PL_curcop|5.004050||p +PL_curstash|5.004050||p +PL_debstash|5.004050||p +PL_defgv|5.004050||p +PL_diehook|5.004050||p +PL_dirty|5.004050||p +PL_dowarn|||pn +PL_errgv|5.004050||p +PL_expect|5.009005||p +PL_hexdigit|5.005000||p +PL_hints|5.005000||p +PL_last_in_gv|||n +PL_laststatval|5.005000||p +PL_modglobal||5.005000|n +PL_na|5.004050||pn +PL_no_modify|5.006000||p +PL_ofs_sv|||n +PL_perl_destruct_level|5.004050||p +PL_perldb|5.004050||p +PL_ppaddr|5.006000||p +PL_rsfp_filters|5.004050||p +PL_rsfp|5.004050||p +PL_rs|||n +PL_signals|5.008001||p +PL_stack_base|5.004050||p +PL_stack_sp|5.004050||p +PL_statcache|5.005000||p +PL_stdingv|5.004050||p +PL_sv_arenaroot|5.004050||p +PL_sv_no|5.004050||pn +PL_sv_undef|5.004050||pn +PL_sv_yes|5.004050||pn +PL_tainted|5.004050||p +PL_tainting|5.004050||p +POP_MULTICALL||5.009005| +POPi|||n +POPl|||n +POPn|||n +POPpbytex||5.007001|n +POPpx||5.005030|n +POPp|||n +POPs|||n +PTR2IV|5.006000||p +PTR2NV|5.006000||p +PTR2UV|5.006000||p +PTR2ul|5.007001||p +PTRV|5.006000||p +PUSHMARK||| +PUSH_MULTICALL||5.009005| +PUSHi||| +PUSHmortal|5.009002||p +PUSHn||| +PUSHp||| +PUSHs||| +PUSHu|5.004000||p +PUTBACK||| +PerlIO_clearerr||5.007003| +PerlIO_close||5.007003| +PerlIO_context_layers||5.009004| +PerlIO_eof||5.007003| +PerlIO_error||5.007003| +PerlIO_fileno||5.007003| +PerlIO_fill||5.007003| +PerlIO_flush||5.007003| +PerlIO_get_base||5.007003| +PerlIO_get_bufsiz||5.007003| +PerlIO_get_cnt||5.007003| +PerlIO_get_ptr||5.007003| +PerlIO_read||5.007003| +PerlIO_seek||5.007003| +PerlIO_set_cnt||5.007003| +PerlIO_set_ptrcnt||5.007003| +PerlIO_setlinebuf||5.007003| +PerlIO_stderr||5.007003| +PerlIO_stdin||5.007003| +PerlIO_stdout||5.007003| +PerlIO_tell||5.007003| +PerlIO_unread||5.007003| +PerlIO_write||5.007003| +Perl_signbit||5.009005|n +PoisonFree|5.009004||p +PoisonNew|5.009004||p +PoisonWith|5.009004||p +Poison|5.008000||p +RETVAL|||n +Renewc||| +Renew||| +SAVECLEARSV||| +SAVECOMPPAD||| +SAVEPADSV||| +SAVETMPS||| +SAVE_DEFSV|5.004050||p +SPAGAIN||| +SP||| +START_EXTERN_C|5.005000||p +START_MY_CXT|5.007003||p +STMT_END|||p +STMT_START|||p +STR_WITH_LEN|5.009003||p +ST||| +SV_CONST_RETURN|5.009003||p +SV_COW_DROP_PV|5.008001||p +SV_COW_SHARED_HASH_KEYS|5.009005||p +SV_GMAGIC|5.007002||p +SV_HAS_TRAILING_NUL|5.009004||p +SV_IMMEDIATE_UNREF|5.007001||p +SV_MUTABLE_RETURN|5.009003||p +SV_NOSTEAL|5.009002||p +SV_SMAGIC|5.009003||p +SV_UTF8_NO_ENCODING|5.008001||p +SVf|5.006000||p +SVt_IV||| +SVt_NV||| +SVt_PVAV||| +SVt_PVCV||| +SVt_PVHV||| +SVt_PVMG||| +SVt_PV||| +Safefree||| +Slab_Alloc||| +Slab_Free||| +Slab_to_rw||| +StructCopy||| +SvCUR_set||| +SvCUR||| +SvEND||| +SvGAMAGIC||5.006001| +SvGETMAGIC|5.004050||p +SvGROW||| +SvIOK_UV||5.006000| +SvIOK_notUV||5.006000| +SvIOK_off||| +SvIOK_only_UV||5.006000| +SvIOK_only||| +SvIOK_on||| +SvIOKp||| +SvIOK||| +SvIVX||| +SvIV_nomg|5.009001||p +SvIV_set||| +SvIVx||| +SvIV||| +SvIsCOW_shared_hash||5.008003| +SvIsCOW||5.008003| +SvLEN_set||| +SvLEN||| +SvLOCK||5.007003| +SvMAGIC_set|5.009003||p +SvNIOK_off||| +SvNIOKp||| +SvNIOK||| +SvNOK_off||| +SvNOK_only||| +SvNOK_on||| +SvNOKp||| +SvNOK||| +SvNVX||| +SvNV_set||| +SvNVx||| +SvNV||| +SvOK||| +SvOOK||| +SvPOK_off||| +SvPOK_only_UTF8||5.006000| +SvPOK_only||| +SvPOK_on||| +SvPOKp||| +SvPOK||| +SvPVX_const|5.009003||p +SvPVX_mutable|5.009003||p +SvPVX||| +SvPV_const|5.009003||p +SvPV_flags_const_nolen|5.009003||p +SvPV_flags_const|5.009003||p +SvPV_flags_mutable|5.009003||p +SvPV_flags|5.007002||p +SvPV_force_flags_mutable|5.009003||p +SvPV_force_flags_nolen|5.009003||p +SvPV_force_flags|5.007002||p +SvPV_force_mutable|5.009003||p +SvPV_force_nolen|5.009003||p +SvPV_force_nomg_nolen|5.009003||p +SvPV_force_nomg|5.007002||p +SvPV_force|||p +SvPV_mutable|5.009003||p +SvPV_nolen_const|5.009003||p +SvPV_nolen|5.006000||p +SvPV_nomg_const_nolen|5.009003||p +SvPV_nomg_const|5.009003||p +SvPV_nomg|5.007002||p +SvPV_set||| +SvPVbyte_force||5.009002| +SvPVbyte_nolen||5.006000| +SvPVbytex_force||5.006000| +SvPVbytex||5.006000| +SvPVbyte|5.006000||p +SvPVutf8_force||5.006000| +SvPVutf8_nolen||5.006000| +SvPVutf8x_force||5.006000| +SvPVutf8x||5.006000| +SvPVutf8||5.006000| +SvPVx||| +SvPV||| +SvREFCNT_dec||| +SvREFCNT_inc_NN|5.009004||p +SvREFCNT_inc_simple_NN|5.009004||p +SvREFCNT_inc_simple_void_NN|5.009004||p +SvREFCNT_inc_simple_void|5.009004||p +SvREFCNT_inc_simple|5.009004||p +SvREFCNT_inc_void_NN|5.009004||p +SvREFCNT_inc_void|5.009004||p +SvREFCNT_inc|||p +SvREFCNT||| +SvROK_off||| +SvROK_on||| +SvROK||| +SvRV_set|5.009003||p +SvRV||| +SvRXOK||5.009005| +SvRX||5.009005| +SvSETMAGIC||| +SvSHARED_HASH|5.009003||p +SvSHARE||5.007003| +SvSTASH_set|5.009003||p +SvSTASH||| +SvSetMagicSV_nosteal||5.004000| +SvSetMagicSV||5.004000| +SvSetSV_nosteal||5.004000| +SvSetSV||| +SvTAINTED_off||5.004000| +SvTAINTED_on||5.004000| +SvTAINTED||5.004000| +SvTAINT||| +SvTRUE||| +SvTYPE||| +SvUNLOCK||5.007003| +SvUOK|5.007001|5.006000|p +SvUPGRADE||| +SvUTF8_off||5.006000| +SvUTF8_on||5.006000| +SvUTF8||5.006000| +SvUVXx|5.004000||p +SvUVX|5.004000||p +SvUV_nomg|5.009001||p +SvUV_set|5.009003||p +SvUVx|5.004000||p +SvUV|5.004000||p +SvVOK||5.008001| +SvVSTRING_mg|5.009004||p +THIS|||n +UNDERBAR|5.009002||p +UTF8_MAXBYTES|5.009002||p +UVSIZE|5.006000||p +UVTYPE|5.006000||p +UVXf|5.007001||p +UVof|5.006000||p +UVuf|5.006000||p +UVxf|5.006000||p +WARN_ALL|5.006000||p +WARN_AMBIGUOUS|5.006000||p +WARN_ASSERTIONS|5.009005||p +WARN_BAREWORD|5.006000||p +WARN_CLOSED|5.006000||p +WARN_CLOSURE|5.006000||p +WARN_DEBUGGING|5.006000||p +WARN_DEPRECATED|5.006000||p +WARN_DIGIT|5.006000||p +WARN_EXEC|5.006000||p +WARN_EXITING|5.006000||p +WARN_GLOB|5.006000||p +WARN_INPLACE|5.006000||p +WARN_INTERNAL|5.006000||p +WARN_IO|5.006000||p +WARN_LAYER|5.008000||p +WARN_MALLOC|5.006000||p +WARN_MISC|5.006000||p +WARN_NEWLINE|5.006000||p +WARN_NUMERIC|5.006000||p +WARN_ONCE|5.006000||p +WARN_OVERFLOW|5.006000||p +WARN_PACK|5.006000||p +WARN_PARENTHESIS|5.006000||p +WARN_PIPE|5.006000||p +WARN_PORTABLE|5.006000||p +WARN_PRECEDENCE|5.006000||p +WARN_PRINTF|5.006000||p +WARN_PROTOTYPE|5.006000||p +WARN_QW|5.006000||p +WARN_RECURSION|5.006000||p +WARN_REDEFINE|5.006000||p +WARN_REGEXP|5.006000||p +WARN_RESERVED|5.006000||p +WARN_SEMICOLON|5.006000||p +WARN_SEVERE|5.006000||p +WARN_SIGNAL|5.006000||p +WARN_SUBSTR|5.006000||p +WARN_SYNTAX|5.006000||p +WARN_TAINT|5.006000||p +WARN_THREADS|5.008000||p +WARN_UNINITIALIZED|5.006000||p +WARN_UNOPENED|5.006000||p +WARN_UNPACK|5.006000||p +WARN_UNTIE|5.006000||p +WARN_UTF8|5.006000||p +WARN_VOID|5.006000||p +XCPT_CATCH|5.009002||p +XCPT_RETHROW|5.009002||p +XCPT_TRY_END|5.009002||p +XCPT_TRY_START|5.009002||p +XPUSHi||| +XPUSHmortal|5.009002||p +XPUSHn||| +XPUSHp||| +XPUSHs||| +XPUSHu|5.004000||p +XSRETURN_EMPTY||| +XSRETURN_IV||| +XSRETURN_NO||| +XSRETURN_NV||| +XSRETURN_PV||| +XSRETURN_UNDEF||| +XSRETURN_UV|5.008001||p +XSRETURN_YES||| +XSRETURN|||p +XST_mIV||| +XST_mNO||| +XST_mNV||| +XST_mPV||| +XST_mUNDEF||| +XST_mUV|5.008001||p +XST_mYES||| +XS_VERSION_BOOTCHECK||| +XS_VERSION||| +XSprePUSH|5.006000||p +XS||| +ZeroD|5.009002||p +Zero||| +_aMY_CXT|5.007003||p +_pMY_CXT|5.007003||p +aMY_CXT_|5.007003||p +aMY_CXT|5.007003||p +aTHXR_|5.009005||p +aTHXR|5.009005||p +aTHX_|5.006000||p +aTHX|5.006000||p +add_data|||n +addmad||| +allocmy||| +amagic_call||| +amagic_cmp_locale||| +amagic_cmp||| +amagic_i_ncmp||| +amagic_ncmp||| +any_dup||| +ao||| +append_elem||| +append_list||| +append_madprops||| +apply_attrs_my||| +apply_attrs_string||5.006001| +apply_attrs||| +apply||| +atfork_lock||5.007003|n +atfork_unlock||5.007003|n +av_arylen_p||5.009003| +av_clear||| +av_create_and_push||5.009005| +av_create_and_unshift_one||5.009005| +av_delete||5.006000| +av_exists||5.006000| +av_extend||| +av_fake||| +av_fetch||| +av_fill||| +av_len||| +av_make||| +av_pop||| +av_push||| +av_reify||| +av_shift||| +av_store||| +av_undef||| +av_unshift||| +ax|||n +bad_type||| +bind_match||| +block_end||| +block_gimme||5.004000| +block_start||| +boolSV|5.004000||p +boot_core_PerlIO||| +boot_core_UNIVERSAL||| +boot_core_mro||| +boot_core_xsutils||| +bytes_from_utf8||5.007001| +bytes_to_uni|||n +bytes_to_utf8||5.006001| +call_argv|5.006000||p +call_atexit||5.006000| +call_list||5.004000| +call_method|5.006000||p +call_pv|5.006000||p +call_sv|5.006000||p +calloc||5.007002|n +cando||| +cast_i32||5.006000| +cast_iv||5.006000| +cast_ulong||5.006000| +cast_uv||5.006000| +check_type_and_open||| +check_uni||| +checkcomma||| +checkposixcc||| +ckWARN|5.006000||p +ck_anoncode||| +ck_bitop||| +ck_concat||| +ck_defined||| +ck_delete||| +ck_die||| +ck_eof||| +ck_eval||| +ck_exec||| +ck_exists||| +ck_exit||| +ck_ftst||| +ck_fun||| +ck_glob||| +ck_grep||| +ck_index||| +ck_join||| +ck_lengthconst||| +ck_lfun||| +ck_listiob||| +ck_match||| +ck_method||| +ck_null||| +ck_open||| +ck_readline||| +ck_repeat||| +ck_require||| +ck_retarget||| +ck_return||| +ck_rfun||| +ck_rvconst||| +ck_sassign||| +ck_select||| +ck_shift||| +ck_sort||| +ck_spair||| +ck_split||| +ck_subr||| +ck_substr||| +ck_svconst||| +ck_trunc||| +ck_unpack||| +ckwarn_d||5.009003| +ckwarn||5.009003| +cl_and|||n +cl_anything|||n +cl_init_zero|||n +cl_init|||n +cl_is_anything|||n +cl_or|||n +clear_placeholders||| +closest_cop||| +convert||| +cop_free||| +cr_textfilter||| +create_eval_scope||| +croak_nocontext|||vn +croak|||v +csighandler||5.009003|n +curmad||| +custom_op_desc||5.007003| +custom_op_name||5.007003| +cv_ckproto_len||| +cv_ckproto||| +cv_clone||| +cv_const_sv||5.004000| +cv_dump||| +cv_undef||| +cx_dump||5.005000| +cx_dup||| +cxinc||| +dAXMARK|5.009003||p +dAX|5.007002||p +dITEMS|5.007002||p +dMARK||| +dMULTICALL||5.009003| +dMY_CXT_SV|5.007003||p +dMY_CXT|5.007003||p +dNOOP|5.006000||p +dORIGMARK||| +dSP||| +dTHR|5.004050||p +dTHXR|5.009005||p +dTHXa|5.006000||p +dTHXoa|5.006000||p +dTHX|5.006000||p +dUNDERBAR|5.009002||p +dVAR|5.009003||p +dXCPT|5.009002||p +dXSARGS||| +dXSI32||| +dXSTARG|5.006000||p +deb_curcv||| +deb_nocontext|||vn +deb_stack_all||| +deb_stack_n||| +debop||5.005000| +debprofdump||5.005000| +debprof||| +debstackptrs||5.007003| +debstack||5.007003| +debug_start_match||| +deb||5.007003|v +del_sv||| +delete_eval_scope||| +delimcpy||5.004000| +deprecate_old||| +deprecate||| +despatch_signals||5.007001| +destroy_matcher||| +die_nocontext|||vn +die_where||| +die|||v +dirp_dup||| +div128||| +djSP||| +do_aexec5||| +do_aexec||| +do_aspawn||| +do_binmode||5.004050| +do_chomp||| +do_chop||| +do_close||| +do_dump_pad||| +do_eof||| +do_exec3||| +do_execfree||| +do_exec||| +do_gv_dump||5.006000| +do_gvgv_dump||5.006000| +do_hv_dump||5.006000| +do_ipcctl||| +do_ipcget||| +do_join||| +do_kv||| +do_magic_dump||5.006000| +do_msgrcv||| +do_msgsnd||| +do_oddball||| +do_op_dump||5.006000| +do_op_xmldump||| +do_open9||5.006000| +do_openn||5.007001| +do_open||5.004000| +do_pipe||| +do_pmop_dump||5.006000| +do_pmop_xmldump||| +do_print||| +do_readline||| +do_seek||| +do_semop||| +do_shmio||| +do_smartmatch||| +do_spawn_nowait||| +do_spawn||| +do_sprintf||| +do_sv_dump||5.006000| +do_sysseek||| +do_tell||| +do_trans_complex_utf8||| +do_trans_complex||| +do_trans_count_utf8||| +do_trans_count||| +do_trans_simple_utf8||| +do_trans_simple||| +do_trans||| +do_vecget||| +do_vecset||| +do_vop||| +docatch_body||| +docatch||| +doeval||| +dofile||| +dofindlabel||| +doform||| +doing_taint||5.008001|n +dooneliner||| +doopen_pm||| +doparseform||| +dopoptoeval||| +dopoptogiven||| +dopoptolabel||| +dopoptoloop||| +dopoptosub_at||| +dopoptosub||| +dopoptowhen||| +doref||5.009003| +dounwind||| +dowantarray||| +dump_all||5.006000| +dump_eval||5.006000| +dump_exec_pos||| +dump_fds||| +dump_form||5.006000| +dump_indent||5.006000|v +dump_mstats||| +dump_packsubs||5.006000| +dump_sub||5.006000| +dump_sv_child||| +dump_trie_interim_list||| +dump_trie_interim_table||| +dump_trie||| +dump_vindent||5.006000| +dumpuntil||| +dup_attrlist||| +emulate_cop_io||| +emulate_eaccess||| +eval_pv|5.006000||p +eval_sv|5.006000||p +exec_failed||| +expect_number||| +fbm_compile||5.005000| +fbm_instr||5.005000| +fd_on_nosuid_fs||| +feature_is_enabled||| +filter_add||| +filter_del||| +filter_gets||| +filter_read||| +find_and_forget_pmops||| +find_array_subscript||| +find_beginning||| +find_byclass||| +find_hash_subscript||| +find_in_my_stash||| +find_runcv||5.008001| +find_rundefsvoffset||5.009002| +find_script||| +find_uninit_var||| +first_symbol|||n +fold_constants||| +forbid_setid||| +force_ident||| +force_list||| +force_next||| +force_version||| +force_word||| +forget_pmop||| +form_nocontext|||vn +form||5.004000|v +fp_dup||| +fprintf_nocontext|||vn +free_global_struct||| +free_tied_hv_pool||| +free_tmps||| +gen_constant_list||| +get_arena||| +get_av|5.006000||p +get_context||5.006000|n +get_cvn_flags||5.009005| +get_cv|5.006000||p +get_db_sub||| +get_debug_opts||| +get_hash_seed||| +get_hv|5.006000||p +get_mstats||| +get_no_modify||| +get_num||| +get_op_descs||5.005000| +get_op_names||5.005000| +get_opargs||| +get_ppaddr||5.006000| +get_re_arg||| +get_sv|5.006000||p +get_vtbl||5.005030| +getcwd_sv||5.007002| +getenv_len||| +glob_2number||| +glob_2pv||| +glob_assign_glob||| +glob_assign_ref||| +gp_dup||| +gp_free||| +gp_ref||| +grok_bin|5.007003||p +grok_hex|5.007003||p +grok_number|5.007002||p +grok_numeric_radix|5.007002||p +grok_oct|5.007003||p +group_end||| +gv_AVadd||| +gv_HVadd||| +gv_IOadd||| +gv_SVadd||| +gv_autoload4||5.004000| +gv_check||| +gv_const_sv||5.009003| +gv_dump||5.006000| +gv_efullname3||5.004000| +gv_efullname4||5.006001| +gv_efullname||| +gv_ename||| +gv_fetchfile_flags||5.009005| +gv_fetchfile||| +gv_fetchmeth_autoload||5.007003| +gv_fetchmethod_autoload||5.004000| +gv_fetchmethod||| +gv_fetchmeth||| +gv_fetchpvn_flags||5.009002| +gv_fetchpv||| +gv_fetchsv||5.009002| +gv_fullname3||5.004000| +gv_fullname4||5.006001| +gv_fullname||| +gv_handler||5.007001| +gv_init_sv||| +gv_init||| +gv_name_set||5.009004| +gv_stashpvn|5.004000||p +gv_stashpvs||5.009003| +gv_stashpv||| +gv_stashsv||| +he_dup||| +hek_dup||| +hfreeentries||| +hsplit||| +hv_assert||5.009005| +hv_auxinit|||n +hv_backreferences_p||| +hv_clear_placeholders||5.009001| +hv_clear||| +hv_copy_hints_hv||| +hv_delayfree_ent||5.004000| +hv_delete_common||| +hv_delete_ent||5.004000| +hv_delete||| +hv_eiter_p||5.009003| +hv_eiter_set||5.009003| +hv_exists_ent||5.004000| +hv_exists||| +hv_fetch_common||| +hv_fetch_ent||5.004000| +hv_fetchs|5.009003||p +hv_fetch||| +hv_free_ent||5.004000| +hv_iterinit||| +hv_iterkeysv||5.004000| +hv_iterkey||| +hv_iternext_flags||5.008000| +hv_iternextsv||| +hv_iternext||| +hv_iterval||| +hv_kill_backrefs||| +hv_ksplit||5.004000| +hv_magic_check|||n +hv_magic_uvar_xkey||| +hv_magic||| +hv_name_set||5.009003| +hv_notallowed||| +hv_placeholders_get||5.009003| +hv_placeholders_p||5.009003| +hv_placeholders_set||5.009003| +hv_riter_p||5.009003| +hv_riter_set||5.009003| +hv_scalar||5.009001| +hv_store_ent||5.004000| +hv_store_flags||5.008000| +hv_stores|5.009004||p +hv_store||| +hv_undef||| +ibcmp_locale||5.004000| +ibcmp_utf8||5.007003| +ibcmp||| +incl_perldb||| +incline||| +incpush_if_exists||| +incpush||| +ingroup||| +init_argv_symbols||| +init_debugger||| +init_global_struct||| +init_i18nl10n||5.006000| +init_i18nl14n||5.006000| +init_ids||| +init_interp||| +init_main_stash||| +init_perllib||| +init_postdump_symbols||| +init_predump_symbols||| +init_stacks||5.005000| +init_tm||5.007002| +instr||| +intro_my||| +intuit_method||| +intuit_more||| +invert||| +io_close||| +isALNUM||| +isALPHA||| +isDIGIT||| +isLOWER||| +isSPACE||| +isUPPER||| +is_an_int||| +is_gv_magical_sv||| +is_gv_magical||| +is_handle_constructor|||n +is_list_assignment||| +is_lvalue_sub||5.007001| +is_uni_alnum_lc||5.006000| +is_uni_alnumc_lc||5.006000| +is_uni_alnumc||5.006000| +is_uni_alnum||5.006000| +is_uni_alpha_lc||5.006000| +is_uni_alpha||5.006000| +is_uni_ascii_lc||5.006000| +is_uni_ascii||5.006000| +is_uni_cntrl_lc||5.006000| +is_uni_cntrl||5.006000| +is_uni_digit_lc||5.006000| +is_uni_digit||5.006000| +is_uni_graph_lc||5.006000| +is_uni_graph||5.006000| +is_uni_idfirst_lc||5.006000| +is_uni_idfirst||5.006000| +is_uni_lower_lc||5.006000| +is_uni_lower||5.006000| +is_uni_print_lc||5.006000| +is_uni_print||5.006000| +is_uni_punct_lc||5.006000| +is_uni_punct||5.006000| +is_uni_space_lc||5.006000| +is_uni_space||5.006000| +is_uni_upper_lc||5.006000| +is_uni_upper||5.006000| +is_uni_xdigit_lc||5.006000| +is_uni_xdigit||5.006000| +is_utf8_alnumc||5.006000| +is_utf8_alnum||5.006000| +is_utf8_alpha||5.006000| +is_utf8_ascii||5.006000| +is_utf8_char_slow|||n +is_utf8_char||5.006000| +is_utf8_cntrl||5.006000| +is_utf8_common||| +is_utf8_digit||5.006000| +is_utf8_graph||5.006000| +is_utf8_idcont||5.008000| +is_utf8_idfirst||5.006000| +is_utf8_lower||5.006000| +is_utf8_mark||5.006000| +is_utf8_print||5.006000| +is_utf8_punct||5.006000| +is_utf8_space||5.006000| +is_utf8_string_loclen||5.009003| +is_utf8_string_loc||5.008001| +is_utf8_string||5.006001| +is_utf8_upper||5.006000| +is_utf8_xdigit||5.006000| +isa_lookup||| +items|||n +ix|||n +jmaybe||| +join_exact||| +keyword||| +leave_scope||| +lex_end||| +lex_start||| +linklist||| +listkids||| +list||| +load_module_nocontext|||vn +load_module|5.006000||pv +localize||| +looks_like_bool||| +looks_like_number||| +lop||| +mPUSHi|5.009002||p +mPUSHn|5.009002||p +mPUSHp|5.009002||p +mPUSHu|5.009002||p +mXPUSHi|5.009002||p +mXPUSHn|5.009002||p +mXPUSHp|5.009002||p +mXPUSHu|5.009002||p +mad_free||| +madlex||| +madparse||| +magic_clear_all_env||| +magic_clearenv||| +magic_clearhint||| +magic_clearpack||| +magic_clearsig||| +magic_dump||5.006000| +magic_existspack||| +magic_freearylen_p||| +magic_freeovrld||| +magic_freeregexp||| +magic_getarylen||| +magic_getdefelem||| +magic_getnkeys||| +magic_getpack||| +magic_getpos||| +magic_getsig||| +magic_getsubstr||| +magic_gettaint||| +magic_getuvar||| +magic_getvec||| +magic_get||| +magic_killbackrefs||| +magic_len||| +magic_methcall||| +magic_methpack||| +magic_nextpack||| +magic_regdata_cnt||| +magic_regdatum_get||| +magic_regdatum_set||| +magic_scalarpack||| +magic_set_all_env||| +magic_setamagic||| +magic_setarylen||| +magic_setbm||| +magic_setcollxfrm||| +magic_setdbline||| +magic_setdefelem||| +magic_setenv||| +magic_setfm||| +magic_setglob||| +magic_sethint||| +magic_setisa||| +magic_setmglob||| +magic_setnkeys||| +magic_setpack||| +magic_setpos||| +magic_setregexp||| +magic_setsig||| +magic_setsubstr||| +magic_settaint||| +magic_setutf8||| +magic_setuvar||| +magic_setvec||| +magic_set||| +magic_sizepack||| +magic_wipepack||| +magicname||| +make_matcher||| +make_trie_failtable||| +make_trie||| +malloced_size|||n +malloc||5.007002|n +markstack_grow||| +matcher_matches_sv||| +measure_struct||| +memEQ|5.004000||p +memNE|5.004000||p +mem_collxfrm||| +mess_alloc||| +mess_nocontext|||vn +mess||5.006000|v +method_common||| +mfree||5.007002|n +mg_clear||| +mg_copy||| +mg_dup||| +mg_find||| +mg_free||| +mg_get||| +mg_length||5.005000| +mg_localize||| +mg_magical||| +mg_set||| +mg_size||5.005000| +mini_mktime||5.007002| +missingterm||| +mode_from_discipline||| +modkids||| +mod||| +more_bodies||| +more_sv||| +moreswitches||| +mro_get_linear_isa_c3||5.009005| +mro_get_linear_isa_dfs||5.009005| +mro_get_linear_isa||5.009005| +mro_isa_changed_in||| +mro_meta_dup||| +mro_meta_init||| +mro_method_changed_in||5.009005| +mul128||| +mulexp10|||n +my_atof2||5.007002| +my_atof||5.006000| +my_attrs||| +my_bcopy|||n +my_betoh16|||n +my_betoh32|||n +my_betoh64|||n +my_betohi|||n +my_betohl|||n +my_betohs|||n +my_bzero|||n +my_chsize||| +my_clearenv||| +my_cxt_index||| +my_cxt_init||| +my_dirfd||5.009005| +my_exit_jump||| +my_exit||| +my_failure_exit||5.004000| +my_fflush_all||5.006000| +my_fork||5.007003|n +my_htobe16|||n +my_htobe32|||n +my_htobe64|||n +my_htobei|||n +my_htobel|||n +my_htobes|||n +my_htole16|||n +my_htole32|||n +my_htole64|||n +my_htolei|||n +my_htolel|||n +my_htoles|||n +my_htonl||| +my_kid||| +my_letoh16|||n +my_letoh32|||n +my_letoh64|||n +my_letohi|||n +my_letohl|||n +my_letohs|||n +my_lstat||| +my_memcmp||5.004000|n +my_memset|||n +my_ntohl||| +my_pclose||5.004000| +my_popen_list||5.007001| +my_popen||5.004000| +my_setenv||| +my_snprintf|5.009004||pvn +my_socketpair||5.007003|n +my_sprintf||5.009003|vn +my_stat||| +my_strftime||5.007002| +my_strlcat|5.009004||pn +my_strlcpy|5.009004||pn +my_swabn|||n +my_swap||| +my_unexec||| +my_vsnprintf||5.009004|n +my||| +need_utf8|||n +newANONATTRSUB||5.006000| +newANONHASH||| +newANONLIST||| +newANONSUB||| +newASSIGNOP||| +newATTRSUB||5.006000| +newAVREF||| +newAV||| +newBINOP||| +newCONDOP||| +newCONSTSUB|5.004050||p +newCVREF||| +newDEFSVOP||| +newFORM||| +newFOROP||| +newGIVENOP||5.009003| +newGIVWHENOP||| +newGP||| +newGVOP||| +newGVREF||| +newGVgen||| +newHVREF||| +newHVhv||5.005000| +newHV||| +newIO||| +newLISTOP||| +newLOGOP||| +newLOOPEX||| +newLOOPOP||| +newMADPROP||| +newMADsv||| +newMYSUB||| +newNULLLIST||| +newOP||| +newPADOP||| +newPMOP||| +newPROG||| +newPVOP||| +newRANGE||| +newRV_inc|5.004000||p +newRV_noinc|5.004000||p +newRV||| +newSLICEOP||| +newSTATEOP||| +newSUB||| +newSVOP||| +newSVREF||| +newSV_type||5.009005| +newSVhek||5.009003| +newSViv||| +newSVnv||| +newSVpvf_nocontext|||vn +newSVpvf||5.004000|v +newSVpvn_share|5.007001||p +newSVpvn|5.004050||p +newSVpvs_share||5.009003| +newSVpvs|5.009003||p +newSVpv||| +newSVrv||| +newSVsv||| +newSVuv|5.006000||p +newSV||| +newTOKEN||| +newUNOP||| +newWHENOP||5.009003| +newWHILEOP||5.009003| +newXS_flags||5.009004| +newXSproto||5.006000| +newXS||5.006000| +new_collate||5.006000| +new_constant||| +new_ctype||5.006000| +new_he||| +new_logop||| +new_numeric||5.006000| +new_stackinfo||5.005000| +new_version||5.009000| +new_warnings_bitfield||| +next_symbol||| +nextargv||| +nextchar||| +ninstr||| +no_bareword_allowed||| +no_fh_allowed||| +no_op||| +not_a_number||| +nothreadhook||5.008000| +nuke_stacks||| +num_overflow|||n +offer_nice_chunk||| +oopsAV||| +oopsCV||| +oopsHV||| +op_clear||| +op_const_sv||| +op_dump||5.006000| +op_free||| +op_getmad_weak||| +op_getmad||| +op_null||5.007002| +op_refcnt_dec||| +op_refcnt_inc||| +op_refcnt_lock||5.009002| +op_refcnt_unlock||5.009002| +op_xmldump||| +open_script||| +pMY_CXT_|5.007003||p +pMY_CXT|5.007003||p +pTHX_|5.006000||p +pTHX|5.006000||p +packWARN|5.007003||p +pack_cat||5.007003| +pack_rec||| +package||| +packlist||5.008001| +pad_add_anon||| +pad_add_name||| +pad_alloc||| +pad_block_start||| +pad_check_dup||| +pad_compname_type||| +pad_findlex||| +pad_findmy||| +pad_fixup_inner_anons||| +pad_free||| +pad_leavemy||| +pad_new||| +pad_peg|||n +pad_push||| +pad_reset||| +pad_setsv||| +pad_sv||5.009005| +pad_swipe||| +pad_tidy||| +pad_undef||| +parse_body||| +parse_unicode_opts||| +parser_dup||| +parser_free||| +path_is_absolute|||n +peep||| +pending_Slabs_to_ro||| +perl_alloc_using|||n +perl_alloc|||n +perl_clone_using|||n +perl_clone|||n +perl_construct|||n +perl_destruct||5.007003|n +perl_free|||n +perl_parse||5.006000|n +perl_run|||n +pidgone||| +pm_description||| +pmflag||| +pmop_dump||5.006000| +pmop_xmldump||| +pmruntime||| +pmtrans||| +pop_scope||| +pregcomp||5.009005| +pregexec||| +pregfree||| +prepend_elem||| +prepend_madprops||| +printbuf||| +printf_nocontext|||vn +process_special_blocks||| +ptr_table_clear||5.009005| +ptr_table_fetch||5.009005| +ptr_table_find|||n +ptr_table_free||5.009005| +ptr_table_new||5.009005| +ptr_table_split||5.009005| +ptr_table_store||5.009005| +push_scope||| +put_byte||| +pv_display||5.006000| +pv_escape||5.009004| +pv_pretty||5.009004| +pv_uni_display||5.007003| +qerror||| +qsortsvu||| +re_compile||5.009005| +re_croak2||| +re_dup||| +re_intuit_start||5.009005| +re_intuit_string||5.006000| +readpipe_override||| +realloc||5.007002|n +reentrant_free||| +reentrant_init||| +reentrant_retry|||vn +reentrant_size||| +ref_array_or_hash||| +refcounted_he_chain_2hv||| +refcounted_he_fetch||| +refcounted_he_free||| +refcounted_he_new||| +refcounted_he_value||| +refkids||| +refto||| +ref||5.009003| +reg_check_named_buff_matched||| +reg_named_buff_all||5.009005| +reg_named_buff_exists||5.009005| +reg_named_buff_fetch||5.009005| +reg_named_buff_firstkey||5.009005| +reg_named_buff_iter||| +reg_named_buff_nextkey||5.009005| +reg_named_buff_scalar||5.009005| +reg_named_buff||| +reg_namedseq||| +reg_node||| +reg_numbered_buff_fetch||| +reg_numbered_buff_length||| +reg_numbered_buff_store||| +reg_qr_package||| +reg_recode||| +reg_scan_name||| +reg_skipcomment||| +reg_stringify||5.009005| +reg_temp_copy||| +reganode||| +regatom||| +regbranch||| +regclass_swash||5.009004| +regclass||| +regcppop||| +regcppush||| +regcurly|||n +regdump_extflags||| +regdump||5.005000| +regdupe_internal||| +regexec_flags||5.005000| +regfree_internal||5.009005| +reghop3|||n +reghop4|||n +reghopmaybe3|||n +reginclass||| +reginitcolors||5.006000| +reginsert||| +regmatch||| +regnext||5.005000| +regpiece||| +regpposixcc||| +regprop||| +regrepeat||| +regtail_study||| +regtail||| +regtry||| +reguni||| +regwhite|||n +reg||| +repeatcpy||| +report_evil_fh||| +report_uninit||| +require_pv||5.006000| +require_tie_mod||| +restore_magic||| +rninstr||| +rsignal_restore||| +rsignal_save||| +rsignal_state||5.004000| +rsignal||5.004000| +run_body||| +run_user_filter||| +runops_debug||5.005000| +runops_standard||5.005000| +rvpv_dup||| +rxres_free||| +rxres_restore||| +rxres_save||| +safesyscalloc||5.006000|n +safesysfree||5.006000|n +safesysmalloc||5.006000|n +safesysrealloc||5.006000|n +same_dirent||| +save_I16||5.004000| +save_I32||| +save_I8||5.006000| +save_aelem||5.004050| +save_alloc||5.006000| +save_aptr||| +save_ary||| +save_bool||5.008001| +save_clearsv||| +save_delete||| +save_destructor_x||5.006000| +save_destructor||5.006000| +save_freeop||| +save_freepv||| +save_freesv||| +save_generic_pvref||5.006001| +save_generic_svref||5.005030| +save_gp||5.004000| +save_hash||| +save_hek_flags|||n +save_helem||5.004050| +save_hints||5.005000| +save_hptr||| +save_int||| +save_item||| +save_iv||5.005000| +save_lines||| +save_list||| +save_long||| +save_magic||| +save_mortalizesv||5.007001| +save_nogv||| +save_op||| +save_padsv||5.007001| +save_pptr||| +save_re_context||5.006000| +save_scalar_at||| +save_scalar||| +save_set_svflags||5.009000| +save_shared_pvref||5.007003| +save_sptr||| +save_svref||| +save_vptr||5.006000| +savepvn||| +savepvs||5.009003| +savepv||| +savesharedpvn||5.009005| +savesharedpv||5.007003| +savestack_grow_cnt||5.008001| +savestack_grow||| +savesvpv||5.009002| +sawparens||| +scalar_mod_type|||n +scalarboolean||| +scalarkids||| +scalarseq||| +scalarvoid||| +scalar||| +scan_bin||5.006000| +scan_commit||| +scan_const||| +scan_formline||| +scan_heredoc||| +scan_hex||| +scan_ident||| +scan_inputsymbol||| +scan_num||5.007001| +scan_oct||| +scan_pat||| +scan_str||| +scan_subst||| +scan_trans||| +scan_version||5.009001| +scan_vstring||5.009005| +scan_word||| +scope||| +screaminstr||5.005000| +seed||5.008001| +sequence_num||| +sequence_tail||| +sequence||| +set_context||5.006000|n +set_csh||| +set_numeric_local||5.006000| +set_numeric_radix||5.006000| +set_numeric_standard||5.006000| +setdefout||| +setenv_getix||| +share_hek_flags||| +share_hek||5.004000| +si_dup||| +sighandler|||n +simplify_sort||| +skipspace0||| +skipspace1||| +skipspace2||| +skipspace||| +softref2xv||| +sortcv_stacked||| +sortcv_xsub||| +sortcv||| +sortsv_flags||5.009003| +sortsv||5.007003| +space_join_names_mortal||| +ss_dup||| +stack_grow||| +start_force||| +start_glob||| +start_subparse||5.004000| +stashpv_hvname_match||5.009005| +stdize_locale||| +strEQ||| +strGE||| +strGT||| +strLE||| +strLT||| +strNE||| +str_to_version||5.006000| +strip_return||| +strnEQ||| +strnNE||| +study_chunk||| +sub_crush_depth||| +sublex_done||| +sublex_push||| +sublex_start||| +sv_2bool||| +sv_2cv||| +sv_2io||| +sv_2iuv_common||| +sv_2iuv_non_preserve||| +sv_2iv_flags||5.009001| +sv_2iv||| +sv_2mortal||| +sv_2nv||| +sv_2pv_flags|5.007002||p +sv_2pv_nolen|5.006000||p +sv_2pvbyte_nolen|5.006000||p +sv_2pvbyte|5.006000||p +sv_2pvutf8_nolen||5.006000| +sv_2pvutf8||5.006000| +sv_2pv||| +sv_2uv_flags||5.009001| +sv_2uv|5.004000||p +sv_add_arena||| +sv_add_backref||| +sv_backoff||| +sv_bless||| +sv_cat_decode||5.008001| +sv_catpv_mg|5.004050||p +sv_catpvf_mg_nocontext|||pvn +sv_catpvf_mg|5.006000|5.004000|pv +sv_catpvf_nocontext|||vn +sv_catpvf||5.004000|v +sv_catpvn_flags||5.007002| +sv_catpvn_mg|5.004050||p +sv_catpvn_nomg|5.007002||p +sv_catpvn||| +sv_catpvs|5.009003||p +sv_catpv||| +sv_catsv_flags||5.007002| +sv_catsv_mg|5.004050||p +sv_catsv_nomg|5.007002||p +sv_catsv||| +sv_catxmlpvn||| +sv_catxmlsv||| +sv_chop||| +sv_clean_all||| +sv_clean_objs||| +sv_clear||| +sv_cmp_locale||5.004000| +sv_cmp||| +sv_collxfrm||| +sv_compile_2op||5.008001| +sv_copypv||5.007003| +sv_dec||| +sv_del_backref||| +sv_derived_from||5.004000| +sv_does||5.009004| +sv_dump||| +sv_dup||| +sv_eq||| +sv_exp_grow||| +sv_force_normal_flags||5.007001| +sv_force_normal||5.006000| +sv_free2||| +sv_free_arenas||| +sv_free||| +sv_gets||5.004000| +sv_grow||| +sv_i_ncmp||| +sv_inc||| +sv_insert||| +sv_isa||| +sv_isobject||| +sv_iv||5.005000| +sv_kill_backrefs||| +sv_len_utf8||5.006000| +sv_len||| +sv_magic_portable|5.009005|5.004000|p +sv_magicext||5.007003| +sv_magic||| +sv_mortalcopy||| +sv_ncmp||| +sv_newmortal||| +sv_newref||| +sv_nolocking||5.007003| +sv_nosharing||5.007003| +sv_nounlocking||| +sv_nv||5.005000| +sv_peek||5.005000| +sv_pos_b2u_midway||| +sv_pos_b2u||5.006000| +sv_pos_u2b_cached||| +sv_pos_u2b_forwards|||n +sv_pos_u2b_midway|||n +sv_pos_u2b||5.006000| +sv_pvbyten_force||5.006000| +sv_pvbyten||5.006000| +sv_pvbyte||5.006000| +sv_pvn_force_flags|5.007002||p +sv_pvn_force||| +sv_pvn_nomg|5.007003||p +sv_pvn||| +sv_pvutf8n_force||5.006000| +sv_pvutf8n||5.006000| +sv_pvutf8||5.006000| +sv_pv||5.006000| +sv_recode_to_utf8||5.007003| +sv_reftype||| +sv_release_COW||| +sv_replace||| +sv_report_used||| +sv_reset||| +sv_rvweaken||5.006000| +sv_setiv_mg|5.004050||p +sv_setiv||| +sv_setnv_mg|5.006000||p +sv_setnv||| +sv_setpv_mg|5.004050||p +sv_setpvf_mg_nocontext|||pvn +sv_setpvf_mg|5.006000|5.004000|pv +sv_setpvf_nocontext|||vn +sv_setpvf||5.004000|v +sv_setpviv_mg||5.008001| +sv_setpviv||5.008001| +sv_setpvn_mg|5.004050||p +sv_setpvn||| +sv_setpvs|5.009004||p +sv_setpv||| +sv_setref_iv||| +sv_setref_nv||| +sv_setref_pvn||| +sv_setref_pv||| +sv_setref_uv||5.007001| +sv_setsv_cow||| +sv_setsv_flags||5.007002| +sv_setsv_mg|5.004050||p +sv_setsv_nomg|5.007002||p +sv_setsv||| +sv_setuv_mg|5.004050||p +sv_setuv|5.004000||p +sv_tainted||5.004000| +sv_taint||5.004000| +sv_true||5.005000| +sv_unglob||| +sv_uni_display||5.007003| +sv_unmagic||| +sv_unref_flags||5.007001| +sv_unref||| +sv_untaint||5.004000| +sv_upgrade||| +sv_usepvn_flags||5.009004| +sv_usepvn_mg|5.004050||p +sv_usepvn||| +sv_utf8_decode||5.006000| +sv_utf8_downgrade||5.006000| +sv_utf8_encode||5.006000| +sv_utf8_upgrade_flags||5.007002| +sv_utf8_upgrade||5.007001| +sv_uv|5.005000||p +sv_vcatpvf_mg|5.006000|5.004000|p +sv_vcatpvfn||5.004000| +sv_vcatpvf|5.006000|5.004000|p +sv_vsetpvf_mg|5.006000|5.004000|p +sv_vsetpvfn||5.004000| +sv_vsetpvf|5.006000|5.004000|p +sv_xmlpeek||| +svtype||| +swallow_bom||| +swap_match_buff||| +swash_fetch||5.007002| +swash_get||| +swash_init||5.006000| +sys_intern_clear||| +sys_intern_dup||| +sys_intern_init||| +taint_env||| +taint_proper||| +tmps_grow||5.006000| +toLOWER||| +toUPPER||| +to_byte_substr||| +to_uni_fold||5.007003| +to_uni_lower_lc||5.006000| +to_uni_lower||5.007003| +to_uni_title_lc||5.006000| +to_uni_title||5.007003| +to_uni_upper_lc||5.006000| +to_uni_upper||5.007003| +to_utf8_case||5.007003| +to_utf8_fold||5.007003| +to_utf8_lower||5.007003| +to_utf8_substr||| +to_utf8_title||5.007003| +to_utf8_upper||5.007003| +token_free||| +token_getmad||| +tokenize_use||| +tokeq||| +tokereport||| +too_few_arguments||| +too_many_arguments||| +uiv_2buf|||n +unlnk||| +unpack_rec||| +unpack_str||5.007003| +unpackstring||5.008001| +unshare_hek_or_pvn||| +unshare_hek||| +unsharepvn||5.004000| +unwind_handler_stack||| +update_debugger_info||| +upg_version||5.009005| +usage||| +utf16_to_utf8_reversed||5.006001| +utf16_to_utf8||5.006001| +utf8_distance||5.006000| +utf8_hop||5.006000| +utf8_length||5.007001| +utf8_mg_pos_cache_update||| +utf8_to_bytes||5.006001| +utf8_to_uvchr||5.007001| +utf8_to_uvuni||5.007001| +utf8n_to_uvchr||| +utf8n_to_uvuni||5.007001| +utilize||| +uvchr_to_utf8_flags||5.007003| +uvchr_to_utf8||| +uvuni_to_utf8_flags||5.007003| +uvuni_to_utf8||5.007001| +validate_suid||| +varname||| +vcmp||5.009000| +vcroak||5.006000| +vdeb||5.007003| +vdie_common||| +vdie_croak_common||| +vdie||| +vform||5.006000| +visit||| +vivify_defelem||| +vivify_ref||| +vload_module|5.006000||p +vmess||5.006000| +vnewSVpvf|5.006000|5.004000|p +vnormal||5.009002| +vnumify||5.009000| +vstringify||5.009000| +vverify||5.009003| +vwarner||5.006000| +vwarn||5.006000| +wait4pid||| +warn_nocontext|||vn +warner_nocontext|||vn +warner|5.006000|5.004000|pv +warn|||v +watch||| +whichsig||| +write_no_mem||| +write_to_stderr||| +xmldump_all||| +xmldump_attr||| +xmldump_eval||| +xmldump_form||| +xmldump_indent|||v +xmldump_packsubs||| +xmldump_sub||| +xmldump_vindent||| +yyerror||| +yylex||| +yyparse||| +yywarn||| +); + +if (exists $opt{'list-unsupported'}) { + my $f; + for $f (sort { lc $a cmp lc $b } keys %API) { + next unless $API{$f}{todo}; + print "$f ", '.'x(40-length($f)), " ", format_version($API{$f}{todo}), "\n"; + } + exit 0; +} + +# Scan for possible replacement candidates + +my(%replace, %need, %hints, %warnings, %depends); +my $replace = 0; +my($hint, $define, $function); + +sub find_api +{ + my $code = shift; + $code =~ s{ + / (?: \*[^*]*\*+(?:[^$ccs][^*]*\*+)* / | /[^\r\n]*) + | "[^"\\]*(?:\\.[^"\\]*)*" + | '[^'\\]*(?:\\.[^'\\]*)*' }{}egsx; + grep { exists $API{$_} } $code =~ /(\w+)/mg; +} + +while () { + if ($hint) { + my $h = $hint->[0] eq 'Hint' ? \%hints : \%warnings; + if (m{^\s*\*\s(.*?)\s*$}) { + for (@{$hint->[1]}) { + $h->{$_} ||= ''; # suppress warning with older perls + $h->{$_} .= "$1\n"; + } + } + else { undef $hint } + } + + $hint = [$1, [split /,?\s+/, $2]] + if m{^\s*$rccs\s+(Hint|Warning):\s+(\w+(?:,?\s+\w+)*)\s*$}; + + if ($define) { + if ($define->[1] =~ /\\$/) { + $define->[1] .= $_; + } + else { + if (exists $API{$define->[0]} && $define->[1] !~ /^DPPP_\(/) { + my @n = find_api($define->[1]); + push @{$depends{$define->[0]}}, @n if @n + } + undef $define; + } + } + + $define = [$1, $2] if m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(.*)}; + + if ($function) { + if (/^}/) { + if (exists $API{$function->[0]}) { + my @n = find_api($function->[1]); + push @{$depends{$function->[0]}}, @n if @n + } + undef $define; + } + else { + $function->[1] .= $_; + } + } + + $function = [$1, ''] if m{^DPPP_\(my_(\w+)\)}; + + $replace = $1 if m{^\s*$rccs\s+Replace:\s+(\d+)\s+$rcce\s*$}; + $replace{$2} = $1 if $replace and m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(\w+)}; + $replace{$2} = $1 if m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(\w+).*$rccs\s+Replace\s+$rcce}; + $replace{$1} = $2 if m{^\s*$rccs\s+Replace (\w+) with (\w+)\s+$rcce\s*$}; + + if (m{^\s*$rccs\s+(\w+)\s+depends\s+on\s+(\w+(\s*,\s*\w+)*)\s+$rcce\s*$}) { + push @{$depends{$1}}, map { s/\s+//g; $_ } split /,/, $2; + } + + $need{$1} = 1 if m{^#if\s+defined\(NEED_(\w+)(?:_GLOBAL)?\)}; +} + +for (values %depends) { + my %s; + $_ = [sort grep !$s{$_}++, @$_]; +} + +if (exists $opt{'api-info'}) { + my $f; + my $count = 0; + my $match = $opt{'api-info'} =~ m!^/(.*)/$! ? $1 : "^\Q$opt{'api-info'}\E\$"; + for $f (sort { lc $a cmp lc $b } keys %API) { + next unless $f =~ /$match/; + print "\n=== $f ===\n\n"; + my $info = 0; + if ($API{$f}{base} || $API{$f}{todo}) { + my $base = format_version($API{$f}{base} || $API{$f}{todo}); + print "Supported at least starting from perl-$base.\n"; + $info++; + } + if ($API{$f}{provided}) { + my $todo = $API{$f}{todo} ? format_version($API{$f}{todo}) : "5.003"; + print "Support by $ppport provided back to perl-$todo.\n"; + print "Support needs to be explicitly requested by NEED_$f.\n" if exists $need{$f}; + print "Depends on: ", join(', ', @{$depends{$f}}), ".\n" if exists $depends{$f}; + print "\n$hints{$f}" if exists $hints{$f}; + print "\nWARNING:\n$warnings{$f}" if exists $warnings{$f}; + $info++; + } + print "No portability information available.\n" unless $info; + $count++; + } + $count or print "Found no API matching '$opt{'api-info'}'."; + print "\n"; + exit 0; +} + +if (exists $opt{'list-provided'}) { + my $f; + for $f (sort { lc $a cmp lc $b } keys %API) { + next unless $API{$f}{provided}; + my @flags; + push @flags, 'explicit' if exists $need{$f}; + push @flags, 'depend' if exists $depends{$f}; + push @flags, 'hint' if exists $hints{$f}; + push @flags, 'warning' if exists $warnings{$f}; + my $flags = @flags ? ' ['.join(', ', @flags).']' : ''; + print "$f$flags\n"; + } + exit 0; +} + +my @files; +my @srcext = qw( .xs .c .h .cc .cpp -c.inc -xs.inc ); +my $srcext = join '|', map { quotemeta $_ } @srcext; + +if (@ARGV) { + my %seen; + for (@ARGV) { + if (-e) { + if (-f) { + push @files, $_ unless $seen{$_}++; + } + else { warn "'$_' is not a file.\n" } + } + else { + my @new = grep { -f } glob $_ + or warn "'$_' does not exist.\n"; + push @files, grep { !$seen{$_}++ } @new; + } + } +} +else { + eval { + require File::Find; + File::Find::find(sub { + $File::Find::name =~ /($srcext)$/i + and push @files, $File::Find::name; + }, '.'); + }; + if ($@) { + @files = map { glob "*$_" } @srcext; + } +} + +if (!@ARGV || $opt{filter}) { + my(@in, @out); + my %xsc = map { /(.*)\.xs$/ ? ("$1.c" => 1, "$1.cc" => 1) : () } @files; + for (@files) { + my $out = exists $xsc{$_} || /\b\Q$ppport\E$/i || !/($srcext)$/i; + push @{ $out ? \@out : \@in }, $_; + } + if (@ARGV && @out) { + warning("Skipping the following files (use --nofilter to avoid this):\n| ", join "\n| ", @out); + } + @files = @in; +} + +die "No input files given!\n" unless @files; + +my(%files, %global, %revreplace); +%revreplace = reverse %replace; +my $filename; +my $patch_opened = 0; + +for $filename (@files) { + unless (open IN, "<$filename") { + warn "Unable to read from $filename: $!\n"; + next; + } + + info("Scanning $filename ..."); + + my $c = do { local $/; }; + close IN; + + my %file = (orig => $c, changes => 0); + + # Temporarily remove C/XS comments and strings from the code + my @ccom; + + $c =~ s{ + ( ^$HS*\#$HS*include\b[^\r\n]+\b(?:\Q$ppport\E|XSUB\.h)\b[^\r\n]* + | ^$HS*\#$HS*(?:define|elif|if(?:def)?)\b[^\r\n]* ) + | ( ^$HS*\#[^\r\n]* + | "[^"\\]*(?:\\.[^"\\]*)*" + | '[^'\\]*(?:\\.[^'\\]*)*' + | / (?: \*[^*]*\*+(?:[^$ccs][^*]*\*+)* / | /[^\r\n]* ) ) + }{ defined $2 and push @ccom, $2; + defined $1 ? $1 : "$ccs$#ccom$cce" }mgsex; + + $file{ccom} = \@ccom; + $file{code} = $c; + $file{has_inc_ppport} = $c =~ /^$HS*#$HS*include[^\r\n]+\b\Q$ppport\E\b/m; + + my $func; + + for $func (keys %API) { + my $match = $func; + $match .= "|$revreplace{$func}" if exists $revreplace{$func}; + if ($c =~ /\b(?:Perl_)?($match)\b/) { + $file{uses_replace}{$1}++ if exists $revreplace{$func} && $1 eq $revreplace{$func}; + $file{uses_Perl}{$func}++ if $c =~ /\bPerl_$func\b/; + if (exists $API{$func}{provided}) { + $file{uses_provided}{$func}++; + if (!exists $API{$func}{base} || $API{$func}{base} > $opt{'compat-version'}) { + $file{uses}{$func}++; + my @deps = rec_depend($func); + if (@deps) { + $file{uses_deps}{$func} = \@deps; + for (@deps) { + $file{uses}{$_} = 0 unless exists $file{uses}{$_}; + } + } + for ($func, @deps) { + $file{needs}{$_} = 'static' if exists $need{$_}; + } + } + } + if (exists $API{$func}{todo} && $API{$func}{todo} > $opt{'compat-version'}) { + if ($c =~ /\b$func\b/) { + $file{uses_todo}{$func}++; + } + } + } + } + + while ($c =~ /^$HS*#$HS*define$HS+(NEED_(\w+?)(_GLOBAL)?)\b/mg) { + if (exists $need{$2}) { + $file{defined $3 ? 'needed_global' : 'needed_static'}{$2}++; + } + else { warning("Possibly wrong #define $1 in $filename") } + } + + for (qw(uses needs uses_todo needed_global needed_static)) { + for $func (keys %{$file{$_}}) { + push @{$global{$_}{$func}}, $filename; + } + } + + $files{$filename} = \%file; +} + +# Globally resolve NEED_'s +my $need; +for $need (keys %{$global{needs}}) { + if (@{$global{needs}{$need}} > 1) { + my @targets = @{$global{needs}{$need}}; + my @t = grep $files{$_}{needed_global}{$need}, @targets; + @targets = @t if @t; + @t = grep /\.xs$/i, @targets; + @targets = @t if @t; + my $target = shift @targets; + $files{$target}{needs}{$need} = 'global'; + for (@{$global{needs}{$need}}) { + $files{$_}{needs}{$need} = 'extern' if $_ ne $target; + } + } +} + +for $filename (@files) { + exists $files{$filename} or next; + + info("=== Analyzing $filename ==="); + + my %file = %{$files{$filename}}; + my $func; + my $c = $file{code}; + my $warnings = 0; + + for $func (sort keys %{$file{uses_Perl}}) { + if ($API{$func}{varargs}) { + unless ($API{$func}{nothxarg}) { + my $changes = ($c =~ s{\b(Perl_$func\s*\(\s*)(?!aTHX_?)(\)|[^\s)]*\))} + { $1 . ($2 eq ')' ? 'aTHX' : 'aTHX_ ') . $2 }ge); + if ($changes) { + warning("Doesn't pass interpreter argument aTHX to Perl_$func"); + $file{changes} += $changes; + } + } + } + else { + warning("Uses Perl_$func instead of $func"); + $file{changes} += ($c =~ s{\bPerl_$func(\s*)\((\s*aTHX_?)?\s*} + {$func$1(}g); + } + } + + for $func (sort keys %{$file{uses_replace}}) { + warning("Uses $func instead of $replace{$func}"); + $file{changes} += ($c =~ s/\b$func\b/$replace{$func}/g); + } + + for $func (sort keys %{$file{uses_provided}}) { + if ($file{uses}{$func}) { + if (exists $file{uses_deps}{$func}) { + diag("Uses $func, which depends on ", join(', ', @{$file{uses_deps}{$func}})); + } + else { + diag("Uses $func"); + } + } + $warnings += hint($func); + } + + unless ($opt{quiet}) { + for $func (sort keys %{$file{uses_todo}}) { + print "*** WARNING: Uses $func, which may not be portable below perl ", + format_version($API{$func}{todo}), ", even with '$ppport'\n"; + $warnings++; + } + } + + for $func (sort keys %{$file{needed_static}}) { + my $message = ''; + if (not exists $file{uses}{$func}) { + $message = "No need to define NEED_$func if $func is never used"; + } + elsif (exists $file{needs}{$func} && $file{needs}{$func} ne 'static') { + $message = "No need to define NEED_$func when already needed globally"; + } + if ($message) { + diag($message); + $file{changes} += ($c =~ s/^$HS*#$HS*define$HS+NEED_$func\b.*$LF//mg); + } + } + + for $func (sort keys %{$file{needed_global}}) { + my $message = ''; + if (not exists $global{uses}{$func}) { + $message = "No need to define NEED_${func}_GLOBAL if $func is never used"; + } + elsif (exists $file{needs}{$func}) { + if ($file{needs}{$func} eq 'extern') { + $message = "No need to define NEED_${func}_GLOBAL when already needed globally"; + } + elsif ($file{needs}{$func} eq 'static') { + $message = "No need to define NEED_${func}_GLOBAL when only used in this file"; + } + } + if ($message) { + diag($message); + $file{changes} += ($c =~ s/^$HS*#$HS*define$HS+NEED_${func}_GLOBAL\b.*$LF//mg); + } + } + + $file{needs_inc_ppport} = keys %{$file{uses}}; + + if ($file{needs_inc_ppport}) { + my $pp = ''; + + for $func (sort keys %{$file{needs}}) { + my $type = $file{needs}{$func}; + next if $type eq 'extern'; + my $suffix = $type eq 'global' ? '_GLOBAL' : ''; + unless (exists $file{"needed_$type"}{$func}) { + if ($type eq 'global') { + diag("Files [@{$global{needs}{$func}}] need $func, adding global request"); + } + else { + diag("File needs $func, adding static request"); + } + $pp .= "#define NEED_$func$suffix\n"; + } + } + + if ($pp && ($c =~ s/^(?=$HS*#$HS*define$HS+NEED_\w+)/$pp/m)) { + $pp = ''; + $file{changes}++; + } + + unless ($file{has_inc_ppport}) { + diag("Needs to include '$ppport'"); + $pp .= qq(#include "$ppport"\n) + } + + if ($pp) { + $file{changes} += ($c =~ s/^($HS*#$HS*define$HS+NEED_\w+.*?)^/$1$pp/ms) + || ($c =~ s/^(?=$HS*#$HS*include.*\Q$ppport\E)/$pp/m) + || ($c =~ s/^($HS*#$HS*include.*XSUB.*\s*?)^/$1$pp/m) + || ($c =~ s/^/$pp/); + } + } + else { + if ($file{has_inc_ppport}) { + diag("No need to include '$ppport'"); + $file{changes} += ($c =~ s/^$HS*?#$HS*include.*\Q$ppport\E.*?$LF//m); + } + } + + # put back in our C comments + my $ix; + my $cppc = 0; + my @ccom = @{$file{ccom}}; + for $ix (0 .. $#ccom) { + if (!$opt{cplusplus} && $ccom[$ix] =~ s!^//!!) { + $cppc++; + $file{changes} += $c =~ s/$rccs$ix$rcce/$ccs$ccom[$ix] $cce/; + } + else { + $c =~ s/$rccs$ix$rcce/$ccom[$ix]/; + } + } + + if ($cppc) { + my $s = $cppc != 1 ? 's' : ''; + warning("Uses $cppc C++ style comment$s, which is not portable"); + } + + my $s = $warnings != 1 ? 's' : ''; + my $warn = $warnings ? " ($warnings warning$s)" : ''; + info("Analysis completed$warn"); + + if ($file{changes}) { + if (exists $opt{copy}) { + my $newfile = "$filename$opt{copy}"; + if (-e $newfile) { + error("'$newfile' already exists, refusing to write copy of '$filename'"); + } + else { + local *F; + if (open F, ">$newfile") { + info("Writing copy of '$filename' with changes to '$newfile'"); + print F $c; + close F; + } + else { + error("Cannot open '$newfile' for writing: $!"); + } + } + } + elsif (exists $opt{patch} || $opt{changes}) { + if (exists $opt{patch}) { + unless ($patch_opened) { + if (open PATCH, ">$opt{patch}") { + $patch_opened = 1; + } + else { + error("Cannot open '$opt{patch}' for writing: $!"); + delete $opt{patch}; + $opt{changes} = 1; + goto fallback; + } + } + mydiff(\*PATCH, $filename, $c); + } + else { +fallback: + info("Suggested changes:"); + mydiff(\*STDOUT, $filename, $c); + } + } + else { + my $s = $file{changes} == 1 ? '' : 's'; + info("$file{changes} potentially required change$s detected"); + } + } + else { + info("Looks good"); + } +} + +close PATCH if $patch_opened; + +exit 0; + + +sub try_use { eval "use @_;"; return $@ eq '' } + +sub mydiff +{ + local *F = shift; + my($file, $str) = @_; + my $diff; + + if (exists $opt{diff}) { + $diff = run_diff($opt{diff}, $file, $str); + } + + if (!defined $diff and try_use('Text::Diff')) { + $diff = Text::Diff::diff($file, \$str, { STYLE => 'Unified' }); + $diff = <
$tmp") { + print F $str; + close F; + + if (open F, "$prog $file $tmp |") { + while () { + s/\Q$tmp\E/$file.patched/; + $diff .= $_; + } + close F; + unlink $tmp; + return $diff; + } + + unlink $tmp; + } + else { + error("Cannot open '$tmp' for writing: $!"); + } + + return undef; +} + +sub rec_depend +{ + my($func, $seen) = @_; + return () unless exists $depends{$func}; + $seen = {%{$seen||{}}}; + return () if $seen->{$func}++; + my %s; + grep !$s{$_}++, map { ($_, rec_depend($_, $seen)) } @{$depends{$func}}; +} + +sub parse_version +{ + my $ver = shift; + + if ($ver =~ /^(\d+)\.(\d+)\.(\d+)$/) { + return ($1, $2, $3); + } + elsif ($ver !~ /^\d+\.[\d_]+$/) { + die "cannot parse version '$ver'\n"; + } + + $ver =~ s/_//g; + $ver =~ s/$/000000/; + + my($r,$v,$s) = $ver =~ /(\d+)\.(\d{3})(\d{3})/; + + $v = int $v; + $s = int $s; + + if ($r < 5 || ($r == 5 && $v < 6)) { + if ($s % 10) { + die "cannot parse version '$ver'\n"; + } + } + + return ($r, $v, $s); +} + +sub format_version +{ + my $ver = shift; + + $ver =~ s/$/000000/; + my($r,$v,$s) = $ver =~ /(\d+)\.(\d{3})(\d{3})/; + + $v = int $v; + $s = int $s; + + if ($r < 5 || ($r == 5 && $v < 6)) { + if ($s % 10) { + die "invalid version '$ver'\n"; + } + $s /= 10; + + $ver = sprintf "%d.%03d", $r, $v; + $s > 0 and $ver .= sprintf "_%02d", $s; + + return $ver; + } + + return sprintf "%d.%d.%d", $r, $v, $s; +} + +sub info +{ + $opt{quiet} and return; + print @_, "\n"; +} + +sub diag +{ + $opt{quiet} and return; + $opt{diag} and print @_, "\n"; +} + +sub warning +{ + $opt{quiet} and return; + print "*** ", @_, "\n"; +} + +sub error +{ + print "*** ERROR: ", @_, "\n"; +} + +my %given_hints; +my %given_warnings; +sub hint +{ + $opt{quiet} and return; + my $func = shift; + my $rv = 0; + if (exists $warnings{$func} && !$given_warnings{$func}++) { + my $warn = $warnings{$func}; + $warn =~ s!^!*** !mg; + print "*** WARNING: $func\n", $warn; + $rv++; + } + if ($opt{hints} && exists $hints{$func} && !$given_hints{$func}++) { + my $hint = $hints{$func}; + $hint =~ s/^/ /mg; + print " --- hint for $func ---\n", $hint; + } + $rv; +} + +sub usage +{ + my($usage) = do { local(@ARGV,$/)=($0); <> } =~ /^=head\d$HS+SYNOPSIS\s*^(.*?)\s*^=/ms; + my %M = ( 'I' => '*' ); + $usage =~ s/^\s*perl\s+\S+/$^X $0/; + $usage =~ s/([A-Z])<([^>]+)>/$M{$1}$2$M{$1}/g; + + print < }; + my($copy) = $self =~ /^=head\d\s+COPYRIGHT\s*^(.*?)^=\w+/ms; + $copy =~ s/^(?=\S+)/ /gms; + $self =~ s/^$HS+Do NOT edit.*?(?=^-)/$copy/ms; + $self =~ s/^SKIP.*(?=^__DATA__)/SKIP +if (\@ARGV && \$ARGV[0] eq '--unstrip') { + eval { require Devel::PPPort }; + \$@ and die "Cannot require Devel::PPPort, please install.\\n"; + if (\$Devel::PPPort::VERSION < $VERSION) { + die "$0 was originally generated with Devel::PPPort $VERSION.\\n" + . "Your Devel::PPPort is only version \$Devel::PPPort::VERSION.\\n" + . "Please install a newer version, or --unstrip will not work.\\n"; + } + Devel::PPPort::WriteFile(\$0); + exit 0; +} +print <$0" or die "cannot strip $0: $!\n"; + print OUT "$pl$c\n"; + + exit 0; +} + +__DATA__ +*/ + +#ifndef _P_P_PORTABILITY_H_ +#define _P_P_PORTABILITY_H_ + +#ifndef DPPP_NAMESPACE +# define DPPP_NAMESPACE DPPP_ +#endif + +#define DPPP_CAT2(x,y) CAT2(x,y) +#define DPPP_(name) DPPP_CAT2(DPPP_NAMESPACE, name) + +#ifndef PERL_REVISION +# if !defined(__PATCHLEVEL_H_INCLUDED__) && !(defined(PATCHLEVEL) && defined(SUBVERSION)) +# define PERL_PATCHLEVEL_H_IMPLICIT +# include +# endif +# if !(defined(PERL_VERSION) || (defined(SUBVERSION) && defined(PATCHLEVEL))) +# include +# endif +# ifndef PERL_REVISION +# define PERL_REVISION (5) + /* Replace: 1 */ +# define PERL_VERSION PATCHLEVEL +# define PERL_SUBVERSION SUBVERSION + /* Replace PERL_PATCHLEVEL with PERL_VERSION */ + /* Replace: 0 */ +# endif +#endif + +#define _dpppDEC2BCD(dec) ((((dec)/100)<<8)|((((dec)%100)/10)<<4)|((dec)%10)) +#define PERL_BCDVERSION ((_dpppDEC2BCD(PERL_REVISION)<<24)|(_dpppDEC2BCD(PERL_VERSION)<<12)|_dpppDEC2BCD(PERL_SUBVERSION)) + +/* It is very unlikely that anyone will try to use this with Perl 6 + (or greater), but who knows. + */ +#if PERL_REVISION != 5 +# error ppport.h only works with Perl version 5 +#endif /* PERL_REVISION != 5 */ + +#ifdef I_LIMITS +# include +#endif + +#ifndef PERL_UCHAR_MIN +# define PERL_UCHAR_MIN ((unsigned char)0) +#endif + +#ifndef PERL_UCHAR_MAX +# ifdef UCHAR_MAX +# define PERL_UCHAR_MAX ((unsigned char)UCHAR_MAX) +# else +# ifdef MAXUCHAR +# define PERL_UCHAR_MAX ((unsigned char)MAXUCHAR) +# else +# define PERL_UCHAR_MAX ((unsigned char)~(unsigned)0) +# endif +# endif +#endif + +#ifndef PERL_USHORT_MIN +# define PERL_USHORT_MIN ((unsigned short)0) +#endif + +#ifndef PERL_USHORT_MAX +# ifdef USHORT_MAX +# define PERL_USHORT_MAX ((unsigned short)USHORT_MAX) +# else +# ifdef MAXUSHORT +# define PERL_USHORT_MAX ((unsigned short)MAXUSHORT) +# else +# ifdef USHRT_MAX +# define PERL_USHORT_MAX ((unsigned short)USHRT_MAX) +# else +# define PERL_USHORT_MAX ((unsigned short)~(unsigned)0) +# endif +# endif +# endif +#endif + +#ifndef PERL_SHORT_MAX +# ifdef SHORT_MAX +# define PERL_SHORT_MAX ((short)SHORT_MAX) +# else +# ifdef MAXSHORT /* Often used in */ +# define PERL_SHORT_MAX ((short)MAXSHORT) +# else +# ifdef SHRT_MAX +# define PERL_SHORT_MAX ((short)SHRT_MAX) +# else +# define PERL_SHORT_MAX ((short) (PERL_USHORT_MAX >> 1)) +# endif +# endif +# endif +#endif + +#ifndef PERL_SHORT_MIN +# ifdef SHORT_MIN +# define PERL_SHORT_MIN ((short)SHORT_MIN) +# else +# ifdef MINSHORT +# define PERL_SHORT_MIN ((short)MINSHORT) +# else +# ifdef SHRT_MIN +# define PERL_SHORT_MIN ((short)SHRT_MIN) +# else +# define PERL_SHORT_MIN (-PERL_SHORT_MAX - ((3 & -1) == 3)) +# endif +# endif +# endif +#endif + +#ifndef PERL_UINT_MAX +# ifdef UINT_MAX +# define PERL_UINT_MAX ((unsigned int)UINT_MAX) +# else +# ifdef MAXUINT +# define PERL_UINT_MAX ((unsigned int)MAXUINT) +# else +# define PERL_UINT_MAX (~(unsigned int)0) +# endif +# endif +#endif + +#ifndef PERL_UINT_MIN +# define PERL_UINT_MIN ((unsigned int)0) +#endif + +#ifndef PERL_INT_MAX +# ifdef INT_MAX +# define PERL_INT_MAX ((int)INT_MAX) +# else +# ifdef MAXINT /* Often used in */ +# define PERL_INT_MAX ((int)MAXINT) +# else +# define PERL_INT_MAX ((int)(PERL_UINT_MAX >> 1)) +# endif +# endif +#endif + +#ifndef PERL_INT_MIN +# ifdef INT_MIN +# define PERL_INT_MIN ((int)INT_MIN) +# else +# ifdef MININT +# define PERL_INT_MIN ((int)MININT) +# else +# define PERL_INT_MIN (-PERL_INT_MAX - ((3 & -1) == 3)) +# endif +# endif +#endif + +#ifndef PERL_ULONG_MAX +# ifdef ULONG_MAX +# define PERL_ULONG_MAX ((unsigned long)ULONG_MAX) +# else +# ifdef MAXULONG +# define PERL_ULONG_MAX ((unsigned long)MAXULONG) +# else +# define PERL_ULONG_MAX (~(unsigned long)0) +# endif +# endif +#endif + +#ifndef PERL_ULONG_MIN +# define PERL_ULONG_MIN ((unsigned long)0L) +#endif + +#ifndef PERL_LONG_MAX +# ifdef LONG_MAX +# define PERL_LONG_MAX ((long)LONG_MAX) +# else +# ifdef MAXLONG +# define PERL_LONG_MAX ((long)MAXLONG) +# else +# define PERL_LONG_MAX ((long) (PERL_ULONG_MAX >> 1)) +# endif +# endif +#endif + +#ifndef PERL_LONG_MIN +# ifdef LONG_MIN +# define PERL_LONG_MIN ((long)LONG_MIN) +# else +# ifdef MINLONG +# define PERL_LONG_MIN ((long)MINLONG) +# else +# define PERL_LONG_MIN (-PERL_LONG_MAX - ((3 & -1) == 3)) +# endif +# endif +#endif + +#if defined(HAS_QUAD) && (defined(convex) || defined(uts)) +# ifndef PERL_UQUAD_MAX +# ifdef ULONGLONG_MAX +# define PERL_UQUAD_MAX ((unsigned long long)ULONGLONG_MAX) +# else +# ifdef MAXULONGLONG +# define PERL_UQUAD_MAX ((unsigned long long)MAXULONGLONG) +# else +# define PERL_UQUAD_MAX (~(unsigned long long)0) +# endif +# endif +# endif + +# ifndef PERL_UQUAD_MIN +# define PERL_UQUAD_MIN ((unsigned long long)0L) +# endif + +# ifndef PERL_QUAD_MAX +# ifdef LONGLONG_MAX +# define PERL_QUAD_MAX ((long long)LONGLONG_MAX) +# else +# ifdef MAXLONGLONG +# define PERL_QUAD_MAX ((long long)MAXLONGLONG) +# else +# define PERL_QUAD_MAX ((long long) (PERL_UQUAD_MAX >> 1)) +# endif +# endif +# endif + +# ifndef PERL_QUAD_MIN +# ifdef LONGLONG_MIN +# define PERL_QUAD_MIN ((long long)LONGLONG_MIN) +# else +# ifdef MINLONGLONG +# define PERL_QUAD_MIN ((long long)MINLONGLONG) +# else +# define PERL_QUAD_MIN (-PERL_QUAD_MAX - ((3 & -1) == 3)) +# endif +# endif +# endif +#endif + +/* This is based on code from 5.003 perl.h */ +#ifdef HAS_QUAD +# ifdef cray +#ifndef IVTYPE +# define IVTYPE int +#endif + +#ifndef IV_MIN +# define IV_MIN PERL_INT_MIN +#endif + +#ifndef IV_MAX +# define IV_MAX PERL_INT_MAX +#endif + +#ifndef UV_MIN +# define UV_MIN PERL_UINT_MIN +#endif + +#ifndef UV_MAX +# define UV_MAX PERL_UINT_MAX +#endif + +# ifdef INTSIZE +#ifndef IVSIZE +# define IVSIZE INTSIZE +#endif + +# endif +# else +# if defined(convex) || defined(uts) +#ifndef IVTYPE +# define IVTYPE long long +#endif + +#ifndef IV_MIN +# define IV_MIN PERL_QUAD_MIN +#endif + +#ifndef IV_MAX +# define IV_MAX PERL_QUAD_MAX +#endif + +#ifndef UV_MIN +# define UV_MIN PERL_UQUAD_MIN +#endif + +#ifndef UV_MAX +# define UV_MAX PERL_UQUAD_MAX +#endif + +# ifdef LONGLONGSIZE +#ifndef IVSIZE +# define IVSIZE LONGLONGSIZE +#endif + +# endif +# else +#ifndef IVTYPE +# define IVTYPE long +#endif + +#ifndef IV_MIN +# define IV_MIN PERL_LONG_MIN +#endif + +#ifndef IV_MAX +# define IV_MAX PERL_LONG_MAX +#endif + +#ifndef UV_MIN +# define UV_MIN PERL_ULONG_MIN +#endif + +#ifndef UV_MAX +# define UV_MAX PERL_ULONG_MAX +#endif + +# ifdef LONGSIZE +#ifndef IVSIZE +# define IVSIZE LONGSIZE +#endif + +# endif +# endif +# endif +#ifndef IVSIZE +# define IVSIZE 8 +#endif + +#ifndef PERL_QUAD_MIN +# define PERL_QUAD_MIN IV_MIN +#endif + +#ifndef PERL_QUAD_MAX +# define PERL_QUAD_MAX IV_MAX +#endif + +#ifndef PERL_UQUAD_MIN +# define PERL_UQUAD_MIN UV_MIN +#endif + +#ifndef PERL_UQUAD_MAX +# define PERL_UQUAD_MAX UV_MAX +#endif + +#else +#ifndef IVTYPE +# define IVTYPE long +#endif + +#ifndef IV_MIN +# define IV_MIN PERL_LONG_MIN +#endif + +#ifndef IV_MAX +# define IV_MAX PERL_LONG_MAX +#endif + +#ifndef UV_MIN +# define UV_MIN PERL_ULONG_MIN +#endif + +#ifndef UV_MAX +# define UV_MAX PERL_ULONG_MAX +#endif + +#endif + +#ifndef IVSIZE +# ifdef LONGSIZE +# define IVSIZE LONGSIZE +# else +# define IVSIZE 4 /* A bold guess, but the best we can make. */ +# endif +#endif +#ifndef UVTYPE +# define UVTYPE unsigned IVTYPE +#endif + +#ifndef UVSIZE +# define UVSIZE IVSIZE +#endif +#ifndef sv_setuv +# define sv_setuv(sv, uv) \ + STMT_START { \ + UV TeMpUv = uv; \ + if (TeMpUv <= IV_MAX) \ + sv_setiv(sv, TeMpUv); \ + else \ + sv_setnv(sv, (double)TeMpUv); \ + } STMT_END +#endif +#ifndef newSVuv +# define newSVuv(uv) ((uv) <= IV_MAX ? newSViv((IV)uv) : newSVnv((NV)uv)) +#endif +#ifndef sv_2uv +# define sv_2uv(sv) ((PL_Sv = (sv)), (UV) (SvNOK(PL_Sv) ? SvNV(PL_Sv) : sv_2nv(PL_Sv))) +#endif + +#ifndef SvUVX +# define SvUVX(sv) ((UV)SvIVX(sv)) +#endif + +#ifndef SvUVXx +# define SvUVXx(sv) SvUVX(sv) +#endif + +#ifndef SvUV +# define SvUV(sv) (SvIOK(sv) ? SvUVX(sv) : sv_2uv(sv)) +#endif + +#ifndef SvUVx +# define SvUVx(sv) ((PL_Sv = (sv)), SvUV(PL_Sv)) +#endif + +/* Hint: sv_uv + * Always use the SvUVx() macro instead of sv_uv(). + */ +#ifndef sv_uv +# define sv_uv(sv) SvUVx(sv) +#endif + +#if !defined(SvUOK) && defined(SvIOK_UV) +# define SvUOK(sv) SvIOK_UV(sv) +#endif +#ifndef XST_mUV +# define XST_mUV(i,v) (ST(i) = sv_2mortal(newSVuv(v)) ) +#endif + +#ifndef XSRETURN_UV +# define XSRETURN_UV(v) STMT_START { XST_mUV(0,v); XSRETURN(1); } STMT_END +#endif +#ifndef PUSHu +# define PUSHu(u) STMT_START { sv_setuv(TARG, (UV)(u)); PUSHTARG; } STMT_END +#endif + +#ifndef XPUSHu +# define XPUSHu(u) STMT_START { sv_setuv(TARG, (UV)(u)); XPUSHTARG; } STMT_END +#endif + +#ifdef HAS_MEMCMP +#ifndef memNE +# define memNE(s1,s2,l) (memcmp(s1,s2,l)) +#endif + +#ifndef memEQ +# define memEQ(s1,s2,l) (!memcmp(s1,s2,l)) +#endif + +#else +#ifndef memNE +# define memNE(s1,s2,l) (bcmp(s1,s2,l)) +#endif + +#ifndef memEQ +# define memEQ(s1,s2,l) (!bcmp(s1,s2,l)) +#endif + +#endif +#ifndef MoveD +# define MoveD(s,d,n,t) memmove((char*)(d),(char*)(s), (n) * sizeof(t)) +#endif + +#ifndef CopyD +# define CopyD(s,d,n,t) memcpy((char*)(d),(char*)(s), (n) * sizeof(t)) +#endif + +#ifdef HAS_MEMSET +#ifndef ZeroD +# define ZeroD(d,n,t) memzero((char*)(d), (n) * sizeof(t)) +#endif + +#else +#ifndef ZeroD +# define ZeroD(d,n,t) ((void)memzero((char*)(d), (n) * sizeof(t)), d) +#endif + +#endif +#ifndef PoisonWith +# define PoisonWith(d,n,t,b) (void)memset((char*)(d), (U8)(b), (n) * sizeof(t)) +#endif + +#ifndef PoisonNew +# define PoisonNew(d,n,t) PoisonWith(d,n,t,0xAB) +#endif + +#ifndef PoisonFree +# define PoisonFree(d,n,t) PoisonWith(d,n,t,0xEF) +#endif + +#ifndef Poison +# define Poison(d,n,t) PoisonFree(d,n,t) +#endif +#ifndef Newx +# define Newx(v,n,t) New(0,v,n,t) +#endif + +#ifndef Newxc +# define Newxc(v,n,t,c) Newc(0,v,n,t,c) +#endif + +#ifndef Newxz +# define Newxz(v,n,t) Newz(0,v,n,t) +#endif + +#ifndef PERL_UNUSED_DECL +# ifdef HASATTRIBUTE +# if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER) +# define PERL_UNUSED_DECL +# else +# define PERL_UNUSED_DECL __attribute__((unused)) +# endif +# else +# define PERL_UNUSED_DECL +# endif +#endif + +#ifndef PERL_UNUSED_ARG +# if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */ +# include +# define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x)) +# else +# define PERL_UNUSED_ARG(x) ((void)x) +# endif +#endif + +#ifndef PERL_UNUSED_VAR +# define PERL_UNUSED_VAR(x) ((void)x) +#endif + +#ifndef PERL_UNUSED_CONTEXT +# ifdef USE_ITHREADS +# define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl) +# else +# define PERL_UNUSED_CONTEXT +# endif +#endif +#ifndef NOOP +# define NOOP /*EMPTY*/(void)0 +#endif + +#ifndef dNOOP +# define dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL +#endif + +#ifndef NVTYPE +# if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) +# define NVTYPE long double +# else +# define NVTYPE double +# endif +typedef NVTYPE NV; +#endif + +#ifndef INT2PTR + +# if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE) +# define PTRV UV +# define INT2PTR(any,d) (any)(d) +# else +# if PTRSIZE == LONGSIZE +# define PTRV unsigned long +# else +# define PTRV unsigned +# endif +# define INT2PTR(any,d) (any)(PTRV)(d) +# endif + +# define NUM2PTR(any,d) (any)(PTRV)(d) +# define PTR2IV(p) INT2PTR(IV,p) +# define PTR2UV(p) INT2PTR(UV,p) +# define PTR2NV(p) NUM2PTR(NV,p) + +# if PTRSIZE == LONGSIZE +# define PTR2ul(p) (unsigned long)(p) +# else +# define PTR2ul(p) INT2PTR(unsigned long,p) +# endif + +#endif /* !INT2PTR */ + +#undef START_EXTERN_C +#undef END_EXTERN_C +#undef EXTERN_C +#ifdef __cplusplus +# define START_EXTERN_C extern "C" { +# define END_EXTERN_C } +# define EXTERN_C extern "C" +#else +# define START_EXTERN_C +# define END_EXTERN_C +# define EXTERN_C extern +#endif + +#if defined(PERL_GCC_PEDANTIC) +# ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN +# define PERL_GCC_BRACE_GROUPS_FORBIDDEN +# endif +#endif + +#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus) +# ifndef PERL_USE_GCC_BRACE_GROUPS +# define PERL_USE_GCC_BRACE_GROUPS +# endif +#endif + +#undef STMT_START +#undef STMT_END +#ifdef PERL_USE_GCC_BRACE_GROUPS +# define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */ +# define STMT_END ) +#else +# if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__) +# define STMT_START if (1) +# define STMT_END else (void)0 +# else +# define STMT_START do +# define STMT_END while (0) +# endif +#endif +#ifndef boolSV +# define boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no) +#endif + +/* DEFSV appears first in 5.004_56 */ +#ifndef DEFSV +# define DEFSV GvSV(PL_defgv) +#endif + +#ifndef SAVE_DEFSV +# define SAVE_DEFSV SAVESPTR(GvSV(PL_defgv)) +#endif + +/* Older perls (<=5.003) lack AvFILLp */ +#ifndef AvFILLp +# define AvFILLp AvFILL +#endif +#ifndef ERRSV +# define ERRSV get_sv("@",FALSE) +#endif +#ifndef newSVpvn +# define newSVpvn(data,len) ((data) \ + ? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \ + : newSV(0)) +#endif + +/* Hint: gv_stashpvn + * This function's backport doesn't support the length parameter, but + * rather ignores it. Portability can only be ensured if the length + * parameter is used for speed reasons, but the length can always be + * correctly computed from the string argument. + */ +#ifndef gv_stashpvn +# define gv_stashpvn(str,len,create) gv_stashpv(str,create) +#endif + +/* Replace: 1 */ +#ifndef get_cv +# define get_cv perl_get_cv +#endif + +#ifndef get_sv +# define get_sv perl_get_sv +#endif + +#ifndef get_av +# define get_av perl_get_av +#endif + +#ifndef get_hv +# define get_hv perl_get_hv +#endif + +/* Replace: 0 */ +#ifndef dUNDERBAR +# define dUNDERBAR dNOOP +#endif + +#ifndef UNDERBAR +# define UNDERBAR DEFSV +#endif +#ifndef dAX +# define dAX I32 ax = MARK - PL_stack_base + 1 +#endif + +#ifndef dITEMS +# define dITEMS I32 items = SP - MARK +#endif +#ifndef dXSTARG +# define dXSTARG SV * targ = sv_newmortal() +#endif +#ifndef dAXMARK +# define dAXMARK I32 ax = POPMARK; \ + register SV ** const mark = PL_stack_base + ax++ +#endif +#ifndef XSprePUSH +# define XSprePUSH (sp = PL_stack_base + ax - 1) +#endif + +#if (PERL_BCDVERSION < 0x5005000) +# undef XSRETURN +# define XSRETURN(off) \ + STMT_START { \ + PL_stack_sp = PL_stack_base + ax + ((off) - 1); \ + return; \ + } STMT_END +#endif +#ifndef PERL_ABS +# define PERL_ABS(x) ((x) < 0 ? -(x) : (x)) +#endif +#ifndef dVAR +# define dVAR dNOOP +#endif +#ifndef SVf +# define SVf "_" +#endif +#ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES UTF8_MAXLEN +#endif +#ifndef PERL_HASH +# define PERL_HASH(hash,str,len) \ + STMT_START { \ + const char *s_PeRlHaSh = str; \ + I32 i_PeRlHaSh = len; \ + U32 hash_PeRlHaSh = 0; \ + while (i_PeRlHaSh--) \ + hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \ + (hash) = hash_PeRlHaSh; \ + } STMT_END +#endif + +#ifndef PERL_SIGNALS_UNSAFE_FLAG + +#define PERL_SIGNALS_UNSAFE_FLAG 0x0001 + +#if (PERL_BCDVERSION < 0x5008000) +# define D_PPP_PERL_SIGNALS_INIT PERL_SIGNALS_UNSAFE_FLAG +#else +# define D_PPP_PERL_SIGNALS_INIT 0 +#endif + +#if defined(NEED_PL_signals) +static U32 DPPP_(my_PL_signals) = D_PPP_PERL_SIGNALS_INIT; +#elif defined(NEED_PL_signals_GLOBAL) +U32 DPPP_(my_PL_signals) = D_PPP_PERL_SIGNALS_INIT; +#else +extern U32 DPPP_(my_PL_signals); +#endif +#define PL_signals DPPP_(my_PL_signals) + +#endif + +/* Hint: PL_ppaddr + * Calling an op via PL_ppaddr requires passing a context argument + * for threaded builds. Since the context argument is different for + * 5.005 perls, you can use aTHXR (supplied by ppport.h), which will + * automatically be defined as the correct argument. + */ + +#if (PERL_BCDVERSION <= 0x5005005) +/* Replace: 1 */ +# define PL_ppaddr ppaddr +# define PL_no_modify no_modify +/* Replace: 0 */ +#endif + +#if (PERL_BCDVERSION <= 0x5004005) +/* Replace: 1 */ +# define PL_DBsignal DBsignal +# define PL_DBsingle DBsingle +# define PL_DBsub DBsub +# define PL_DBtrace DBtrace +# define PL_Sv Sv +# define PL_compiling compiling +# define PL_copline copline +# define PL_curcop curcop +# define PL_curstash curstash +# define PL_debstash debstash +# define PL_defgv defgv +# define PL_diehook diehook +# define PL_dirty dirty +# define PL_dowarn dowarn +# define PL_errgv errgv +# define PL_expect expect +# define PL_hexdigit hexdigit +# define PL_hints hints +# define PL_laststatval laststatval +# define PL_na na +# define PL_perl_destruct_level perl_destruct_level +# define PL_perldb perldb +# define PL_rsfp_filters rsfp_filters +# define PL_rsfp rsfp +# define PL_stack_base stack_base +# define PL_stack_sp stack_sp +# define PL_statcache statcache +# define PL_stdingv stdingv +# define PL_sv_arenaroot sv_arenaroot +# define PL_sv_no sv_no +# define PL_sv_undef sv_undef +# define PL_sv_yes sv_yes +# define PL_tainted tainted +# define PL_tainting tainting +/* Replace: 0 */ +#endif + +/* Warning: PL_expect, PL_copline, PL_rsfp, PL_rsfp_filters + * Do not use this variable. It is internal to the perl parser + * and may change or even be removed in the future. Note that + * as of perl 5.9.5 you cannot assign to this variable anymore. + */ + +/* TODO: cannot assign to these vars; is it worth fixing? */ +#if (PERL_BCDVERSION >= 0x5009005) +# define PL_expect (PL_parser ? PL_parser->expect : 0) +# define PL_copline (PL_parser ? PL_parser->copline : 0) +# define PL_rsfp (PL_parser ? PL_parser->rsfp : (PerlIO *) 0) +# define PL_rsfp_filters (PL_parser ? PL_parser->rsfp_filters : (AV *) 0) +#endif +#ifndef dTHR +# define dTHR dNOOP +#endif +#ifndef dTHX +# define dTHX dNOOP +#endif + +#ifndef dTHXa +# define dTHXa(x) dNOOP +#endif +#ifndef pTHX +# define pTHX void +#endif + +#ifndef pTHX_ +# define pTHX_ +#endif + +#ifndef aTHX +# define aTHX +#endif + +#ifndef aTHX_ +# define aTHX_ +#endif + +#if (PERL_BCDVERSION < 0x5006000) +# ifdef USE_THREADS +# define aTHXR thr +# define aTHXR_ thr, +# else +# define aTHXR +# define aTHXR_ +# endif +# define dTHXR dTHR +#else +# define aTHXR aTHX +# define aTHXR_ aTHX_ +# define dTHXR dTHX +#endif +#ifndef dTHXoa +# define dTHXoa(x) dTHXa(x) +#endif +#ifndef PUSHmortal +# define PUSHmortal PUSHs(sv_newmortal()) +#endif + +#ifndef mPUSHp +# define mPUSHp(p,l) sv_setpvn_mg(PUSHmortal, (p), (l)) +#endif + +#ifndef mPUSHn +# define mPUSHn(n) sv_setnv_mg(PUSHmortal, (NV)(n)) +#endif + +#ifndef mPUSHi +# define mPUSHi(i) sv_setiv_mg(PUSHmortal, (IV)(i)) +#endif + +#ifndef mPUSHu +# define mPUSHu(u) sv_setuv_mg(PUSHmortal, (UV)(u)) +#endif +#ifndef XPUSHmortal +# define XPUSHmortal XPUSHs(sv_newmortal()) +#endif + +#ifndef mXPUSHp +# define mXPUSHp(p,l) STMT_START { EXTEND(sp,1); sv_setpvn_mg(PUSHmortal, (p), (l)); } STMT_END +#endif + +#ifndef mXPUSHn +# define mXPUSHn(n) STMT_START { EXTEND(sp,1); sv_setnv_mg(PUSHmortal, (NV)(n)); } STMT_END +#endif + +#ifndef mXPUSHi +# define mXPUSHi(i) STMT_START { EXTEND(sp,1); sv_setiv_mg(PUSHmortal, (IV)(i)); } STMT_END +#endif + +#ifndef mXPUSHu +# define mXPUSHu(u) STMT_START { EXTEND(sp,1); sv_setuv_mg(PUSHmortal, (UV)(u)); } STMT_END +#endif + +/* Replace: 1 */ +#ifndef call_sv +# define call_sv perl_call_sv +#endif + +#ifndef call_pv +# define call_pv perl_call_pv +#endif + +#ifndef call_argv +# define call_argv perl_call_argv +#endif + +#ifndef call_method +# define call_method perl_call_method +#endif +#ifndef eval_sv +# define eval_sv perl_eval_sv +#endif +#ifndef PERL_LOADMOD_DENY +# define PERL_LOADMOD_DENY 0x1 +#endif + +#ifndef PERL_LOADMOD_NOIMPORT +# define PERL_LOADMOD_NOIMPORT 0x2 +#endif + +#ifndef PERL_LOADMOD_IMPORT_OPS +# define PERL_LOADMOD_IMPORT_OPS 0x4 +#endif + +/* Replace: 0 */ + +/* Replace perl_eval_pv with eval_pv */ + +#ifndef eval_pv +#if defined(NEED_eval_pv) +static SV* DPPP_(my_eval_pv)(char *p, I32 croak_on_error); +static +#else +extern SV* DPPP_(my_eval_pv)(char *p, I32 croak_on_error); +#endif + +#ifdef eval_pv +# undef eval_pv +#endif +#define eval_pv(a,b) DPPP_(my_eval_pv)(aTHX_ a,b) +#define Perl_eval_pv DPPP_(my_eval_pv) + +#if defined(NEED_eval_pv) || defined(NEED_eval_pv_GLOBAL) + +SV* +DPPP_(my_eval_pv)(char *p, I32 croak_on_error) +{ + dSP; + SV* sv = newSVpv(p, 0); + + PUSHMARK(sp); + eval_sv(sv, G_SCALAR); + SvREFCNT_dec(sv); + + SPAGAIN; + sv = POPs; + PUTBACK; + + if (croak_on_error && SvTRUE(GvSV(errgv))) + croak(SvPVx(GvSV(errgv), na)); + + return sv; +} + +#endif +#endif + +#ifndef vload_module +#if defined(NEED_vload_module) +static void DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args); +static +#else +extern void DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args); +#endif + +#ifdef vload_module +# undef vload_module +#endif +#define vload_module(a,b,c,d) DPPP_(my_vload_module)(aTHX_ a,b,c,d) +#define Perl_vload_module DPPP_(my_vload_module) + +#if defined(NEED_vload_module) || defined(NEED_vload_module_GLOBAL) + +void +DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args) +{ + dTHR; + dVAR; + OP *veop, *imop; + + OP * const modname = newSVOP(OP_CONST, 0, name); + /* 5.005 has a somewhat hacky force_normal that doesn't croak on + SvREADONLY() if PL_compling is true. Current perls take care in + ck_require() to correctly turn off SvREADONLY before calling + force_normal_flags(). This seems a better fix than fudging PL_compling + */ + SvREADONLY_off(((SVOP*)modname)->op_sv); + modname->op_private |= OPpCONST_BARE; + if (ver) { + veop = newSVOP(OP_CONST, 0, ver); + } + else + veop = NULL; + if (flags & PERL_LOADMOD_NOIMPORT) { + imop = sawparens(newNULLLIST()); + } + else if (flags & PERL_LOADMOD_IMPORT_OPS) { + imop = va_arg(*args, OP*); + } + else { + SV *sv; + imop = NULL; + sv = va_arg(*args, SV*); + while (sv) { + imop = append_elem(OP_LIST, imop, newSVOP(OP_CONST, 0, sv)); + sv = va_arg(*args, SV*); + } + } + { + const line_t ocopline = PL_copline; + COP * const ocurcop = PL_curcop; + const int oexpect = PL_expect; + +#if (PERL_BCDVERSION >= 0x5004000) + utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(FALSE, 0), + veop, modname, imop); +#else + utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(), + modname, imop); +#endif + PL_expect = oexpect; + PL_copline = ocopline; + PL_curcop = ocurcop; + } +} + +#endif +#endif + +#ifndef load_module +#if defined(NEED_load_module) +static void DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...); +static +#else +extern void DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...); +#endif + +#ifdef load_module +# undef load_module +#endif +#define load_module DPPP_(my_load_module) +#define Perl_load_module DPPP_(my_load_module) + +#if defined(NEED_load_module) || defined(NEED_load_module_GLOBAL) + +void +DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...) +{ + va_list args; + va_start(args, ver); + vload_module(flags, name, ver, &args); + va_end(args); +} + +#endif +#endif +#ifndef newRV_inc +# define newRV_inc(sv) newRV(sv) /* Replace */ +#endif + +#ifndef newRV_noinc +#if defined(NEED_newRV_noinc) +static SV * DPPP_(my_newRV_noinc)(SV *sv); +static +#else +extern SV * DPPP_(my_newRV_noinc)(SV *sv); +#endif + +#ifdef newRV_noinc +# undef newRV_noinc +#endif +#define newRV_noinc(a) DPPP_(my_newRV_noinc)(aTHX_ a) +#define Perl_newRV_noinc DPPP_(my_newRV_noinc) + +#if defined(NEED_newRV_noinc) || defined(NEED_newRV_noinc_GLOBAL) +SV * +DPPP_(my_newRV_noinc)(SV *sv) +{ + SV *rv = (SV *)newRV(sv); + SvREFCNT_dec(sv); + return rv; +} +#endif +#endif + +/* Hint: newCONSTSUB + * Returns a CV* as of perl-5.7.1. This return value is not supported + * by Devel::PPPort. + */ + +/* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */ +#if (PERL_BCDVERSION < 0x5004063) && (PERL_BCDVERSION != 0x5004005) +#if defined(NEED_newCONSTSUB) +static void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv); +static +#else +extern void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv); +#endif + +#ifdef newCONSTSUB +# undef newCONSTSUB +#endif +#define newCONSTSUB(a,b,c) DPPP_(my_newCONSTSUB)(aTHX_ a,b,c) +#define Perl_newCONSTSUB DPPP_(my_newCONSTSUB) + +#if defined(NEED_newCONSTSUB) || defined(NEED_newCONSTSUB_GLOBAL) + +void +DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv) +{ + U32 oldhints = PL_hints; + HV *old_cop_stash = PL_curcop->cop_stash; + HV *old_curstash = PL_curstash; + line_t oldline = PL_curcop->cop_line; + PL_curcop->cop_line = PL_copline; + + PL_hints &= ~HINT_BLOCK_SCOPE; + if (stash) + PL_curstash = PL_curcop->cop_stash = stash; + + newSUB( + +#if (PERL_BCDVERSION < 0x5003022) + start_subparse(), +#elif (PERL_BCDVERSION == 0x5003022) + start_subparse(0), +#else /* 5.003_23 onwards */ + start_subparse(FALSE, 0), +#endif + + newSVOP(OP_CONST, 0, newSVpv((char *) name, 0)), + newSVOP(OP_CONST, 0, &PL_sv_no), /* SvPV(&PL_sv_no) == "" -- GMB */ + newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv)) + ); + + PL_hints = oldhints; + PL_curcop->cop_stash = old_cop_stash; + PL_curstash = old_curstash; + PL_curcop->cop_line = oldline; +} +#endif +#endif + +/* + * Boilerplate macros for initializing and accessing interpreter-local + * data from C. All statics in extensions should be reworked to use + * this, if you want to make the extension thread-safe. See ext/re/re.xs + * for an example of the use of these macros. + * + * Code that uses these macros is responsible for the following: + * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts" + * 2. Declare a typedef named my_cxt_t that is a structure that contains + * all the data that needs to be interpreter-local. + * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t. + * 4. Use the MY_CXT_INIT macro such that it is called exactly once + * (typically put in the BOOT: section). + * 5. Use the members of the my_cxt_t structure everywhere as + * MY_CXT.member. + * 6. Use the dMY_CXT macro (a declaration) in all the functions that + * access MY_CXT. + */ + +#if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \ + defined(PERL_CAPI) || defined(PERL_IMPLICIT_CONTEXT) + +#ifndef START_MY_CXT + +/* This must appear in all extensions that define a my_cxt_t structure, + * right after the definition (i.e. at file scope). The non-threads + * case below uses it to declare the data as static. */ +#define START_MY_CXT + +#if (PERL_BCDVERSION < 0x5004068) +/* Fetches the SV that keeps the per-interpreter data. */ +#define dMY_CXT_SV \ + SV *my_cxt_sv = get_sv(MY_CXT_KEY, FALSE) +#else /* >= perl5.004_68 */ +#define dMY_CXT_SV \ + SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY, \ + sizeof(MY_CXT_KEY)-1, TRUE) +#endif /* < perl5.004_68 */ + +/* This declaration should be used within all functions that use the + * interpreter-local data. */ +#define dMY_CXT \ + dMY_CXT_SV; \ + my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv)) + +/* Creates and zeroes the per-interpreter data. + * (We allocate my_cxtp in a Perl SV so that it will be released when + * the interpreter goes away.) */ +#define MY_CXT_INIT \ + dMY_CXT_SV; \ + /* newSV() allocates one more than needed */ \ + my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\ + Zero(my_cxtp, 1, my_cxt_t); \ + sv_setuv(my_cxt_sv, PTR2UV(my_cxtp)) + +/* This macro must be used to access members of the my_cxt_t structure. + * e.g. MYCXT.some_data */ +#define MY_CXT (*my_cxtp) + +/* Judicious use of these macros can reduce the number of times dMY_CXT + * is used. Use is similar to pTHX, aTHX etc. */ +#define pMY_CXT my_cxt_t *my_cxtp +#define pMY_CXT_ pMY_CXT, +#define _pMY_CXT ,pMY_CXT +#define aMY_CXT my_cxtp +#define aMY_CXT_ aMY_CXT, +#define _aMY_CXT ,aMY_CXT + +#endif /* START_MY_CXT */ + +#ifndef MY_CXT_CLONE +/* Clones the per-interpreter data. */ +#define MY_CXT_CLONE \ + dMY_CXT_SV; \ + my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\ + Copy(INT2PTR(my_cxt_t*, SvUV(my_cxt_sv)), my_cxtp, 1, my_cxt_t);\ + sv_setuv(my_cxt_sv, PTR2UV(my_cxtp)) +#endif + +#else /* single interpreter */ + +#ifndef START_MY_CXT + +#define START_MY_CXT static my_cxt_t my_cxt; +#define dMY_CXT_SV dNOOP +#define dMY_CXT dNOOP +#define MY_CXT_INIT NOOP +#define MY_CXT my_cxt + +#define pMY_CXT void +#define pMY_CXT_ +#define _pMY_CXT +#define aMY_CXT +#define aMY_CXT_ +#define _aMY_CXT + +#endif /* START_MY_CXT */ + +#ifndef MY_CXT_CLONE +#define MY_CXT_CLONE NOOP +#endif + +#endif + +#ifndef IVdf +# if IVSIZE == LONGSIZE +# define IVdf "ld" +# define UVuf "lu" +# define UVof "lo" +# define UVxf "lx" +# define UVXf "lX" +# else +# if IVSIZE == INTSIZE +# define IVdf "d" +# define UVuf "u" +# define UVof "o" +# define UVxf "x" +# define UVXf "X" +# endif +# endif +#endif + +#ifndef NVef +# if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) && \ + defined(PERL_PRIfldbl) /* Not very likely, but let's try anyway. */ +# define NVef PERL_PRIeldbl +# define NVff PERL_PRIfldbl +# define NVgf PERL_PRIgldbl +# else +# define NVef "e" +# define NVff "f" +# define NVgf "g" +# endif +#endif + +#ifndef SvREFCNT_inc +# ifdef PERL_USE_GCC_BRACE_GROUPS +# define SvREFCNT_inc(sv) \ + ({ \ + SV * const _sv = (SV*)(sv); \ + if (_sv) \ + (SvREFCNT(_sv))++; \ + _sv; \ + }) +# else +# define SvREFCNT_inc(sv) \ + ((PL_Sv=(SV*)(sv)) ? (++(SvREFCNT(PL_Sv)),PL_Sv) : NULL) +# endif +#endif + +#ifndef SvREFCNT_inc_simple +# ifdef PERL_USE_GCC_BRACE_GROUPS +# define SvREFCNT_inc_simple(sv) \ + ({ \ + if (sv) \ + (SvREFCNT(sv))++; \ + (SV *)(sv); \ + }) +# else +# define SvREFCNT_inc_simple(sv) \ + ((sv) ? (SvREFCNT(sv)++,(SV*)(sv)) : NULL) +# endif +#endif + +#ifndef SvREFCNT_inc_NN +# ifdef PERL_USE_GCC_BRACE_GROUPS +# define SvREFCNT_inc_NN(sv) \ + ({ \ + SV * const _sv = (SV*)(sv); \ + SvREFCNT(_sv)++; \ + _sv; \ + }) +# else +# define SvREFCNT_inc_NN(sv) \ + (PL_Sv=(SV*)(sv),++(SvREFCNT(PL_Sv)),PL_Sv) +# endif +#endif + +#ifndef SvREFCNT_inc_void +# ifdef PERL_USE_GCC_BRACE_GROUPS +# define SvREFCNT_inc_void(sv) \ + ({ \ + SV * const _sv = (SV*)(sv); \ + if (_sv) \ + (void)(SvREFCNT(_sv)++); \ + }) +# else +# define SvREFCNT_inc_void(sv) \ + (void)((PL_Sv=(SV*)(sv)) ? ++(SvREFCNT(PL_Sv)) : 0) +# endif +#endif +#ifndef SvREFCNT_inc_simple_void +# define SvREFCNT_inc_simple_void(sv) STMT_START { if (sv) SvREFCNT(sv)++; } STMT_END +#endif + +#ifndef SvREFCNT_inc_simple_NN +# define SvREFCNT_inc_simple_NN(sv) (++SvREFCNT(sv), (SV*)(sv)) +#endif + +#ifndef SvREFCNT_inc_void_NN +# define SvREFCNT_inc_void_NN(sv) (void)(++SvREFCNT((SV*)(sv))) +#endif + +#ifndef SvREFCNT_inc_simple_void_NN +# define SvREFCNT_inc_simple_void_NN(sv) (void)(++SvREFCNT((SV*)(sv))) +#endif + +/* Backwards compatibility stuff... :-( */ +#if !defined(NEED_sv_2pv_flags) && defined(NEED_sv_2pv_nolen) +# define NEED_sv_2pv_flags +#endif +#if !defined(NEED_sv_2pv_flags_GLOBAL) && defined(NEED_sv_2pv_nolen_GLOBAL) +# define NEED_sv_2pv_flags_GLOBAL +#endif + +/* Hint: sv_2pv_nolen + * Use the SvPV_nolen() or SvPV_nolen_const() macros instead of sv_2pv_nolen(). + */ +#ifndef sv_2pv_nolen +# define sv_2pv_nolen(sv) SvPV_nolen(sv) +#endif + +#ifdef SvPVbyte + +/* Hint: SvPVbyte + * Does not work in perl-5.6.1, ppport.h implements a version + * borrowed from perl-5.7.3. + */ + +#if (PERL_BCDVERSION < 0x5007000) + +#if defined(NEED_sv_2pvbyte) +static char * DPPP_(my_sv_2pvbyte)(pTHX_ SV * sv, STRLEN * lp); +static +#else +extern char * DPPP_(my_sv_2pvbyte)(pTHX_ SV * sv, STRLEN * lp); +#endif + +#ifdef sv_2pvbyte +# undef sv_2pvbyte +#endif +#define sv_2pvbyte(a,b) DPPP_(my_sv_2pvbyte)(aTHX_ a,b) +#define Perl_sv_2pvbyte DPPP_(my_sv_2pvbyte) + +#if defined(NEED_sv_2pvbyte) || defined(NEED_sv_2pvbyte_GLOBAL) + +char * +DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp) +{ + sv_utf8_downgrade(sv,0); + return SvPV(sv,*lp); +} + +#endif + +/* Hint: sv_2pvbyte + * Use the SvPVbyte() macro instead of sv_2pvbyte(). + */ + +#undef SvPVbyte + +#define SvPVbyte(sv, lp) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \ + ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pvbyte(sv, &lp)) + +#endif + +#else + +# define SvPVbyte SvPV +# define sv_2pvbyte sv_2pv + +#endif +#ifndef sv_2pvbyte_nolen +# define sv_2pvbyte_nolen(sv) sv_2pv_nolen(sv) +#endif + +/* Hint: sv_pvn + * Always use the SvPV() macro instead of sv_pvn(). + */ + +/* Hint: sv_pvn_force + * Always use the SvPV_force() macro instead of sv_pvn_force(). + */ + +/* If these are undefined, they're not handled by the core anyway */ +#ifndef SV_IMMEDIATE_UNREF +# define SV_IMMEDIATE_UNREF 0 +#endif + +#ifndef SV_GMAGIC +# define SV_GMAGIC 0 +#endif + +#ifndef SV_COW_DROP_PV +# define SV_COW_DROP_PV 0 +#endif + +#ifndef SV_UTF8_NO_ENCODING +# define SV_UTF8_NO_ENCODING 0 +#endif + +#ifndef SV_NOSTEAL +# define SV_NOSTEAL 0 +#endif + +#ifndef SV_CONST_RETURN +# define SV_CONST_RETURN 0 +#endif + +#ifndef SV_MUTABLE_RETURN +# define SV_MUTABLE_RETURN 0 +#endif + +#ifndef SV_SMAGIC +# define SV_SMAGIC 0 +#endif + +#ifndef SV_HAS_TRAILING_NUL +# define SV_HAS_TRAILING_NUL 0 +#endif + +#ifndef SV_COW_SHARED_HASH_KEYS +# define SV_COW_SHARED_HASH_KEYS 0 +#endif + +#if (PERL_BCDVERSION < 0x5007002) + +#if defined(NEED_sv_2pv_flags) +static char * DPPP_(my_sv_2pv_flags)(pTHX_ SV * sv, STRLEN * lp, I32 flags); +static +#else +extern char * DPPP_(my_sv_2pv_flags)(pTHX_ SV * sv, STRLEN * lp, I32 flags); +#endif + +#ifdef sv_2pv_flags +# undef sv_2pv_flags +#endif +#define sv_2pv_flags(a,b,c) DPPP_(my_sv_2pv_flags)(aTHX_ a,b,c) +#define Perl_sv_2pv_flags DPPP_(my_sv_2pv_flags) + +#if defined(NEED_sv_2pv_flags) || defined(NEED_sv_2pv_flags_GLOBAL) + +char * +DPPP_(my_sv_2pv_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags) +{ + STRLEN n_a = (STRLEN) flags; + return sv_2pv(sv, lp ? lp : &n_a); +} + +#endif + +#if defined(NEED_sv_pvn_force_flags) +static char * DPPP_(my_sv_pvn_force_flags)(pTHX_ SV * sv, STRLEN * lp, I32 flags); +static +#else +extern char * DPPP_(my_sv_pvn_force_flags)(pTHX_ SV * sv, STRLEN * lp, I32 flags); +#endif + +#ifdef sv_pvn_force_flags +# undef sv_pvn_force_flags +#endif +#define sv_pvn_force_flags(a,b,c) DPPP_(my_sv_pvn_force_flags)(aTHX_ a,b,c) +#define Perl_sv_pvn_force_flags DPPP_(my_sv_pvn_force_flags) + +#if defined(NEED_sv_pvn_force_flags) || defined(NEED_sv_pvn_force_flags_GLOBAL) + +char * +DPPP_(my_sv_pvn_force_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags) +{ + STRLEN n_a = (STRLEN) flags; + return sv_pvn_force(sv, lp ? lp : &n_a); +} + +#endif + +#endif +#ifndef SvPV_const +# define SvPV_const(sv, lp) SvPV_flags_const(sv, lp, SV_GMAGIC) +#endif + +#ifndef SvPV_mutable +# define SvPV_mutable(sv, lp) SvPV_flags_mutable(sv, lp, SV_GMAGIC) +#endif +#ifndef SvPV_flags +# define SvPV_flags(sv, lp, flags) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pv_flags(sv, &lp, flags)) +#endif +#ifndef SvPV_flags_const +# define SvPV_flags_const(sv, lp, flags) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? ((lp = SvCUR(sv)), SvPVX_const(sv)) : \ + (const char*) sv_2pv_flags(sv, &lp, flags|SV_CONST_RETURN)) +#endif +#ifndef SvPV_flags_const_nolen +# define SvPV_flags_const_nolen(sv, flags) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? SvPVX_const(sv) : \ + (const char*) sv_2pv_flags(sv, 0, flags|SV_CONST_RETURN)) +#endif +#ifndef SvPV_flags_mutable +# define SvPV_flags_mutable(sv, lp, flags) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? ((lp = SvCUR(sv)), SvPVX_mutable(sv)) : \ + sv_2pv_flags(sv, &lp, flags|SV_MUTABLE_RETURN)) +#endif +#ifndef SvPV_force +# define SvPV_force(sv, lp) SvPV_force_flags(sv, lp, SV_GMAGIC) +#endif + +#ifndef SvPV_force_nolen +# define SvPV_force_nolen(sv) SvPV_force_flags_nolen(sv, SV_GMAGIC) +#endif + +#ifndef SvPV_force_mutable +# define SvPV_force_mutable(sv, lp) SvPV_force_flags_mutable(sv, lp, SV_GMAGIC) +#endif + +#ifndef SvPV_force_nomg +# define SvPV_force_nomg(sv, lp) SvPV_force_flags(sv, lp, 0) +#endif + +#ifndef SvPV_force_nomg_nolen +# define SvPV_force_nomg_nolen(sv) SvPV_force_flags_nolen(sv, 0) +#endif +#ifndef SvPV_force_flags +# define SvPV_force_flags(sv, lp, flags) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ + ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvn_force_flags(sv, &lp, flags)) +#endif +#ifndef SvPV_force_flags_nolen +# define SvPV_force_flags_nolen(sv, flags) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ + ? SvPVX(sv) : sv_pvn_force_flags(sv, 0, flags)) +#endif +#ifndef SvPV_force_flags_mutable +# define SvPV_force_flags_mutable(sv, lp, flags) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ + ? ((lp = SvCUR(sv)), SvPVX_mutable(sv)) \ + : sv_pvn_force_flags(sv, &lp, flags|SV_MUTABLE_RETURN)) +#endif +#ifndef SvPV_nolen +# define SvPV_nolen(sv) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? SvPVX(sv) : sv_2pv_flags(sv, 0, SV_GMAGIC)) +#endif +#ifndef SvPV_nolen_const +# define SvPV_nolen_const(sv) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? SvPVX_const(sv) : sv_2pv_flags(sv, 0, SV_GMAGIC|SV_CONST_RETURN)) +#endif +#ifndef SvPV_nomg +# define SvPV_nomg(sv, lp) SvPV_flags(sv, lp, 0) +#endif + +#ifndef SvPV_nomg_const +# define SvPV_nomg_const(sv, lp) SvPV_flags_const(sv, lp, 0) +#endif + +#ifndef SvPV_nomg_const_nolen +# define SvPV_nomg_const_nolen(sv) SvPV_flags_const_nolen(sv, 0) +#endif +#ifndef SvMAGIC_set +# define SvMAGIC_set(sv, val) \ + STMT_START { assert(SvTYPE(sv) >= SVt_PVMG); \ + (((XPVMG*) SvANY(sv))->xmg_magic = (val)); } STMT_END +#endif + +#if (PERL_BCDVERSION < 0x5009003) +#ifndef SvPVX_const +# define SvPVX_const(sv) ((const char*) (0 + SvPVX(sv))) +#endif + +#ifndef SvPVX_mutable +# define SvPVX_mutable(sv) (0 + SvPVX(sv)) +#endif +#ifndef SvRV_set +# define SvRV_set(sv, val) \ + STMT_START { assert(SvTYPE(sv) >= SVt_RV); \ + (((XRV*) SvANY(sv))->xrv_rv = (val)); } STMT_END +#endif + +#else +#ifndef SvPVX_const +# define SvPVX_const(sv) ((const char*)((sv)->sv_u.svu_pv)) +#endif + +#ifndef SvPVX_mutable +# define SvPVX_mutable(sv) ((sv)->sv_u.svu_pv) +#endif +#ifndef SvRV_set +# define SvRV_set(sv, val) \ + STMT_START { assert(SvTYPE(sv) >= SVt_RV); \ + ((sv)->sv_u.svu_rv = (val)); } STMT_END +#endif + +#endif +#ifndef SvSTASH_set +# define SvSTASH_set(sv, val) \ + STMT_START { assert(SvTYPE(sv) >= SVt_PVMG); \ + (((XPVMG*) SvANY(sv))->xmg_stash = (val)); } STMT_END +#endif + +#if (PERL_BCDVERSION < 0x5004000) +#ifndef SvUV_set +# define SvUV_set(sv, val) \ + STMT_START { assert(SvTYPE(sv) == SVt_IV || SvTYPE(sv) >= SVt_PVIV); \ + (((XPVIV*) SvANY(sv))->xiv_iv = (IV) (val)); } STMT_END +#endif + +#else +#ifndef SvUV_set +# define SvUV_set(sv, val) \ + STMT_START { assert(SvTYPE(sv) == SVt_IV || SvTYPE(sv) >= SVt_PVIV); \ + (((XPVUV*) SvANY(sv))->xuv_uv = (val)); } STMT_END +#endif + +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(vnewSVpvf) +#if defined(NEED_vnewSVpvf) +static SV * DPPP_(my_vnewSVpvf)(pTHX_ const char * pat, va_list * args); +static +#else +extern SV * DPPP_(my_vnewSVpvf)(pTHX_ const char * pat, va_list * args); +#endif + +#ifdef vnewSVpvf +# undef vnewSVpvf +#endif +#define vnewSVpvf(a,b) DPPP_(my_vnewSVpvf)(aTHX_ a,b) +#define Perl_vnewSVpvf DPPP_(my_vnewSVpvf) + +#if defined(NEED_vnewSVpvf) || defined(NEED_vnewSVpvf_GLOBAL) + +SV * +DPPP_(my_vnewSVpvf)(pTHX_ const char *pat, va_list *args) +{ + register SV *sv = newSV(0); + sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); + return sv; +} + +#endif +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vcatpvf) +# define sv_vcatpvf(sv, pat, args) sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)) +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vsetpvf) +# define sv_vsetpvf(sv, pat, args) sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)) +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_catpvf_mg) +#if defined(NEED_sv_catpvf_mg) +static void DPPP_(my_sv_catpvf_mg)(pTHX_ SV * sv, const char * pat, ...); +static +#else +extern void DPPP_(my_sv_catpvf_mg)(pTHX_ SV * sv, const char * pat, ...); +#endif + +#define Perl_sv_catpvf_mg DPPP_(my_sv_catpvf_mg) + +#if defined(NEED_sv_catpvf_mg) || defined(NEED_sv_catpvf_mg_GLOBAL) + +void +DPPP_(my_sv_catpvf_mg)(pTHX_ SV *sv, const char *pat, ...) +{ + va_list args; + va_start(args, pat); + sv_vcatpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); + SvSETMAGIC(sv); + va_end(args); +} + +#endif +#endif + +#ifdef PERL_IMPLICIT_CONTEXT +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_catpvf_mg_nocontext) +#if defined(NEED_sv_catpvf_mg_nocontext) +static void DPPP_(my_sv_catpvf_mg_nocontext)(SV * sv, const char * pat, ...); +static +#else +extern void DPPP_(my_sv_catpvf_mg_nocontext)(SV * sv, const char * pat, ...); +#endif + +#define sv_catpvf_mg_nocontext DPPP_(my_sv_catpvf_mg_nocontext) +#define Perl_sv_catpvf_mg_nocontext DPPP_(my_sv_catpvf_mg_nocontext) + +#if defined(NEED_sv_catpvf_mg_nocontext) || defined(NEED_sv_catpvf_mg_nocontext_GLOBAL) + +void +DPPP_(my_sv_catpvf_mg_nocontext)(SV *sv, const char *pat, ...) +{ + dTHX; + va_list args; + va_start(args, pat); + sv_vcatpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); + SvSETMAGIC(sv); + va_end(args); +} + +#endif +#endif +#endif + +/* sv_catpvf_mg depends on sv_catpvf_mg_nocontext */ +#ifndef sv_catpvf_mg +# ifdef PERL_IMPLICIT_CONTEXT +# define sv_catpvf_mg Perl_sv_catpvf_mg_nocontext +# else +# define sv_catpvf_mg Perl_sv_catpvf_mg +# endif +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vcatpvf_mg) +# define sv_vcatpvf_mg(sv, pat, args) \ + STMT_START { \ + sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); \ + SvSETMAGIC(sv); \ + } STMT_END +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_setpvf_mg) +#if defined(NEED_sv_setpvf_mg) +static void DPPP_(my_sv_setpvf_mg)(pTHX_ SV * sv, const char * pat, ...); +static +#else +extern void DPPP_(my_sv_setpvf_mg)(pTHX_ SV * sv, const char * pat, ...); +#endif + +#define Perl_sv_setpvf_mg DPPP_(my_sv_setpvf_mg) + +#if defined(NEED_sv_setpvf_mg) || defined(NEED_sv_setpvf_mg_GLOBAL) + +void +DPPP_(my_sv_setpvf_mg)(pTHX_ SV *sv, const char *pat, ...) +{ + va_list args; + va_start(args, pat); + sv_vsetpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); + SvSETMAGIC(sv); + va_end(args); +} + +#endif +#endif + +#ifdef PERL_IMPLICIT_CONTEXT +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_setpvf_mg_nocontext) +#if defined(NEED_sv_setpvf_mg_nocontext) +static void DPPP_(my_sv_setpvf_mg_nocontext)(SV * sv, const char * pat, ...); +static +#else +extern void DPPP_(my_sv_setpvf_mg_nocontext)(SV * sv, const char * pat, ...); +#endif + +#define sv_setpvf_mg_nocontext DPPP_(my_sv_setpvf_mg_nocontext) +#define Perl_sv_setpvf_mg_nocontext DPPP_(my_sv_setpvf_mg_nocontext) + +#if defined(NEED_sv_setpvf_mg_nocontext) || defined(NEED_sv_setpvf_mg_nocontext_GLOBAL) + +void +DPPP_(my_sv_setpvf_mg_nocontext)(SV *sv, const char *pat, ...) +{ + dTHX; + va_list args; + va_start(args, pat); + sv_vsetpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); + SvSETMAGIC(sv); + va_end(args); +} + +#endif +#endif +#endif + +/* sv_setpvf_mg depends on sv_setpvf_mg_nocontext */ +#ifndef sv_setpvf_mg +# ifdef PERL_IMPLICIT_CONTEXT +# define sv_setpvf_mg Perl_sv_setpvf_mg_nocontext +# else +# define sv_setpvf_mg Perl_sv_setpvf_mg +# endif +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vsetpvf_mg) +# define sv_vsetpvf_mg(sv, pat, args) \ + STMT_START { \ + sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); \ + SvSETMAGIC(sv); \ + } STMT_END +#endif + +#ifndef newSVpvn_share + +#if defined(NEED_newSVpvn_share) +static SV * DPPP_(my_newSVpvn_share)(pTHX_ const char *src, I32 len, U32 hash); +static +#else +extern SV * DPPP_(my_newSVpvn_share)(pTHX_ const char *src, I32 len, U32 hash); +#endif + +#ifdef newSVpvn_share +# undef newSVpvn_share +#endif +#define newSVpvn_share(a,b,c) DPPP_(my_newSVpvn_share)(aTHX_ a,b,c) +#define Perl_newSVpvn_share DPPP_(my_newSVpvn_share) + +#if defined(NEED_newSVpvn_share) || defined(NEED_newSVpvn_share_GLOBAL) + +SV * +DPPP_(my_newSVpvn_share)(pTHX_ const char *src, I32 len, U32 hash) +{ + SV *sv; + if (len < 0) + len = -len; + if (!hash) + PERL_HASH(hash, (char*) src, len); + sv = newSVpvn((char *) src, len); + sv_upgrade(sv, SVt_PVIV); + SvIVX(sv) = hash; + SvREADONLY_on(sv); + SvPOK_on(sv); + return sv; +} + +#endif + +#endif +#ifndef SvSHARED_HASH +# define SvSHARED_HASH(sv) (0 + SvUVX(sv)) +#endif +#ifndef WARN_ALL +# define WARN_ALL 0 +#endif + +#ifndef WARN_CLOSURE +# define WARN_CLOSURE 1 +#endif + +#ifndef WARN_DEPRECATED +# define WARN_DEPRECATED 2 +#endif + +#ifndef WARN_EXITING +# define WARN_EXITING 3 +#endif + +#ifndef WARN_GLOB +# define WARN_GLOB 4 +#endif + +#ifndef WARN_IO +# define WARN_IO 5 +#endif + +#ifndef WARN_CLOSED +# define WARN_CLOSED 6 +#endif + +#ifndef WARN_EXEC +# define WARN_EXEC 7 +#endif + +#ifndef WARN_LAYER +# define WARN_LAYER 8 +#endif + +#ifndef WARN_NEWLINE +# define WARN_NEWLINE 9 +#endif + +#ifndef WARN_PIPE +# define WARN_PIPE 10 +#endif + +#ifndef WARN_UNOPENED +# define WARN_UNOPENED 11 +#endif + +#ifndef WARN_MISC +# define WARN_MISC 12 +#endif + +#ifndef WARN_NUMERIC +# define WARN_NUMERIC 13 +#endif + +#ifndef WARN_ONCE +# define WARN_ONCE 14 +#endif + +#ifndef WARN_OVERFLOW +# define WARN_OVERFLOW 15 +#endif + +#ifndef WARN_PACK +# define WARN_PACK 16 +#endif + +#ifndef WARN_PORTABLE +# define WARN_PORTABLE 17 +#endif + +#ifndef WARN_RECURSION +# define WARN_RECURSION 18 +#endif + +#ifndef WARN_REDEFINE +# define WARN_REDEFINE 19 +#endif + +#ifndef WARN_REGEXP +# define WARN_REGEXP 20 +#endif + +#ifndef WARN_SEVERE +# define WARN_SEVERE 21 +#endif + +#ifndef WARN_DEBUGGING +# define WARN_DEBUGGING 22 +#endif + +#ifndef WARN_INPLACE +# define WARN_INPLACE 23 +#endif + +#ifndef WARN_INTERNAL +# define WARN_INTERNAL 24 +#endif + +#ifndef WARN_MALLOC +# define WARN_MALLOC 25 +#endif + +#ifndef WARN_SIGNAL +# define WARN_SIGNAL 26 +#endif + +#ifndef WARN_SUBSTR +# define WARN_SUBSTR 27 +#endif + +#ifndef WARN_SYNTAX +# define WARN_SYNTAX 28 +#endif + +#ifndef WARN_AMBIGUOUS +# define WARN_AMBIGUOUS 29 +#endif + +#ifndef WARN_BAREWORD +# define WARN_BAREWORD 30 +#endif + +#ifndef WARN_DIGIT +# define WARN_DIGIT 31 +#endif + +#ifndef WARN_PARENTHESIS +# define WARN_PARENTHESIS 32 +#endif + +#ifndef WARN_PRECEDENCE +# define WARN_PRECEDENCE 33 +#endif + +#ifndef WARN_PRINTF +# define WARN_PRINTF 34 +#endif + +#ifndef WARN_PROTOTYPE +# define WARN_PROTOTYPE 35 +#endif + +#ifndef WARN_QW +# define WARN_QW 36 +#endif + +#ifndef WARN_RESERVED +# define WARN_RESERVED 37 +#endif + +#ifndef WARN_SEMICOLON +# define WARN_SEMICOLON 38 +#endif + +#ifndef WARN_TAINT +# define WARN_TAINT 39 +#endif + +#ifndef WARN_THREADS +# define WARN_THREADS 40 +#endif + +#ifndef WARN_UNINITIALIZED +# define WARN_UNINITIALIZED 41 +#endif + +#ifndef WARN_UNPACK +# define WARN_UNPACK 42 +#endif + +#ifndef WARN_UNTIE +# define WARN_UNTIE 43 +#endif + +#ifndef WARN_UTF8 +# define WARN_UTF8 44 +#endif + +#ifndef WARN_VOID +# define WARN_VOID 45 +#endif + +#ifndef WARN_ASSERTIONS +# define WARN_ASSERTIONS 46 +#endif +#ifndef packWARN +# define packWARN(a) (a) +#endif + +#ifndef ckWARN +# ifdef G_WARN_ON +# define ckWARN(a) (PL_dowarn & G_WARN_ON) +# else +# define ckWARN(a) PL_dowarn +# endif +#endif + +#if (PERL_BCDVERSION >= 0x5004000) && !defined(warner) +#if defined(NEED_warner) +static void DPPP_(my_warner)(U32 err, const char *pat, ...); +static +#else +extern void DPPP_(my_warner)(U32 err, const char *pat, ...); +#endif + +#define Perl_warner DPPP_(my_warner) + +#if defined(NEED_warner) || defined(NEED_warner_GLOBAL) + +void +DPPP_(my_warner)(U32 err, const char *pat, ...) +{ + SV *sv; + va_list args; + + PERL_UNUSED_ARG(err); + + va_start(args, pat); + sv = vnewSVpvf(pat, &args); + va_end(args); + sv_2mortal(sv); + warn("%s", SvPV_nolen(sv)); +} + +#define warner Perl_warner + +#define Perl_warner_nocontext Perl_warner + +#endif +#endif + +/* concatenating with "" ensures that only literal strings are accepted as argument + * note that STR_WITH_LEN() can't be used as argument to macros or functions that + * under some configurations might be macros + */ +#ifndef STR_WITH_LEN +# define STR_WITH_LEN(s) (s ""), (sizeof(s)-1) +#endif +#ifndef newSVpvs +# define newSVpvs(str) newSVpvn(str "", sizeof(str) - 1) +#endif + +#ifndef sv_catpvs +# define sv_catpvs(sv, str) sv_catpvn(sv, str "", sizeof(str) - 1) +#endif + +#ifndef sv_setpvs +# define sv_setpvs(sv, str) sv_setpvn(sv, str "", sizeof(str) - 1) +#endif + +#ifndef hv_fetchs +# define hv_fetchs(hv, key, lval) hv_fetch(hv, key "", sizeof(key) - 1, lval) +#endif + +#ifndef hv_stores +# define hv_stores(hv, key, val) hv_store(hv, key "", sizeof(key) - 1, val, 0) +#endif +#ifndef SvGETMAGIC +# define SvGETMAGIC(x) STMT_START { if (SvGMAGICAL(x)) mg_get(x); } STMT_END +#endif +#ifndef PERL_MAGIC_sv +# define PERL_MAGIC_sv '\0' +#endif + +#ifndef PERL_MAGIC_overload +# define PERL_MAGIC_overload 'A' +#endif + +#ifndef PERL_MAGIC_overload_elem +# define PERL_MAGIC_overload_elem 'a' +#endif + +#ifndef PERL_MAGIC_overload_table +# define PERL_MAGIC_overload_table 'c' +#endif + +#ifndef PERL_MAGIC_bm +# define PERL_MAGIC_bm 'B' +#endif + +#ifndef PERL_MAGIC_regdata +# define PERL_MAGIC_regdata 'D' +#endif + +#ifndef PERL_MAGIC_regdatum +# define PERL_MAGIC_regdatum 'd' +#endif + +#ifndef PERL_MAGIC_env +# define PERL_MAGIC_env 'E' +#endif + +#ifndef PERL_MAGIC_envelem +# define PERL_MAGIC_envelem 'e' +#endif + +#ifndef PERL_MAGIC_fm +# define PERL_MAGIC_fm 'f' +#endif + +#ifndef PERL_MAGIC_regex_global +# define PERL_MAGIC_regex_global 'g' +#endif + +#ifndef PERL_MAGIC_isa +# define PERL_MAGIC_isa 'I' +#endif + +#ifndef PERL_MAGIC_isaelem +# define PERL_MAGIC_isaelem 'i' +#endif + +#ifndef PERL_MAGIC_nkeys +# define PERL_MAGIC_nkeys 'k' +#endif + +#ifndef PERL_MAGIC_dbfile +# define PERL_MAGIC_dbfile 'L' +#endif + +#ifndef PERL_MAGIC_dbline +# define PERL_MAGIC_dbline 'l' +#endif + +#ifndef PERL_MAGIC_mutex +# define PERL_MAGIC_mutex 'm' +#endif + +#ifndef PERL_MAGIC_shared +# define PERL_MAGIC_shared 'N' +#endif + +#ifndef PERL_MAGIC_shared_scalar +# define PERL_MAGIC_shared_scalar 'n' +#endif + +#ifndef PERL_MAGIC_collxfrm +# define PERL_MAGIC_collxfrm 'o' +#endif + +#ifndef PERL_MAGIC_tied +# define PERL_MAGIC_tied 'P' +#endif + +#ifndef PERL_MAGIC_tiedelem +# define PERL_MAGIC_tiedelem 'p' +#endif + +#ifndef PERL_MAGIC_tiedscalar +# define PERL_MAGIC_tiedscalar 'q' +#endif + +#ifndef PERL_MAGIC_qr +# define PERL_MAGIC_qr 'r' +#endif + +#ifndef PERL_MAGIC_sig +# define PERL_MAGIC_sig 'S' +#endif + +#ifndef PERL_MAGIC_sigelem +# define PERL_MAGIC_sigelem 's' +#endif + +#ifndef PERL_MAGIC_taint +# define PERL_MAGIC_taint 't' +#endif + +#ifndef PERL_MAGIC_uvar +# define PERL_MAGIC_uvar 'U' +#endif + +#ifndef PERL_MAGIC_uvar_elem +# define PERL_MAGIC_uvar_elem 'u' +#endif + +#ifndef PERL_MAGIC_vstring +# define PERL_MAGIC_vstring 'V' +#endif + +#ifndef PERL_MAGIC_vec +# define PERL_MAGIC_vec 'v' +#endif + +#ifndef PERL_MAGIC_utf8 +# define PERL_MAGIC_utf8 'w' +#endif + +#ifndef PERL_MAGIC_substr +# define PERL_MAGIC_substr 'x' +#endif + +#ifndef PERL_MAGIC_defelem +# define PERL_MAGIC_defelem 'y' +#endif + +#ifndef PERL_MAGIC_glob +# define PERL_MAGIC_glob '*' +#endif + +#ifndef PERL_MAGIC_arylen +# define PERL_MAGIC_arylen '#' +#endif + +#ifndef PERL_MAGIC_pos +# define PERL_MAGIC_pos '.' +#endif + +#ifndef PERL_MAGIC_backref +# define PERL_MAGIC_backref '<' +#endif + +#ifndef PERL_MAGIC_ext +# define PERL_MAGIC_ext '~' +#endif + +/* That's the best we can do... */ +#ifndef sv_catpvn_nomg +# define sv_catpvn_nomg sv_catpvn +#endif + +#ifndef sv_catsv_nomg +# define sv_catsv_nomg sv_catsv +#endif + +#ifndef sv_setsv_nomg +# define sv_setsv_nomg sv_setsv +#endif + +#ifndef sv_pvn_nomg +# define sv_pvn_nomg sv_pvn +#endif + +#ifndef SvIV_nomg +# define SvIV_nomg SvIV +#endif + +#ifndef SvUV_nomg +# define SvUV_nomg SvUV +#endif + +#ifndef sv_catpv_mg +# define sv_catpv_mg(sv, ptr) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_catpv(TeMpSv,ptr); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_catpvn_mg +# define sv_catpvn_mg(sv, ptr, len) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_catpvn(TeMpSv,ptr,len); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_catsv_mg +# define sv_catsv_mg(dsv, ssv) \ + STMT_START { \ + SV *TeMpSv = dsv; \ + sv_catsv(TeMpSv,ssv); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_setiv_mg +# define sv_setiv_mg(sv, i) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_setiv(TeMpSv,i); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_setnv_mg +# define sv_setnv_mg(sv, num) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_setnv(TeMpSv,num); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_setpv_mg +# define sv_setpv_mg(sv, ptr) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_setpv(TeMpSv,ptr); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_setpvn_mg +# define sv_setpvn_mg(sv, ptr, len) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_setpvn(TeMpSv,ptr,len); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_setsv_mg +# define sv_setsv_mg(dsv, ssv) \ + STMT_START { \ + SV *TeMpSv = dsv; \ + sv_setsv(TeMpSv,ssv); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_setuv_mg +# define sv_setuv_mg(sv, i) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_setuv(TeMpSv,i); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif + +#ifndef sv_usepvn_mg +# define sv_usepvn_mg(sv, ptr, len) \ + STMT_START { \ + SV *TeMpSv = sv; \ + sv_usepvn(TeMpSv,ptr,len); \ + SvSETMAGIC(TeMpSv); \ + } STMT_END +#endif +#ifndef SvVSTRING_mg +# define SvVSTRING_mg(sv) (SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_vstring) : NULL) +#endif + +/* Hint: sv_magic_portable + * This is a compatibility function that is only available with + * Devel::PPPort. It is NOT in the perl core. + * Its purpose is to mimic the 5.8.0 behaviour of sv_magic() when + * it is being passed a name pointer with namlen == 0. In that + * case, perl 5.8.0 and later store the pointer, not a copy of it. + * The compatibility can be provided back to perl 5.004. With + * earlier versions, the code will not compile. + */ + +#if (PERL_BCDVERSION < 0x5004000) + + /* code that uses sv_magic_portable will not compile */ + +#elif (PERL_BCDVERSION < 0x5008000) + +# define sv_magic_portable(sv, obj, how, name, namlen) \ + STMT_START { \ + SV *SvMp_sv = (sv); \ + char *SvMp_name = (char *) (name); \ + I32 SvMp_namlen = (namlen); \ + if (SvMp_name && SvMp_namlen == 0) \ + { \ + MAGIC *mg; \ + sv_magic(SvMp_sv, obj, how, 0, 0); \ + mg = SvMAGIC(SvMp_sv); \ + mg->mg_len = -42; /* XXX: this is the tricky part */ \ + mg->mg_ptr = SvMp_name; \ + } \ + else \ + { \ + sv_magic(SvMp_sv, obj, how, SvMp_name, SvMp_namlen); \ + } \ + } STMT_END + +#else + +# define sv_magic_portable(a, b, c, d, e) sv_magic(a, b, c, d, e) + +#endif + +#ifdef USE_ITHREADS +#ifndef CopFILE +# define CopFILE(c) ((c)->cop_file) +#endif + +#ifndef CopFILEGV +# define CopFILEGV(c) (CopFILE(c) ? gv_fetchfile(CopFILE(c)) : Nullgv) +#endif + +#ifndef CopFILE_set +# define CopFILE_set(c,pv) ((c)->cop_file = savepv(pv)) +#endif + +#ifndef CopFILESV +# define CopFILESV(c) (CopFILE(c) ? GvSV(gv_fetchfile(CopFILE(c))) : Nullsv) +#endif + +#ifndef CopFILEAV +# define CopFILEAV(c) (CopFILE(c) ? GvAV(gv_fetchfile(CopFILE(c))) : Nullav) +#endif + +#ifndef CopSTASHPV +# define CopSTASHPV(c) ((c)->cop_stashpv) +#endif + +#ifndef CopSTASHPV_set +# define CopSTASHPV_set(c,pv) ((c)->cop_stashpv = ((pv) ? savepv(pv) : Nullch)) +#endif + +#ifndef CopSTASH +# define CopSTASH(c) (CopSTASHPV(c) ? gv_stashpv(CopSTASHPV(c),GV_ADD) : Nullhv) +#endif + +#ifndef CopSTASH_set +# define CopSTASH_set(c,hv) CopSTASHPV_set(c, (hv) ? HvNAME(hv) : Nullch) +#endif + +#ifndef CopSTASH_eq +# define CopSTASH_eq(c,hv) ((hv) && (CopSTASHPV(c) == HvNAME(hv) \ + || (CopSTASHPV(c) && HvNAME(hv) \ + && strEQ(CopSTASHPV(c), HvNAME(hv))))) +#endif + +#else +#ifndef CopFILEGV +# define CopFILEGV(c) ((c)->cop_filegv) +#endif + +#ifndef CopFILEGV_set +# define CopFILEGV_set(c,gv) ((c)->cop_filegv = (GV*)SvREFCNT_inc(gv)) +#endif + +#ifndef CopFILE_set +# define CopFILE_set(c,pv) CopFILEGV_set((c), gv_fetchfile(pv)) +#endif + +#ifndef CopFILESV +# define CopFILESV(c) (CopFILEGV(c) ? GvSV(CopFILEGV(c)) : Nullsv) +#endif + +#ifndef CopFILEAV +# define CopFILEAV(c) (CopFILEGV(c) ? GvAV(CopFILEGV(c)) : Nullav) +#endif + +#ifndef CopFILE +# define CopFILE(c) (CopFILESV(c) ? SvPVX(CopFILESV(c)) : Nullch) +#endif + +#ifndef CopSTASH +# define CopSTASH(c) ((c)->cop_stash) +#endif + +#ifndef CopSTASH_set +# define CopSTASH_set(c,hv) ((c)->cop_stash = (hv)) +#endif + +#ifndef CopSTASHPV +# define CopSTASHPV(c) (CopSTASH(c) ? HvNAME(CopSTASH(c)) : Nullch) +#endif + +#ifndef CopSTASHPV_set +# define CopSTASHPV_set(c,pv) CopSTASH_set((c), gv_stashpv(pv,GV_ADD)) +#endif + +#ifndef CopSTASH_eq +# define CopSTASH_eq(c,hv) (CopSTASH(c) == (hv)) +#endif + +#endif /* USE_ITHREADS */ +#ifndef IN_PERL_COMPILETIME +# define IN_PERL_COMPILETIME (PL_curcop == &PL_compiling) +#endif + +#ifndef IN_LOCALE_RUNTIME +# define IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) +#endif + +#ifndef IN_LOCALE_COMPILETIME +# define IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) +#endif + +#ifndef IN_LOCALE +# define IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) +#endif +#ifndef IS_NUMBER_IN_UV +# define IS_NUMBER_IN_UV 0x01 +#endif + +#ifndef IS_NUMBER_GREATER_THAN_UV_MAX +# define IS_NUMBER_GREATER_THAN_UV_MAX 0x02 +#endif + +#ifndef IS_NUMBER_NOT_INT +# define IS_NUMBER_NOT_INT 0x04 +#endif + +#ifndef IS_NUMBER_NEG +# define IS_NUMBER_NEG 0x08 +#endif + +#ifndef IS_NUMBER_INFINITY +# define IS_NUMBER_INFINITY 0x10 +#endif + +#ifndef IS_NUMBER_NAN +# define IS_NUMBER_NAN 0x20 +#endif +#ifndef GROK_NUMERIC_RADIX +# define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) +#endif +#ifndef PERL_SCAN_GREATER_THAN_UV_MAX +# define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 +#endif + +#ifndef PERL_SCAN_SILENT_ILLDIGIT +# define PERL_SCAN_SILENT_ILLDIGIT 0x04 +#endif + +#ifndef PERL_SCAN_ALLOW_UNDERSCORES +# define PERL_SCAN_ALLOW_UNDERSCORES 0x01 +#endif + +#ifndef PERL_SCAN_DISALLOW_PREFIX +# define PERL_SCAN_DISALLOW_PREFIX 0x02 +#endif + +#ifndef grok_numeric_radix +#if defined(NEED_grok_numeric_radix) +static bool DPPP_(my_grok_numeric_radix)(pTHX_ const char ** sp, const char * send); +static +#else +extern bool DPPP_(my_grok_numeric_radix)(pTHX_ const char ** sp, const char * send); +#endif + +#ifdef grok_numeric_radix +# undef grok_numeric_radix +#endif +#define grok_numeric_radix(a,b) DPPP_(my_grok_numeric_radix)(aTHX_ a,b) +#define Perl_grok_numeric_radix DPPP_(my_grok_numeric_radix) + +#if defined(NEED_grok_numeric_radix) || defined(NEED_grok_numeric_radix_GLOBAL) +bool +DPPP_(my_grok_numeric_radix)(pTHX_ const char **sp, const char *send) +{ +#ifdef USE_LOCALE_NUMERIC +#ifdef PL_numeric_radix_sv + if (PL_numeric_radix_sv && IN_LOCALE) { + STRLEN len; + char* radix = SvPV(PL_numeric_radix_sv, len); + if (*sp + len <= send && memEQ(*sp, radix, len)) { + *sp += len; + return TRUE; + } + } +#else + /* older perls don't have PL_numeric_radix_sv so the radix + * must manually be requested from locale.h + */ +#include + dTHR; /* needed for older threaded perls */ + struct lconv *lc = localeconv(); + char *radix = lc->decimal_point; + if (radix && IN_LOCALE) { + STRLEN len = strlen(radix); + if (*sp + len <= send && memEQ(*sp, radix, len)) { + *sp += len; + return TRUE; + } + } +#endif +#endif /* USE_LOCALE_NUMERIC */ + /* always try "." if numeric radix didn't match because + * we may have data from different locales mixed */ + if (*sp < send && **sp == '.') { + ++*sp; + return TRUE; + } + return FALSE; +} +#endif +#endif + +#ifndef grok_number +#if defined(NEED_grok_number) +static int DPPP_(my_grok_number)(pTHX_ const char * pv, STRLEN len, UV * valuep); +static +#else +extern int DPPP_(my_grok_number)(pTHX_ const char * pv, STRLEN len, UV * valuep); +#endif + +#ifdef grok_number +# undef grok_number +#endif +#define grok_number(a,b,c) DPPP_(my_grok_number)(aTHX_ a,b,c) +#define Perl_grok_number DPPP_(my_grok_number) + +#if defined(NEED_grok_number) || defined(NEED_grok_number_GLOBAL) +int +DPPP_(my_grok_number)(pTHX_ const char *pv, STRLEN len, UV *valuep) +{ + const char *s = pv; + const char *send = pv + len; + const UV max_div_10 = UV_MAX / 10; + const char max_mod_10 = UV_MAX % 10; + int numtype = 0; + int sawinf = 0; + int sawnan = 0; + + while (s < send && isSPACE(*s)) + s++; + if (s == send) { + return 0; + } else if (*s == '-') { + s++; + numtype = IS_NUMBER_NEG; + } + else if (*s == '+') + s++; + + if (s == send) + return 0; + + /* next must be digit or the radix separator or beginning of infinity */ + if (isDIGIT(*s)) { + /* UVs are at least 32 bits, so the first 9 decimal digits cannot + overflow. */ + UV value = *s - '0'; + /* This construction seems to be more optimiser friendly. + (without it gcc does the isDIGIT test and the *s - '0' separately) + With it gcc on arm is managing 6 instructions (6 cycles) per digit. + In theory the optimiser could deduce how far to unroll the loop + before checking for overflow. */ + if (++s < send) { + int digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + /* Now got 9 digits, so need to check + each time for overflow. */ + digit = *s - '0'; + while (digit >= 0 && digit <= 9 + && (value < max_div_10 + || (value == max_div_10 + && digit <= max_mod_10))) { + value = value * 10 + digit; + if (++s < send) + digit = *s - '0'; + else + break; + } + if (digit >= 0 && digit <= 9 + && (s < send)) { + /* value overflowed. + skip the remaining digits, don't + worry about setting *valuep. */ + do { + s++; + } while (s < send && isDIGIT(*s)); + numtype |= + IS_NUMBER_GREATER_THAN_UV_MAX; + goto skip_value; + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + numtype |= IS_NUMBER_IN_UV; + if (valuep) + *valuep = value; + + skip_value: + if (GROK_NUMERIC_RADIX(&s, send)) { + numtype |= IS_NUMBER_NOT_INT; + while (s < send && isDIGIT(*s)) /* optional digits after the radix */ + s++; + } + } + else if (GROK_NUMERIC_RADIX(&s, send)) { + numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ + /* no digits before the radix means we need digits after it */ + if (s < send && isDIGIT(*s)) { + do { + s++; + } while (s < send && isDIGIT(*s)); + if (valuep) { + /* integer approximation is valid - it's 0. */ + *valuep = 0; + } + } + else + return 0; + } else if (*s == 'I' || *s == 'i') { + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; + s++; if (s < send && (*s == 'I' || *s == 'i')) { + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; + s++; if (s == send || (*s != 'T' && *s != 't')) return 0; + s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; + s++; + } + sawinf = 1; + } else if (*s == 'N' || *s == 'n') { + /* XXX TODO: There are signaling NaNs and quiet NaNs. */ + s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; + sawnan = 1; + } else + return 0; + + if (sawinf) { + numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; + } else if (sawnan) { + numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; + } else if (s < send) { + /* we can have an optional exponent part */ + if (*s == 'e' || *s == 'E') { + /* The only flag we keep is sign. Blow away any "it's UV" */ + numtype &= IS_NUMBER_NEG; + numtype |= IS_NUMBER_NOT_INT; + s++; + if (s < send && (*s == '-' || *s == '+')) + s++; + if (s < send && isDIGIT(*s)) { + do { + s++; + } while (s < send && isDIGIT(*s)); + } + else + return 0; + } + } + while (s < send && isSPACE(*s)) + s++; + if (s >= send) + return numtype; + if (len == 10 && memEQ(pv, "0 but true", 10)) { + if (valuep) + *valuep = 0; + return IS_NUMBER_IN_UV; + } + return 0; +} +#endif +#endif + +/* + * The grok_* routines have been modified to use warn() instead of + * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit, + * which is why the stack variable has been renamed to 'xdigit'. + */ + +#ifndef grok_bin +#if defined(NEED_grok_bin) +static UV DPPP_(my_grok_bin)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); +static +#else +extern UV DPPP_(my_grok_bin)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); +#endif + +#ifdef grok_bin +# undef grok_bin +#endif +#define grok_bin(a,b,c,d) DPPP_(my_grok_bin)(aTHX_ a,b,c,d) +#define Perl_grok_bin DPPP_(my_grok_bin) + +#if defined(NEED_grok_bin) || defined(NEED_grok_bin_GLOBAL) +UV +DPPP_(my_grok_bin)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) +{ + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_2 = UV_MAX / 2; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + + if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { + /* strip off leading b or 0b. + for compatibility silently suffer "b" and "0b" as valid binary + numbers. */ + if (len >= 1) { + if (s[0] == 'b') { + s++; + len--; + } + else if (len >= 2 && s[0] == '0' && s[1] == 'b') { + s+=2; + len-=2; + } + } + } + + for (; len-- && *s; s++) { + char bit = *s; + if (bit == '0' || bit == '1') { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + With gcc seems to be much straighter code than old scan_bin. */ + redo: + if (!overflowed) { + if (value <= max_div_2) { + value = (value << 1) | (bit - '0'); + continue; + } + /* Bah. We're just overflowed. */ + warn("Integer overflow in binary number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 2.0; + /* If an NV has not enough bits in its mantissa to + * represent a UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount. */ + value_nv += (NV)(bit - '0'); + continue; + } + if (bit == '_' && len && allow_underscores && (bit = s[1]) + && (bit == '0' || bit == '1')) + { + --len; + ++s; + goto redo; + } + if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) + warn("Illegal binary digit '%c' ignored", *s); + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + warn("Binary number > 0b11111111111111111111111111111111 non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} +#endif +#endif + +#ifndef grok_hex +#if defined(NEED_grok_hex) +static UV DPPP_(my_grok_hex)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); +static +#else +extern UV DPPP_(my_grok_hex)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); +#endif + +#ifdef grok_hex +# undef grok_hex +#endif +#define grok_hex(a,b,c,d) DPPP_(my_grok_hex)(aTHX_ a,b,c,d) +#define Perl_grok_hex DPPP_(my_grok_hex) + +#if defined(NEED_grok_hex) || defined(NEED_grok_hex_GLOBAL) +UV +DPPP_(my_grok_hex)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) +{ + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_16 = UV_MAX / 16; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + const char *xdigit; + + if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { + /* strip off leading x or 0x. + for compatibility silently suffer "x" and "0x" as valid hex numbers. + */ + if (len >= 1) { + if (s[0] == 'x') { + s++; + len--; + } + else if (len >= 2 && s[0] == '0' && s[1] == 'x') { + s+=2; + len-=2; + } + } + } + + for (; len-- && *s; s++) { + xdigit = strchr((char *) PL_hexdigit, *s); + if (xdigit) { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + With gcc seems to be much straighter code than old scan_hex. */ + redo: + if (!overflowed) { + if (value <= max_div_16) { + value = (value << 4) | ((xdigit - PL_hexdigit) & 15); + continue; + } + warn("Integer overflow in hexadecimal number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 16.0; + /* If an NV has not enough bits in its mantissa to + * represent a UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount of 16-tuples. */ + value_nv += (NV)((xdigit - PL_hexdigit) & 15); + continue; + } + if (*s == '_' && len && allow_underscores && s[1] + && (xdigit = strchr((char *) PL_hexdigit, s[1]))) + { + --len; + ++s; + goto redo; + } + if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) + warn("Illegal hexadecimal digit '%c' ignored", *s); + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + warn("Hexadecimal number > 0xffffffff non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} +#endif +#endif + +#ifndef grok_oct +#if defined(NEED_grok_oct) +static UV DPPP_(my_grok_oct)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); +static +#else +extern UV DPPP_(my_grok_oct)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); +#endif + +#ifdef grok_oct +# undef grok_oct +#endif +#define grok_oct(a,b,c,d) DPPP_(my_grok_oct)(aTHX_ a,b,c,d) +#define Perl_grok_oct DPPP_(my_grok_oct) + +#if defined(NEED_grok_oct) || defined(NEED_grok_oct_GLOBAL) +UV +DPPP_(my_grok_oct)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) +{ + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_8 = UV_MAX / 8; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + + for (; len-- && *s; s++) { + /* gcc 2.95 optimiser not smart enough to figure that this subtraction + out front allows slicker code. */ + int digit = *s - '0'; + if (digit >= 0 && digit <= 7) { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + */ + redo: + if (!overflowed) { + if (value <= max_div_8) { + value = (value << 3) | digit; + continue; + } + /* Bah. We're just overflowed. */ + warn("Integer overflow in octal number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 8.0; + /* If an NV has not enough bits in its mantissa to + * represent a UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount of 8-tuples. */ + value_nv += (NV)digit; + continue; + } + if (digit == ('_' - '0') && len && allow_underscores + && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) + { + --len; + ++s; + goto redo; + } + /* Allow \octal to work the DWIM way (that is, stop scanning + * as soon as non-octal characters are seen, complain only iff + * someone seems to want to use the digits eight and nine). */ + if (digit == 8 || digit == 9) { + if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) + warn("Illegal octal digit '%c' ignored", *s); + } + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + warn("Octal number > 037777777777 non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} +#endif +#endif + +#if !defined(my_snprintf) +#if defined(NEED_my_snprintf) +static int DPPP_(my_my_snprintf)(char * buffer, const Size_t len, const char * format, ...); +static +#else +extern int DPPP_(my_my_snprintf)(char * buffer, const Size_t len, const char * format, ...); +#endif + +#define my_snprintf DPPP_(my_my_snprintf) +#define Perl_my_snprintf DPPP_(my_my_snprintf) + +#if defined(NEED_my_snprintf) || defined(NEED_my_snprintf_GLOBAL) + +int +DPPP_(my_my_snprintf)(char *buffer, const Size_t len, const char *format, ...) +{ + dTHX; + int retval; + va_list ap; + va_start(ap, format); +#ifdef HAS_VSNPRINTF + retval = vsnprintf(buffer, len, format, ap); +#else + retval = vsprintf(buffer, format, ap); +#endif + va_end(ap); + if (retval >= (int)len) + Perl_croak(aTHX_ "panic: my_snprintf buffer overflow"); + return retval; +} + +#endif +#endif + +#ifdef NO_XSLOCKS +# ifdef dJMPENV +# define dXCPT dJMPENV; int rEtV = 0 +# define XCPT_TRY_START JMPENV_PUSH(rEtV); if (rEtV == 0) +# define XCPT_TRY_END JMPENV_POP; +# define XCPT_CATCH if (rEtV != 0) +# define XCPT_RETHROW JMPENV_JUMP(rEtV) +# else +# define dXCPT Sigjmp_buf oldTOP; int rEtV = 0 +# define XCPT_TRY_START Copy(top_env, oldTOP, 1, Sigjmp_buf); rEtV = Sigsetjmp(top_env, 1); if (rEtV == 0) +# define XCPT_TRY_END Copy(oldTOP, top_env, 1, Sigjmp_buf); +# define XCPT_CATCH if (rEtV != 0) +# define XCPT_RETHROW Siglongjmp(top_env, rEtV) +# endif +#endif + +#if !defined(my_strlcat) +#if defined(NEED_my_strlcat) +static Size_t DPPP_(my_my_strlcat)(char * dst, const char * src, Size_t size); +static +#else +extern Size_t DPPP_(my_my_strlcat)(char * dst, const char * src, Size_t size); +#endif + +#define my_strlcat DPPP_(my_my_strlcat) +#define Perl_my_strlcat DPPP_(my_my_strlcat) + +#if defined(NEED_my_strlcat) || defined(NEED_my_strlcat_GLOBAL) + +Size_t +DPPP_(my_my_strlcat)(char *dst, const char *src, Size_t size) +{ + Size_t used, length, copy; + + used = strlen(dst); + length = strlen(src); + if (size > 0 && used < size - 1) { + copy = (length >= size - used) ? size - used - 1 : length; + memcpy(dst + used, src, copy); + dst[used + copy] = '\0'; + } + return used + length; +} +#endif +#endif + +#if !defined(my_strlcpy) +#if defined(NEED_my_strlcpy) +static Size_t DPPP_(my_my_strlcpy)(char * dst, const char * src, Size_t size); +static +#else +extern Size_t DPPP_(my_my_strlcpy)(char * dst, const char * src, Size_t size); +#endif + +#define my_strlcpy DPPP_(my_my_strlcpy) +#define Perl_my_strlcpy DPPP_(my_my_strlcpy) + +#if defined(NEED_my_strlcpy) || defined(NEED_my_strlcpy_GLOBAL) + +Size_t +DPPP_(my_my_strlcpy)(char *dst, const char *src, Size_t size) +{ + Size_t length, copy; + + length = strlen(src); + if (size > 0) { + copy = (length >= size) ? size - 1 : length; + memcpy(dst, src, copy); + dst[copy] = '\0'; + } + return length; +} + +#endif +#endif + +#endif /* _P_P_PORTABILITY_H_ */ + +/* End of File ppport.h */ diff --git a/scripts/Test.pm-to-Test-More.pl b/scripts/Test.pm-to-Test-More.pl new file mode 100644 index 0000000..22ec92a --- /dev/null +++ b/scripts/Test.pm-to-Test-More.pl @@ -0,0 +1,175 @@ +#!/usr/bin/perl + +=head1 NAME + +Test.pm-to-Test-More.pl - semi-automatically and partially convert Test.pm +scripts to Test::More. + +=head1 USAGE + + perl Test.pm-to-Test-More.pl -o new.t t/old.t + +=head1 VERSION + +0.2.0 + +=cut + +use strict; +use warnings; + +use Getopt::Long; +use PPI; + +my $out_filename; +my $inplace = ''; +if (!GetOptions( + 'o|output=s' => \$out_filename, + 'inplace!' => \$inplace, +)) +{ + die "Cannot process arguments."; +} + +if ($inplace && defined($out_filename)) +{ + die 'Inplace is mutually exclusive with specifying an output file!'; +} + +my $filename = shift(@ARGV); + +if ($inplace) +{ + $out_filename = $filename; +} + +my $doc = PPI::Document->new($filename); + +my $statements = $doc->find('PPI::Statement'); + +if (! $statements) +{ + die "Could not find any statements."; +} + +sub is_comma +{ + my $node = shift; + return $node->isa('PPI::Token::Operator') && ($node->content() eq ","); +} + +foreach my $stmt (@{$statements}) +{ + my $call = $stmt->child(0); + if ($call->isa('PPI::Token::Word') + && ($call->literal() eq "ok") + ) + { + # print "$stmt\n"; + my $comment = PPI::Token::Comment->new; + $comment->line(1); + $comment->set_content ("# TEST\n"); + + my $which_to_prepend = $stmt; + my $prev = $stmt->previous_sibling; + if ($prev->isa('PPI::Token::Whitespace')) + { + my $space = PPI::Token::Whitespace->new; + $space->set_content($prev->content()); + $prev->insert_before($space); + $prev->insert_before($comment); + } + else + { + $stmt->insert_before( $comment ); + } + + my $args = $stmt->find_first('PPI::Structure::List')->find_first('PPI::Statement::Expression'); + + my $num_childs = scalar (() = $args->children()); + + my $num_args = 1 + scalar (() = grep { is_comma($_) } $args->children()); + + my $last_child = $args->child($num_childs - 1); + if (is_comma($last_child) + || + ( + $last_child->isa('PPI::Token::Whitespace') + && + is_comma($args->child($num_childs - 2)) + ) + ) + { + $num_args--; + } + + if ( $num_args == 2) + { + $call->set_content('is'); + } + + my $test_op = PPI::Token::Operator->new(q{,}); + my $test_ws = PPI::Token::Whitespace->new; + $test_ws->set_content(' '); + my $test_name = PPI::Token::Quote::Single->new(q{' TODO : Add test name'}); + # $test_name->string(' TODO : Add test name'); + $args->add_element($test_op); + $args->add_element($test_ws); + $args->add_element($test_name); + } +} + +$doc->save($out_filename); + +=begin removed + +{ + my $out_fh; + if (defined($out_filename)) + { + open $out_fh, ">", $out_filename + or die qq{Cannot open "$out_filename" for writing!}; + } + else + { + open $out_fh, ">&STDOUT"; + } + + print {$out_fh} "$doc"; + + close ($out_fh) +} + +=end removed + +=cut + +=head1 COPYRIGHT & LICENSE + +Copyright 2011 by Shlomi Fish + +This program is distributed under the MIT (X11) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/scripts/bump-version-number.pl b/scripts/bump-version-number.pl new file mode 100644 index 0000000..18b13ba --- /dev/null +++ b/scripts/bump-version-number.pl @@ -0,0 +1,44 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use File::Find::Object (); +use Path::Tiny qw/ path tempdir tempfile cwd /; + +my $tree = File::Find::Object->new( {}, 'lib/' ); + +my $version_n = shift(@ARGV); + +if ( !defined($version_n) ) +{ + die "Specify version number as an argument! bump-version-number.pl '0.0.1'"; +} + +sub process_file +{ + # The filename. + my ($r) = @_; + my $fh = path($r); + + my @lines = $fh->lines_utf8; + foreach (@lines) + { +s#(\$VERSION = "|^Version )\d+\.\d+(?:\.\d+)?("|)#$1 . $version_n . $2#e; + } + $fh->spew_utf8(@lines); +} + +process_file('LibXML.pm'); + +while ( my $r = $tree->next() ) +{ + if ( $r =~ m{/\.(?:svn|hg|git)\z} ) + { + $tree->prune(); + } + elsif ( $r =~ m{\.pm\z} ) + { + process_file($r); + } +} diff --git a/scripts/fast-eumm.pl b/scripts/fast-eumm.pl new file mode 100644 index 0000000..44dbb80 --- /dev/null +++ b/scripts/fast-eumm.pl @@ -0,0 +1,16 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use File::Slurp qw(:edit); + +if (system("$^X", "Makefile.PL")) +{ + die "Cannot run 'Makefile.PL' - $!"; +} + +edit_file_lines( + sub { $_ = '' if m/\$\(OBJECT\).*:.*\$\(FIRST_MAKEFILE\)/ }, +'Makefile' +); diff --git a/scripts/prints-to-comments.pl b/scripts/prints-to-comments.pl new file mode 100644 index 0000000..4589c93 --- /dev/null +++ b/scripts/prints-to-comments.pl @@ -0,0 +1,10 @@ +#!perl -ln -i.bak + +use strict; +use warnings; + +if (/\A( *)print "(#.*?)\\n";\z/) +{ + $_ = "$1$2"; +} +print $_; diff --git a/scripts/tag-release.pl b/scripts/tag-release.pl new file mode 100644 index 0000000..aca9b97 --- /dev/null +++ b/scripts/tag-release.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Path::Tiny qw/ path /; + +my ($version) = + ( map { m{\$VERSION *= *"([^"]+)"} ? ($1) : () } + path('LibXML.pm')->lines_utf8() ); + +if ( !defined($version) ) +{ + die "Version is undefined!"; +} + +my @cmd = ( + "git", "tag", "-m", "Tagging the XML-LibXML release as $version", + "XML-LibXML-$version", +); + +print join( " ", map { /\s/ ? qq{"$_"} : $_ } @cmd ), "\n"; +exec(@cmd); diff --git a/scripts/total-build-and-test.bash b/scripts/total-build-and-test.bash new file mode 100644 index 0000000..829f152 --- /dev/null +++ b/scripts/total-build-and-test.bash @@ -0,0 +1,9 @@ +#!/bin/bash +set -x +export HARNESS_OPTIONS="j4:c" TEST_JOBS=4 +mak='make -j8' +perl Makefile.PL && \ + ($mak docs || true) && \ + perl Makefile.PL && \ + $mak test && \ + $mak disttest diff --git a/scripts/update-HACKING-file.bash b/scripts/update-HACKING-file.bash new file mode 100644 index 0000000..10fd246 --- /dev/null +++ b/scripts/update-HACKING-file.bash @@ -0,0 +1,2 @@ +#!/bin/bash +cp -f /home/shlomif/Docs/homepage/homepage/trunk/t2/open-source/resources/how-to-contribute-to-my-projects/HACKING.txt ./HACKING.txt diff --git a/t/00-report-prereqs.t b/t/00-report-prereqs.t new file mode 100644 index 0000000..259894e --- /dev/null +++ b/t/00-report-prereqs.t @@ -0,0 +1,194 @@ +#!perl + +use strict; +use warnings; + +# This test was generated by Dist::Zilla::Plugin::Test::ReportPrereqs 0.027 + +use Test::More tests => 1; + +use ExtUtils::MakeMaker; +use File::Spec; + +# from $version::LAX +my $lax_version_re = + qr/(?: undef | (?: (?:[0-9]+) (?: \. | (?:\.[0-9]+) (?:_[0-9]+)? )? + | + (?:\.[0-9]+) (?:_[0-9]+)? + ) | (?: + v (?:[0-9]+) (?: (?:\.[0-9]+)+ (?:_[0-9]+)? )? + | + (?:[0-9]+)? (?:\.[0-9]+){2,} (?:_[0-9]+)? + ) + )/x; + +# hide optional CPAN::Meta modules from prereq scanner +# and check if they are available +my $cpan_meta = "CPAN::Meta"; +my $cpan_meta_pre = "CPAN::Meta::Prereqs"; +my $HAS_CPAN_META = eval "require $cpan_meta; $cpan_meta->VERSION('2.120900')" && eval "require $cpan_meta_pre"; ## no critic + +# Verify requirements? +my $DO_VERIFY_PREREQS = 1; + +sub _max { + my $max = shift; + $max = ( $_ > $max ) ? $_ : $max for @_; + return $max; +} + +sub _merge_prereqs { + my ($collector, $prereqs) = @_; + + # CPAN::Meta::Prereqs object + if (ref $collector eq $cpan_meta_pre) { + return $collector->with_merged_prereqs( + CPAN::Meta::Prereqs->new( $prereqs ) + ); + } + + # Raw hashrefs + for my $phase ( keys %$prereqs ) { + for my $type ( keys %{ $prereqs->{$phase} } ) { + for my $module ( keys %{ $prereqs->{$phase}{$type} } ) { + $collector->{$phase}{$type}{$module} = $prereqs->{$phase}{$type}{$module}; + } + } + } + + return $collector; +} + +my @include = qw( + +); + +my @exclude = qw( + +); + +# Add static prereqs to the included modules list +my $static_prereqs = do './t/00-report-prereqs.dd'; + +# Merge all prereqs (either with ::Prereqs or a hashref) +my $full_prereqs = _merge_prereqs( + ( $HAS_CPAN_META ? $cpan_meta_pre->new : {} ), + $static_prereqs +); + +# Add dynamic prereqs to the included modules list (if we can) +my ($source) = grep { -f } 'MYMETA.json', 'MYMETA.yml'; +my $cpan_meta_error; +if ( $source && $HAS_CPAN_META + && (my $meta = eval { CPAN::Meta->load_file($source) } ) +) { + $full_prereqs = _merge_prereqs($full_prereqs, $meta->prereqs); +} +else { + $cpan_meta_error = $@; # capture error from CPAN::Meta->load_file($source) + $source = 'static metadata'; +} + +my @full_reports; +my @dep_errors; +my $req_hash = $HAS_CPAN_META ? $full_prereqs->as_string_hash : $full_prereqs; + +# Add static includes into a fake section +for my $mod (@include) { + $req_hash->{other}{modules}{$mod} = 0; +} + +for my $phase ( qw(configure build test runtime develop other) ) { + next unless $req_hash->{$phase}; + next if ($phase eq 'develop' and not $ENV{AUTHOR_TESTING}); + + for my $type ( qw(requires recommends suggests conflicts modules) ) { + next unless $req_hash->{$phase}{$type}; + + my $title = ucfirst($phase).' '.ucfirst($type); + my @reports = [qw/Module Want Have/]; + + for my $mod ( sort keys %{ $req_hash->{$phase}{$type} } ) { + next if $mod eq 'perl'; + next if grep { $_ eq $mod } @exclude; + + my $file = $mod; + $file =~ s{::}{/}g; + $file .= ".pm"; + my ($prefix) = grep { -e File::Spec->catfile($_, $file) } @INC; + + my $want = $req_hash->{$phase}{$type}{$mod}; + $want = "undef" unless defined $want; + $want = "any" if !$want && $want == 0; + + my $req_string = $want eq 'any' ? 'any version required' : "version '$want' required"; + + if ($prefix) { + my $have = MM->parse_version( File::Spec->catfile($prefix, $file) ); + $have = "undef" unless defined $have; + push @reports, [$mod, $want, $have]; + + if ( $DO_VERIFY_PREREQS && $HAS_CPAN_META && $type eq 'requires' ) { + if ( $have !~ /\A$lax_version_re\z/ ) { + push @dep_errors, "$mod version '$have' cannot be parsed ($req_string)"; + } + elsif ( ! $full_prereqs->requirements_for( $phase, $type )->accepts_module( $mod => $have ) ) { + push @dep_errors, "$mod version '$have' is not in required range '$want'"; + } + } + } + else { + push @reports, [$mod, $want, "missing"]; + + if ( $DO_VERIFY_PREREQS && $type eq 'requires' ) { + push @dep_errors, "$mod is not installed ($req_string)"; + } + } + } + + if ( @reports ) { + push @full_reports, "=== $title ===\n\n"; + + my $ml = _max( map { length $_->[0] } @reports ); + my $wl = _max( map { length $_->[1] } @reports ); + my $hl = _max( map { length $_->[2] } @reports ); + + if ($type eq 'modules') { + splice @reports, 1, 0, ["-" x $ml, "", "-" x $hl]; + push @full_reports, map { sprintf(" %*s %*s\n", -$ml, $_->[0], $hl, $_->[2]) } @reports; + } + else { + splice @reports, 1, 0, ["-" x $ml, "-" x $wl, "-" x $hl]; + push @full_reports, map { sprintf(" %*s %*s %*s\n", -$ml, $_->[0], $wl, $_->[1], $hl, $_->[2]) } @reports; + } + + push @full_reports, "\n"; + } + } +} + +if ( @full_reports ) { + diag "\nVersions for all modules listed in $source (including optional ones):\n\n", @full_reports; +} + +if ( $cpan_meta_error || @dep_errors ) { + diag "\n*** WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ***\n"; +} + +if ( $cpan_meta_error ) { + my ($orig_source) = grep { -f } 'MYMETA.json', 'MYMETA.yml'; + diag "\nCPAN::Meta->load_file('$orig_source') failed with: $cpan_meta_error\n"; +} + +if ( @dep_errors ) { + diag join("\n", + "\nThe following REQUIRED prerequisites were not satisfied:\n", + @dep_errors, + "\n" + ); +} + +# TEST +pass; + +# vim: ts=4 sts=4 sw=4 et: diff --git a/t/01basic.t b/t/01basic.t new file mode 100644 index 0000000..9a7c778 --- /dev/null +++ b/t/01basic.t @@ -0,0 +1,28 @@ +use strict; +use warnings; + +use Test::More tests => 3; + +use XML::LibXML; + +# TEST +ok(1, 'Loaded fine'); + +my $p = XML::LibXML->new(); +# TEST +ok ($p, 'Can initialize a new XML::LibXML instance'); + +my ($runtime_version) = (XML::LibXML::LIBXML_RUNTIME_VERSION() =~ /\A(\d+)/); + +# TEST +if (!is ( + XML::LibXML::LIBXML_VERSION, $runtime_version, + 'LIBXML__VERSION == LIBXML_RUNTIME_VERSION', +)) +{ + diag("DO NOT REPORT THIS FAILURE: Your setup of library paths is incorrect!"); +} + +diag( "\n\nCompiled against libxml2 version: ",XML::LibXML::LIBXML_VERSION, + "\nRunning libxml2 version: ",$runtime_version, + "\n\n"); diff --git a/t/02parse.t b/t/02parse.t new file mode 100644 index 0000000..b111507 --- /dev/null +++ b/t/02parse.t @@ -0,0 +1,994 @@ +# $Id$ + +## +# this test checks the parsing capabilities of XML::LibXML +# it relies on the success of t/01basic.t + +use strict; +use warnings; + +# Fix the locale for the error messages check to work: +# See https://rt.cpan.org/Public/Bug/Display.html?id=97805 . +use POSIX qw(locale_h); +use locale; + +POSIX::setlocale(LC_ALL, "C"); + +use Test::More tests => 533; +use IO::File; + +use XML::LibXML::Common qw(:libxml); +use XML::LibXML::SAX; +use XML::LibXML::SAX::Builder; + +use constant XML_DECL => "\n"; + +use Errno qw(ENOENT); + +# TEST*533 + +## +# test values +my @goodWFStrings = ( +'', +'', +XML_DECL . "", +''."\n", +''."\n", +XML_DECL. " \n", +XML_DECL. ' ', +XML_DECL. ' ', +XML_DECL. '&"\']]>', +XML_DECL. '<>&"'', +XML_DECL. '  ', +XML_DECL. 'foo', +XML_DECL. 'foo', +XML_DECL. 'foo', +XML_DECL. '', +XML_DECL. '', +#XML_DECL. '', +#'' + ); + +my @goodWFNSStrings = ( +XML_DECL. ''."\n", +XML_DECL. ''."\n", +XML_DECL. ''."\n", +XML_DECL. ''."\n", +XML_DECL. ''."\n", + ); + +my @goodWFDTDStrings = ( +XML_DECL. ''."\n".']>'."\n".'&foo;', +XML_DECL. ']>&foo;', +XML_DECL. ']>&foo;>', +XML_DECL. ']>&foo;>', +XML_DECL. ']>&foo;>', +XML_DECL. ']>', +XML_DECL. ']>', + ); + +my @badWFStrings = ( +"", # totally empty document +XML_DECL, # only XML Declaration +"", # comment only is like an empty document +']>', # no good either ... +"", # single tag (tag mismatch) +"foo", # trailing junk +"foo", # leading junk +"", # bad attribute +'&", # bad char +"�x20;", # bad char +"", # bad encoding +"&foo;", # undefind entity +">", # unterminated entity +XML_DECL. ']>', # bad placed entity +XML_DECL. ']>', # even worse +"", # bad comment +'', # bad either... (is this conform with the spec????) + ); + + my %goodPushWF = ( +single1 => [''], +single2 => ['',''], +single3 => [ XML_DECL, "", "" ], +single4 => [""], +single5 => ["<", "foo","bar", "/>"], +single6 => ['',"\n"], +single7 => ['',"\n"], +single8 => [''], +single9 => ['',"\n"], +multiple1 => [ '','',' ', ], +multiple2 => [ '<','/foobar> ', ], +multiple3 => [ '','&"\']]>',''], +multiple4 => [ '','&', ']]>', '' ], +multiple5 => [ '','&', ']]>', '' ], +multiple6 => ['','<>&"'',''], +multiple6 => ['','<',';&','gt;&a','mp;','"&ap','os;',''], +multiple7 => [ '', '  ','' ], +multiple8 => [ '', '&#x','20;','60;','' ], +multiple9 => [ '','moo','moo',' ', ], +multiple10 => [ '','moo',' ', ], +comment1 => [ '','' ], +comment2 => [ '','' ], +comment3 => [ '','' ], +comment4 => [ '','' ], +comment5 => [ 'fo','o', + wellformed7 => '', + wellformed8 => '', + wellformed9 => 'D', + wellformed10 => '', + wellformed11 => '', + wellbalance1 => '', + wellbalance2 => '', + wellbalance3 => '', + wellbalance4 => 'DI', + wellbalance5 => '', + wellbalance6 => '', + wellbalance7 => '', + wellbalance8 => 'DD', + wellbalance9 => 'D', + wellbalance10=> 'DD', + wellbalance11=> 'D', + wellbalance12=> 'D', + wellbalance13=> 'D', + wellbalance14=> '', + wellbalance15=> '', + wellbalance16=> 'D', + ); + + my @badWBStrings = ( + "", + "", + "bar", + "bar", + "&foo;", # undefined entity + "&", # bad char + "häh?", # bad encoding + "", # bad stays bad ;) + "", # bad stays bad ;) + ); + + + my $pparser = XML::LibXML->new; + + # 5.1 DOM CHUNK PARSER + + for ( 1..$MAX_WF_C ) { + my $frag = $pparser->parse_xml_chunk($chunks{'wellformed'.$_}); + isa_ok($frag, 'XML::LibXML::DocumentFragment'); + if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE + && $frag->hasChildNodes ) { + if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { + if ( $chunks{'wellformed' . $_} =~ /\\<\/A\>/ ) { + $_--; # because we cannot distinguish between and + } + + is($frag->toString, $chunks{'wellformed' . $_}, $chunks{'wellformed' . $_} . " is well formed"); + next; + } + } + fail("Unexpected fragment without child nodes"); + } + + for ( 1..$MAX_WB_C ) { + my $frag = $pparser->parse_xml_chunk($chunks{'wellbalance'.$_}); + isa_ok($frag, 'XML::LibXML::DocumentFragment'); + if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE + && $frag->hasChildNodes ) { + if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) { + $_--; + } + is($frag->toString, $chunks{'wellbalance'.$_}, $chunks{'wellbalance'.$_} . " is well balanced"); + next; + } + fail("Can't test balancedness"); + } + + eval { my $fail = $pparser->parse_xml_chunk(undef); }; + like($@, qr/^Empty String at/, "error parsing undef xml chunk"); + + eval { my $fail = $pparser->parse_xml_chunk(""); }; + like($@, qr/^Empty String at/, "error parsing empty xml chunk"); + + foreach my $str ( @badWBStrings ) { + eval { my $fail = $pparser->parse_xml_chunk($str); }; + isnt($@, '', "Error parsing xml chunk: '" . shorten_string($str) . "'"); + } + + { + # 5.1.1 Segmenation fault tests + + my $sDoc = ''; + my $sChunk = ''; + + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_xml_chunk( $sDoc, undef ); + my $chk = $parser->parse_xml_chunk( $sChunk,undef ); + + my $fc = $doc->firstChild; + + $doc->appendChild( $chk ); + + is( $doc->toString(), '', 'No segfault parsing string ""'); + } + + { + # 5.1.2 Segmenation fault tests + + my $sDoc = ''; + my $sChunk = ''; + + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_xml_chunk( $sDoc, undef ); + my $chk = $parser->parse_xml_chunk( $sChunk,undef ); + + my $fc = $doc->firstChild; + + $doc->insertAfter( $chk, $fc ); + + is( $doc->toString(), '', 'No segfault parsing string ""'); + } + + { + # 5.1.3 Segmenation fault tests + + my $sDoc = ''; + my $sChunk = ''; + + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_xml_chunk( $sDoc, undef ); + my $chk = $parser->parse_xml_chunk( $sChunk,undef ); + + my $fc = $doc->firstChild; + + $doc->insertBefore( $chk, $fc ); + + ok( $doc->toString(), '' ); + } + + pass("Made it to SAX test without seg fault"); + + # 5.2 SAX CHUNK PARSER + + my $handler = XML::LibXML::SAX::Builder->new(); + my $parser = XML::LibXML->new; + $parser->set_handler( $handler ); + for ( 1..$MAX_WF_C ) { + my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_}); + isa_ok($frag, 'XML::LibXML::DocumentFragment'); + if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE + && $frag->hasChildNodes ) { + if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { + if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) { + $_--; + } + is($frag->toString, $chunks{'wellformed'.$_}, $chunks{'wellformed'.$_} . ' is well formed'); + next; + } + } + fail("Couldn't pass well formed test since frag was bad"); + } + + for ( 1..$MAX_WB_C ) { + my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_}); + isa_ok($frag, 'XML::LibXML::DocumentFragment'); + if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE + && $frag->hasChildNodes ) { + if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) { + $_--; + } + is($frag->toString, $chunks{'wellbalance'.$_}, $chunks{'wellbalance'.$_} . " is well balanced"); + next; + } + fail("Couldn't pass well balanced test since frag was bad"); + } +} + +{ + # 6 VALIDATING PARSER + + my %badstrings = ( + SIMPLE => ''."\n\n", + ); + my $parser = XML::LibXML->new(expand_entities => 1); + + $parser->validation(1); + my $doc; + eval { $doc = $parser->parse_string($badstrings{SIMPLE}); }; + isnt($@, '', "Failed to parse SIMPLE bad string"); + my $ql; +} + +{ + # 7 LINE NUMBERS + + my $goodxml = < + + + +EOXML + + my $badxml = < +]> + +EOXML + + my $parser = XML::LibXML->new(expand_entities => 1); + $parser->validation(1); + + eval { $parser->parse_string( $badxml ); }; + # correct line number may or may not be present + # depending on libxml2 version + like($@, qr/^:[03]:/, "line 03 found in error" ); + + $parser->line_numbers(1); + eval { $parser->parse_string( $badxml ); }; + like($@, qr/^:3:/, "line 3 found in error"); + + # switch off validation for the following tests + $parser->validation(0); + + my $doc; + eval { $doc = $parser->parse_string( $goodxml ); }; + + my $root = $doc->documentElement(); + is( $root->line_number(), 2, "line number is 2"); + + my @kids = $root->childNodes(); + is( $kids[1]->line_number(),3, "line number is 3" ); + + my $newkid = $root->appendChild( $doc->createElement( "bar" ) ); + is( $newkid->line_number(), 0, "line number is 0"); + + $parser->line_numbers(0); + eval { $doc = $parser->parse_string( $goodxml ); }; + + $root = $doc->documentElement(); + is( $root->line_number(), 0, "line number is 0"); + + @kids = $root->childNodes(); + is( $kids[1]->line_number(), 0, "line number is 0"); +} + +SKIP: { + skip("LibXML version is below 20600", 8) unless ( XML::LibXML::LIBXML_VERSION >= 20600 ); + # 8 Clean Namespaces + + my ( $xsDoc1, $xsDoc2 ); + $xsDoc1 = q{}; + $xsDoc2 = q{}; + + my $parser = XML::LibXML->new(); + $parser->clean_namespaces(1); + + my $fn1 = "example/xmlns/goodguy.xml"; + my $fn2 = "example/xmlns/badguy.xml"; + + is( $parser->parse_string( $xsDoc1 )->documentElement->toString(), + q{} ); + is( $parser->parse_string( $xsDoc2 )->documentElement->toString(), + $xsDoc2 ); + + is( $parser->parse_file( $fn1 )->documentElement->toString(), + q{} ); + is( $parser->parse_file( $fn2 )->documentElement->toString() , + $xsDoc2 ); + + my $fh1 = IO::File->new($fn1); + my $fh2 = IO::File->new($fn2); + + is( $parser->parse_fh( $fh1 )->documentElement->toString(), + q{} ); + is( $parser->parse_fh( $fh2 )->documentElement->toString() , + $xsDoc2 ); + + my @xaDoc1 = ('','' ,''); + my @xaDoc2 = ('','' , ''); + + my $doc; + + foreach ( @xaDoc1 ) { + $parser->parse_chunk( $_ ); + } + $doc = $parser->parse_chunk( "", 1 ); + is( $doc->documentElement->toString(), + q{} ); + + + foreach ( @xaDoc2 ) { + $parser->parse_chunk( $_ ); + } + $doc = $parser->parse_chunk( "", 1 ); + is( $doc->documentElement->toString() , + $xsDoc2 ); +}; + + +## +# test if external subsets are loaded correctly + +{ + my $xmldoc = < +&foo; +EOXML + my $parser = XML::LibXML->new(); + + $parser->load_ext_dtd(1); + + # first time it should work + my $doc = $parser->parse_string( $xmldoc ); + is( $doc->documentElement()->string_value(), " test " ); + + # second time it must not fail. + my $doc2 = $parser->parse_string( $xmldoc ); + is( $doc2->documentElement()->string_value(), " test " ); +} + +## +# Test ticket #7668 xinclude breaks entity expansion +# [CG] removed again, since #7668 claims the spec is incorrect + +## +# Test ticket #7913 +{ + my $xmldoc = < +&foo; +EOXML + my $parser = XML::LibXML->new(); + + $parser->load_ext_dtd(1); + + # first time it should work + my $doc = $parser->parse_string( $xmldoc ); + is( $doc->documentElement()->string_value(), " test " ); + + # lets see if load_ext_dtd(0) works + $parser->load_ext_dtd(0); + my $doc2; + eval { + $doc2 = $parser->parse_string( $xmldoc ); + }; + isnt($@, '', "error parsing $xmldoc"); + + $parser->validation(1); + + $parser->load_ext_dtd(0); + my $doc3; + eval { + $doc3 = $parser->parse_file( "example/article_external_bad.xml" ); + }; + + isa_ok( $doc3, 'XML::LibXML::Document'); + + $parser->load_ext_dtd(1); + eval { + $doc3 = $parser->parse_file( "example/article_external_bad.xml" ); + }; + + isnt($@, '', "error parsing example/article_external_bad.xml"); +} + +{ + + my $parser = XML::LibXML->new(); + + my $doc = $parser->parse_string('',"bar.xml"); + my $el = $doc->documentElement; + is( $doc->URI, "bar.xml" ); + is( $doc->baseURI, "bar.xml" ); + is( $el->baseURI, "foo.xml" ); + + $doc->setURI( "baz.xml" ); + is( $doc->URI, "baz.xml" ); + is( $doc->baseURI, "baz.xml" ); + is( $el->baseURI, "foo.xml" ); + + $doc->setBaseURI( "bag.xml" ); + is( $doc->URI, "bag.xml" ); + is( $doc->baseURI, "bag.xml" ); + is( $el->baseURI, "foo.xml" ); + + $el->setBaseURI( "bam.xml" ); + is( $doc->URI, "bag.xml" ); + is( $doc->baseURI, "bag.xml" ); + is( $el->baseURI, "bam.xml" ); + +} + + +{ + + my $parser = XML::LibXML->new(); + + my $doc = $parser->parse_html_string('',{ URI => "bar.html" }); + my $el = $doc->documentElement; + is( $doc->URI, "bar.html" ); + is( $doc->baseURI, "foo.html" ); + is( $el->baseURI, "foo.html" ); + + $doc->setURI( "baz.html" ); + is( $doc->URI, "baz.html" ); + is( $doc->baseURI, "foo.html" ); + is( $el->baseURI, "foo.html" ); + +} + +{ + my $parser = XML::LibXML->new(); + open(my $fh, '<:utf8', 't/data/chinese.xml'); + ok( $fh, 'open chinese.xml'); + eval { + $parser->parse_fh($fh); + }; + like( $@, qr/Read more bytes than requested/, + 'UTF-8 encoding layer throws exception' ); + close($fh); +} + +sub tsub { + my $doc = shift; + + my $th = {}; + $th->{d} = XML::LibXML::Document->createDocument; + my $e1 = $th->{d}->createElementNS("x","X:foo"); + + $th->{d}->setDocumentElement( $e1 ); + my $e2 = $th->{d}->createElementNS( "x","X:bar" ); + + $e1->appendChild( $e2 ); + + $e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) ); + + return $th->{d}; +} + +sub tsub2 { + my ($doc,$query)=($_[0],@{$_[1]}); +# return [ $doc->findnodes($query) ]; + return [ $doc->findnodes(encodeToUTF8('iso-8859-1',$query)) ]; +} + +sub shorten_string { # Used for test naming. + my $string = shift; + return "'undef'" if(!defined $string); + + $string =~ s/\n/\\n/msg; + return $string if(length($string) < 25); + return $string = substr($string, 0, 10) . "..." . substr($string, -10); +} diff --git a/t/03doc.t b/t/03doc.t new file mode 100644 index 0000000..5da6795 --- /dev/null +++ b/t/03doc.t @@ -0,0 +1,770 @@ +# $Id$ + +## +# this test checks the DOM Document interface of XML::LibXML +# it relies on the success of t/01basic.t and t/02parse.t + +# it will ONLY test the DOM capabilities as specified in DOM Level3 +# XPath tests should be done in another test file + +# since all tests are run on a preparsed + +use strict; +use warnings; + +# Should be 168. +use Test::More tests => 193; + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); + +use IO::Handle; + +sub is_empty_str +{ + my $s = shift; + return (!defined($s) or (length($s) == 0)); +} + +# TEST:$c=0; +sub _check_element_node +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($node, $name, $blurb) = @_; + + # TEST:$c++; + ok($node, "$blurb - node was initialised"); + # TEST:$c++; + is($node->nodeType, XML_ELEMENT_NODE, "$blurb - node is an element node"); + # TEST:$c++; + is($node->nodeName, $name, "$blurb - node has the right name."); +} + +# TEST:$_check_element_node=$c; + +sub _check_created_element +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ($doc, $given_name, $name, $blurb) = @_; + + return _check_element_node( + $doc->createElement($given_name), + $name, + $blurb + ); +} +# TEST:$_check_created_element=$_check_element_node; + +sub _multi_arg_generic_count +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($doc, $method, $params) = @_; + + my ($meth_params, $want_count, $blurb) = @$params; + + my @elems = $doc->$method( @$meth_params ); + + return is (scalar(@elems), $want_count, $blurb); +} + +sub _generic_count +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($doc, $method, $params) = @_; + + my ($name, $want_count, $blurb) = @$params; + + return _multi_arg_generic_count( + $doc, $method, [[$name], $want_count, $blurb, ], + ); +} + +sub _count_local_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my $doc = shift; + + return _generic_count($doc, 'getElementsByLocalName', [@_]); +} + +sub _count_tag_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my $doc = shift; + + return _generic_count($doc, 'getElementsByTagName', [@_]); +} + +sub _count_children_by_local_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my $doc = shift; + + return _generic_count($doc, 'getChildrenByLocalName', [@_]); +} + +sub _count_children_by_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my $doc = shift; + + return _generic_count($doc, 'getChildrenByTagName', [@_]); +} + +sub _count_elements_by_name_ns +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ($doc, $ns_and_name, $want_count, $blurb) = @_; + + return _multi_arg_generic_count($doc, 'getElementsByTagNameNS', + [$ns_and_name, $want_count, $blurb] + ); +} + +sub _count_children_by_name_ns +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ($doc, $ns_and_name, $want_count, $blurb) = @_; + + return _multi_arg_generic_count($doc, 'getChildrenByTagNameNS', + [$ns_and_name, $want_count, $blurb] + ); +} + +{ + # Document Attributes + + my $doc = XML::LibXML::Document->createDocument(); + # TEST + ok($doc, ' TODO : Add test name'); + # TEST + ok( ! defined($doc->encoding), ' TODO : Add test name'); + # TEST + is( $doc->version, "1.0", ' TODO : Add test name' ); + # TEST + is( $doc->standalone, -1, ' TODO : Add test name' ); # is the value we get for undefined, + # actually the same as 0 but just not set. + # TEST + ok( !defined($doc->URI), ' TODO : Add test name'); # should be set by default. + # TEST + is( $doc->compression, -1, ' TODO : Add test name' ); # -1 indicates NO compression at all! + # while 0 indicates just no zip compression + # (big difference huh?) + + $doc->setEncoding( "iso-8859-1" ); + # TEST + is( $doc->encoding, "iso-8859-1", 'Encoding was set.' ); + + $doc->setVersion(12.5); + # TEST + is( $doc->version, "12.5", 'Version was set.' ); + + $doc->setStandalone(1); + # TEST + is( $doc->standalone, 1, 'Standalone was set.' ); + + $doc->setBaseURI( "localhost/here.xml" ); + # TEST + is( $doc->URI, "localhost/here.xml", 'URI is set.' ); + + my $doc2 = XML::LibXML::Document->createDocument("1.1", "iso-8859-2"); + # TEST + is( $doc2->encoding, "iso-8859-2", 'doc2 encoding was set.' ); + # TEST + is( $doc2->version, "1.1", 'doc2 version was set.' ); + # TEST + is( $doc2->standalone, -1, 'doc2 standalone' ); +} + +{ + # 2. Creating Elements + my $doc = XML::LibXML::Document->new(); + { + my $node = $doc->createDocumentFragment(); + # TEST + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeType, XML_DOCUMENT_FRAG_NODE, ' TODO : Add test name'); + } + + # TEST*$_check_created_element + _check_created_element($doc, 'foo', 'foo', 'Simple Element'); + + { + # document with encoding + my $encdoc = XML::LibXML::Document->new( "1.0" ); + $encdoc->setEncoding( "iso-8859-1" ); + + # TEST*$_check_created_element + _check_created_element( + $encdoc, 'foo', 'foo', 'Encdoc Element creation' + ); + + # SAX style document with encoding + my $node_def = { + Name => "object", + LocalName => "object", + Prefix => "", + NamespaceURI => "", + }; + + # TEST*$_check_created_element + _check_created_element( + $encdoc, $node_def->{Name}, 'object', + 'Encdoc element creation based on node_def->{name}', + ); + } + + { + # namespaced element test + my $node = $doc->createElementNS( "http://kungfoo", "foo:bar" ); + # TEST + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeType, XML_ELEMENT_NODE, ' TODO : Add test name'); + # TEST + is($node->nodeName, "foo:bar", ' TODO : Add test name'); + # TEST + is($node->prefix, "foo", ' TODO : Add test name'); + # TEST + is($node->localname, "bar", ' TODO : Add test name'); + # TEST + is($node->namespaceURI, "http://kungfoo", ' TODO : Add test name'); + } + + { + # bad element creation + # TEST:$badnames_count=5; + my @badnames = ( ";", "&", "<><", "/", "1A"); + + foreach my $name ( @badnames ) { + my $node = eval {$doc->createElement( $name );}; + # TEST*$badnames_count + ok( !(defined $node), ' TODO : Add test name' ); + } + + } + + { + my $node = $doc->createTextNode( "foo" ); + # TEST + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeType, XML_TEXT_NODE, ' TODO : Add test name' ); + # TEST + is($node->nodeValue, "foo", ' TODO : Add test name' ); + } + + { + my $node = $doc->createComment( "foo" ); + # TEST + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeType, XML_COMMENT_NODE, ' TODO : Add test name' ); + # TEST + is($node->nodeValue, "foo", ' TODO : Add test name' ); + # TEST + is($node->toString, "", ' TODO : Add test name'); + } + + { + my $node = $doc->createCDATASection( "foo" ); + # TEST + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeType, XML_CDATA_SECTION_NODE, ' TODO : Add test name' ); + # TEST + is($node->nodeValue, "foo", ' TODO : Add test name' ); + # TEST + is($node->toString, "", ' TODO : Add test name'); + } + + # -> Create Attributes + { + my $attr = $doc->createAttribute("foo", "bar"); + # TEST + ok($attr, ' TODO : Add test name'); + # TEST + is($attr->nodeType, XML_ATTRIBUTE_NODE, ' TODO : Add test name' ); + # TEST + is($attr->name, "foo", ' TODO : Add test name'); + # TEST + is($attr->value, "bar", ' TODO : Add test name' ); + # TEST + is($attr->hasChildNodes, 0, ' TODO : Add test name'); + my $content = $attr->firstChild; + # TEST + ok( $content, ' TODO : Add test name' ); + # TEST + is( $content->nodeType, XML_TEXT_NODE, ' TODO : Add test name' ); + } + { + # bad attribute creation + # TEST:$badnames_count=5; + my @badnames = ( ";", "&", "<><", "/", "1A"); + + foreach my $name ( @badnames ) { + my $node = eval {$doc->createAttribute( $name, "bar" );}; + # TEST*$badnames_count + ok( !defined($node), ' TODO : Add test name' ); + } + + } + { + my $elem = $doc->createElement('foo'); + my $attr = $doc->createAttribute(attr => 'e & f'); + $elem->addChild($attr); + # TEST + ok ($elem->toString() eq '', ' TODO : Add test name'); + $elem->removeAttribute('attr'); + $attr = $doc->createAttributeNS(undef,'attr2' => 'a & b'); + $elem->addChild($attr); + # TEST + ok ($elem->toString() eq '', ' TODO : Add test name'); + } + { + eval { + my $attr = $doc->createAttributeNS("http://kungfoo", "kung:foo","bar"); + }; + # TEST + ok($@, ' TODO : Add test name'); + + my $root = $doc->createElement( "foo" ); + $doc->setDocumentElement( $root ); + + my $attr; + eval { + $attr = $doc->createAttributeNS("http://kungfoo", "kung:foo","bar"); + }; + # TEST + ok($attr, ' TODO : Add test name'); + # TEST + is($attr->nodeName, "kung:foo", ' TODO : Add test name'); + # TEST + is($attr->name,"foo", ' TODO : Add test name' ); + # TEST + is($attr->value, "bar", ' TODO : Add test name' ); + + $attr->setValue( q(bar&) ); + # TEST + is($attr->getValue, q(bar&), ' TODO : Add test name' ); + } + { + # bad attribute creation + # TEST:$badnames_count=5; + my @badnames = ( ";", "&", "<><", "/", "1A"); + + foreach my $name ( @badnames ) { + my $node = eval {$doc->createAttributeNS( undef, $name, "bar" );}; + # TEST*$badnames_count + ok( (!defined $node), ' TODO : Add test name' ); + } + + } + + # -> Create PIs + { + my $pi = $doc->createProcessingInstruction( "foo", "bar" ); + # TEST + ok($pi, ' TODO : Add test name'); + # TEST + is($pi->nodeType, XML_PI_NODE, ' TODO : Add test name'); + # TEST + is($pi->nodeName, "foo", ' TODO : Add test name'); + # TEST + is($pi->textContent, "bar", ' TODO : Add test name'); + # TEST + is($pi->getData, "bar", ' TODO : Add test name'); + } + + { + my $pi = $doc->createProcessingInstruction( "foo" ); + # TEST + ok($pi, ' TODO : Add test name'); + # TEST + is($pi->nodeType, XML_PI_NODE, ' TODO : Add test name'); + # TEST + is($pi->nodeName, "foo", ' TODO : Add test name'); + my $data = $pi->textContent; + # undef or "" depending on libxml2 version + # TEST + ok( is_empty_str($data), ' TODO : Add test name' ); + $data = $pi->getData; + # TEST + ok( is_empty_str($data), ' TODO : Add test name' ); + $pi->setData(q(bar&)); + # TEST + is( $pi->getData, q(bar&), ' TODO : Add test name'); + # TEST + is($pi->textContent, q(bar&), ' TODO : Add test name'); + } +} + +{ + # Document Manipulation + # -> Document Elements + + my $doc = XML::LibXML::Document->new(); + my $node = $doc->createElement( "foo" ); + $doc->setDocumentElement( $node ); + my $tn = $doc->documentElement; + # TEST + ok($tn, ' TODO : Add test name'); + # TEST + ok($node->isSameNode($tn), ' TODO : Add test name'); + + my $node2 = $doc->createElement( "bar" ); + { my $warn; + eval { + local $SIG{__WARN__} = sub { $warn = 1 }; + # TEST + ok( !defined($doc->appendChild($node2)), ' TODO : Add test name' ); + }; + # TEST + ok(($@ or $warn), ' TODO : Add test name'); + } + my @cn = $doc->childNodes; + # TEST + is( scalar(@cn) , 1, ' TODO : Add test name'); + # TEST + ok($cn[0]->isSameNode($node), ' TODO : Add test name'); + + eval { + $doc->insertBefore($node2, $node); + }; + # TEST + ok ($@, ' TODO : Add test name'); + @cn = $doc->childNodes; + # TEST + is( scalar(@cn) , 1, ' TODO : Add test name'); + # TEST + ok($cn[0]->isSameNode($node), ' TODO : Add test name'); + + $doc->removeChild($node); + @cn = $doc->childNodes; + # TEST + is( scalar(@cn) , 0, ' TODO : Add test name'); + + for ( 1..2 ) { + my $nodeA = $doc->createElement( "x" ); + $doc->setDocumentElement( $nodeA ); + } + # TEST + ok(1, ' TODO : Add test name'); # must not segfault here :) + + $doc->setDocumentElement( $node2 ); + @cn = $doc->childNodes; + # TEST + is( scalar(@cn) , 1, ' TODO : Add test name'); + # TEST + ok($cn[0]->isSameNode($node2), ' TODO : Add test name'); + + my $node3 = $doc->createElementNS( "http://foo", "bar" ); + # TEST + ok($node3, ' TODO : Add test name'); + + # -> Processing Instructions + { + my $pi = $doc->createProcessingInstruction( "foo", "bar" ); + $doc->appendChild( $pi ); + @cn = $doc->childNodes; + # TEST + ok( $pi->isSameNode($cn[-1]), ' TODO : Add test name' ); + $pi->setData( 'bar="foo"' ); + # TEST + is( $pi->textContent, 'bar="foo"', ' TODO : Add test name'); + $pi->setData( foo=>"foo" ); + # TEST + is( $pi->textContent, 'foo="foo"', ' TODO : Add test name'); + } +} + +package Stringify; + +use overload q[""] => sub { return 'foobarXbaz'; }; + +sub new +{ + return bless \(my $x); +} + +package main; + +{ + # Document Storing + my $parser = XML::LibXML->new; + my $doc = $parser->parse_string("bar"); + + # TEST + + ok( $doc, ' TODO : Add test name' ); + + # -> to file handle + + { + open my $fh, '>', 'example/testrun.xml' + or die "Cannot open example/testrun.xml for writing - $!."; + + $doc->toFH( $fh ); + $fh->close; + # TEST + ok(1, ' TODO : Add test name'); + # now parse the file to check, if succeeded + my $tdoc = $parser->parse_file( "example/testrun.xml" ); + # TEST + ok( $tdoc, ' TODO : Add test name' ); + # TEST + ok( $tdoc->documentElement, ' TODO : Add test name' ); + # TEST + is( $tdoc->documentElement->nodeName, "foo", ' TODO : Add test name' ); + # TEST + is( $tdoc->documentElement->textContent, "bar", ' TODO : Add test name' ); + unlink "example/testrun.xml" ; + } + + # -> to named file + { + $doc->toFile( "example/testrun.xml" ); + # TEST + ok(1, ' TODO : Add test name'); + # now parse the file to check, if succeeded + my $tdoc = $parser->parse_file( "example/testrun.xml" ); + # TEST + ok( $tdoc, ' TODO : Add test name' ); + # TEST + ok( $tdoc->documentElement, ' TODO : Add test name' ); + # TEST + is( $tdoc->documentElement->nodeName, "foo", ' TODO : Add test name' ); + # TEST + is( $tdoc->documentElement->textContent, "bar", ' TODO : Add test name' ); + unlink "example/testrun.xml" ; + } + + # ELEMENT LIKE FUNCTIONS + { + my $parser2 = XML::LibXML->new(); + my $string1 = ""; + my $string2 = ''; + my $string3 = ''; + my $string4 = ''; + my $string5 = 'foobarXbaz'; + { + my $doc2 = $parser2->parse_string($string1); + # TEST + _count_tag_name($doc2, 'A', 3, q{3 As}); + # TEST + _count_tag_name($doc2, '*', 5, q{5 elements of all names}); + + # TEST + _count_elements_by_name_ns($doc2, ['*', 'B'], 2, + '2 Bs of any namespace' + ); + + # TEST + _count_local_name($doc2, 'A', 3, q{3 A's}); + + # TEST + _count_local_name($doc2, '*', 5, q{5 Sub-elements}); + } + { + my $doc2 = $parser2->parse_string($string2); + # TEST + _count_tag_name( $doc2, 'C:A', 3, q{C:A count}); + # TEST + _count_elements_by_name_ns($doc2, [ "xml://D", "A" ], 3, + q{3 elements of namespace xml://D and A}, + ); + # TEST + _count_elements_by_name_ns($doc2, ['*', 'A'], 3, + q{3 Elements A of any namespace} + ); + # TEST + _count_local_name($doc2, 'A', 3, q{3 As}); + } + { + my $doc2 = $parser2->parse_string($string3); + # TEST + _count_elements_by_name_ns($doc2, ["xml://D", "A"], 3, + q{3 Elements A of any namespace} + ); + # TEST + _count_local_name($doc2, 'A', 3, q{3 As}); + } +=begin taken_out + # This was taken out because the XML uses an undefined namespace. + # I don't know why this test was introduced in the first place, + # but it fails now + # + # This test fails in this bug report - + # https://rt.cpan.org/Ticket/Display.html?id=75403 + # -- Shlomi Fish + { + $parser2->recover(1); + local $SIG{'__WARN__'} = sub { + print "warning caught: @_\n"; + }; + # my $doc2 = $parser2->parse_string($string4); + #-TEST + # _count_local_name( $doc2, 'A', 3, q{3 As}); + } +=end taken_out + +=cut + # TEST:$count=3; + # Also test that we can parse from scalar references: + # See RT #64051 ( https://rt.cpan.org/Ticket/Display.html?id=64051 ) + # Also test that we can parse from references to scalars with + # overloaded strings: + # See RT #77864 ( https://rt.cpan.org/Public/Bug/Display.html?id=77864 ) + + my $obj = Stringify->new; + + foreach my $input ( $string5, (\$string5), $obj ) + { + my $doc2 = $parser2->parse_string($input); + # TEST*$count + _count_tag_name($doc2, 'C:A', 1, q{3 C:As}); + # TEST*$count + _count_tag_name($doc2, 'A', 3, q{3 As}); + # TEST*$count + _count_elements_by_name_ns($doc2, ["*", "A"], 4, + q{4 Elements of A of any namespace} + ); + # TEST*$count + _count_elements_by_name_ns($doc2, ['*', '*'], 5, + q{4 Elements of any namespace}, + ); + # TEST*$count + _count_elements_by_name_ns( $doc2, ["xml://D", "*" ], 2, + q{2 elements of any name in D} + ); + + my $A = $doc2->getDocumentElement; + # TEST*$count + _count_children_by_name($A, 'A', 1, q{1 A}); + # TEST*$count + _count_children_by_name($A, 'C:A', 1, q{C:A}); + # TEST*$count + _count_children_by_name($A, 'C:B', 0, q{No C:B children}); + # TEST*$count + _count_children_by_name($A, "*", 2, q{2 Childern in $A in total}); + # TEST*$count + _count_children_by_name_ns($A, ['*', 'A'], 2, + q{2 As of any namespace}); + # TEST*$count + _count_children_by_name_ns($A, [ "xml://D", "*" ], 1, + q{1 Child of D}, + ); + # TEST*$count + _count_children_by_name_ns($A, [ "*", "*" ], 2, + q{2 Children in total}, + ); + # TEST*$count + _count_children_by_local_name($A, 'A', 2, q{2 As}); + } + } +} + +{ + # Bug fixes (to be used with valgrind) + { + my $doc=XML::LibXML->createDocument(); # create a doc + my $x=$doc->createPI(foo=>"bar"); # create a PI + undef $doc; # should not free + undef $x; # free the PI + # TEST + ok(1, ' TODO : Add test name'); + } + { + my $doc=XML::LibXML->createDocument(); # create a doc + my $x=$doc->createAttribute(foo=>"bar"); # create an attribute + undef $doc; # should not free + undef $x; # free the attribute + # TEST + ok(1, ' TODO : Add test name'); + } + { + my $doc=XML::LibXML->createDocument(); # create a doc + my $x=$doc->createAttributeNS(undef,foo=>"bar"); # create an attribute + undef $doc; # should not free + undef $x; # free the attribute + # TEST + ok(1, ' TODO : Add test name'); + } + { + my $doc=XML::LibXML->new->parse_string(''); + my $x=$doc->createAttributeNS('http://foo.bar','x:foo'=>"bar"); # create an attribute + undef $doc; # should not free + undef $x; # free the attribute + # TEST + ok(1, ' TODO : Add test name'); + } + { + # rt.cpan.org #30610 + # valgrind this + my $object=XML::LibXML::Element->new( 'object' ); + my $xml = qq(\n); + my $lom_doc=XML::LibXML->new->parse_string($xml); + my $lom_root=$lom_doc->getDocumentElement(); + $object->appendChild( $lom_root ); + # TEST + ok(!defined($object->firstChild->ownerDocument), ' TODO : Add test name'); + } +} + + +{ + my $xml = q{ + +}; + my $out = q{ + +}; + my $dom = XML::LibXML->new->parse_string($xml); + # TEST + is($dom->getEncoding, "UTF-8", ' TODO : Add test name'); + $dom->setEncoding(); + # TEST + is($dom->getEncoding, undef, ' TODO : Add test name'); + # TEST + is($dom->toString, $out, ' TODO : Add test name'); +} + +# the following tests were added for #33810 +SKIP: +{ + if (! eval { require Encode; }) + { + skip "Encoding related tests require Encode", (3*8); + } + # TEST:$num_encs=3; + # The count. + # TEST:$c=0; + for my $enc (qw(UTF-16 UTF-16LE UTF-16BE)) { + my $xml = Encode::encode($enc,qq{ + + }); + my $dom = XML::LibXML->new->parse_string($xml); + # TEST:$c++; + is($dom->getEncoding,$enc, ' TODO : Add test name'); + # TEST:$c++; + is($dom->actualEncoding,$enc, ' TODO : Add test name'); + # TEST:$c++; + is($dom->getDocumentElement->getAttribute('foo'),'bar', ' TODO : Add test name'); + # TEST:$c++; + is($dom->getDocumentElement->getAttribute(Encode::encode('UTF-16','foo')), 'bar', ' TODO : Add test name'); + # TEST:$c++; + is($dom->getDocumentElement->getAttribute(Encode::encode($enc,'foo')), 'bar', ' TODO : Add test name'); + my $exp_enc = $enc eq 'UTF-16' ? 'UTF-16LE' : $enc; + # TEST:$c++; + is($dom->getDocumentElement->getAttribute('foo',1), Encode::encode($exp_enc,'bar'), ' TODO : Add test name'); + # TEST:$c++; + is($dom->getDocumentElement->getAttribute(Encode::encode('UTF-16','foo'),1), Encode::encode($exp_enc,'bar'), ' TODO : Add test name'); + # TEST:$c++; + is($dom->getDocumentElement->getAttribute(Encode::encode($enc,'foo'),1), Encode::encode($exp_enc,'bar'), ' TODO : Add test name'); + } + # TEST*$num_encs*$c +} diff --git a/t/04node.t b/t/04node.t new file mode 100644 index 0000000..361e298 --- /dev/null +++ b/t/04node.t @@ -0,0 +1,725 @@ +# -*- cperl -*- +# $Id$ + +## +# this test checks the DOM Node interface of XML::LibXML +# it relies on the success of t/01basic.t and t/02parse.t + +# it will ONLY test the DOM capabilities as specified in DOM Level3 +# XPath tests should be done in another test file + +# since all tests are run on a preparsed + +# Should be 166. +use Test::More tests => 195; + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); +use strict; +use warnings; +my $xmlstring = q{bar}; + +my $parser = XML::LibXML->new(); +my $doc = $parser->parse_string( $xmlstring ); + +# 1 Standalone Without NameSpaces +# 1.1 Node Attributes + +{ + my $node = $doc->documentElement; + my $rnode; + + # TEST + + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeType, XML_ELEMENT_NODE, ' TODO : Add test name'); + # TEST + is($node->nodeName, "foo", ' TODO : Add test name'); + # TEST + ok(!defined( $node->nodeValue ), ' TODO : Add test name'); + # TEST + ok($node->hasChildNodes, ' TODO : Add test name'); + # TEST + is($node->textContent, "bar&foo bar", ' TODO : Add test name'); + + { + my @children = $node->childNodes; + # TEST + is( scalar @children, 5, ' TODO : Add test name' ); + # TEST + is( $children[0]->nodeType, XML_TEXT_NODE, ' TODO : Add test name' ); + # TEST + is( $children[0]->nodeValue, "bar", ' TODO : Add test name' ); + # TEST + is( $children[4]->nodeType, XML_CDATA_SECTION_NODE, ' TODO : Add test name' ); + # TEST + is( $children[4]->nodeValue, "&foo bar", ' TODO : Add test name' ); + + my $fc = $node->firstChild; + # TEST + ok( $fc, ' TODO : Add test name' ); + # TEST + ok( $fc->isSameNode($children[0]), ' TODO : Add test name'); + # TEST + ok( $fc->baseURI =~ /unknown-/, ' TODO : Add test name' ); + + my $od = $fc->ownerDocument; + # TEST + ok( $od, ' TODO : Add test name' ); + # TEST + ok( $od->isSameNode($doc), ' TODO : Add test name'); + + my $xc = $fc->nextSibling; + # TEST + ok( $xc, ' TODO : Add test name' ); + # TEST + ok( $xc->isSameNode($children[1]), ' TODO : Add test name' ); + + $fc = $node->lastChild; + # TEST + ok( $fc, ' TODO : Add test name' ); + # TEST + ok( $fc->isSameNode($children[4]), ' TODO : Add test name'); + + $xc = $fc->previousSibling; + # TEST + ok( $xc, ' TODO : Add test name' ); + # TEST + ok( $xc->isSameNode($children[3]), ' TODO : Add test name' ); + $rnode = $xc; + + $xc = $fc->parentNode; + # TEST + ok( $xc, ' TODO : Add test name' ); + # TEST + ok( $xc->isSameNode($node), ' TODO : Add test name' ); + + $xc = $children[2]; + { + # 1.2 Attribute Node + # TEST + ok( $xc->hasAttributes, ' TODO : Add test name' ); + my $attributes = $xc->attributes; + # TEST + ok( $attributes, ' TODO : Add test name' ); + # TEST + is( ref($attributes), "XML::LibXML::NamedNodeMap", ' TODO : Add test name' ); + # TEST + is( $attributes->length, 1, ' TODO : Add test name' ); + my $attr = $attributes->getNamedItem("foo"); + + # TEST + + ok( $attr, ' TODO : Add test name' ); + # TEST + is( $attr->nodeType, XML_ATTRIBUTE_NODE, ' TODO : Add test name' ); + # TEST + is( $attr->nodeName, "foo", ' TODO : Add test name' ); + # TEST + is( $attr->nodeValue, "foobar", ' TODO : Add test name' ); + # TEST + is( $attr->hasChildNodes, 0, ' TODO : Add test name'); + } + + { + my @attributes = $xc->attributes; + # TEST + is( scalar( @attributes ), 1, ' TODO : Add test name' ); + } + + # 1.2 Node Cloning + { + my $cnode = $doc->createElement("foo"); + $cnode->setAttribute('aaa','AAA'); + $cnode->setAttributeNS('http://ns','x:bbb','BBB'); + my $c1node = $doc->createElement("bar"); + $cnode->appendChild( $c1node ); + + my $xnode = $cnode->cloneNode(0); + # TEST + ok( $xnode, ' TODO : Add test name' ); + # TEST + is( $xnode->nodeName, "foo", ' TODO : Add test name' ); + # TEST + ok( ! $xnode->hasChildNodes, ' TODO : Add test name' ); + # TEST + is( $xnode->getAttribute('aaa'),'AAA', ' TODO : Add test name' ); + # TEST + is( $xnode->getAttributeNS('http://ns','bbb'),'BBB', ' TODO : Add test name' ); + + $xnode = $cnode->cloneNode(1); + # TEST + ok( $xnode, ' TODO : Add test name' ); + # TEST + is( $xnode->nodeName, "foo", ' TODO : Add test name' ); + # TEST + ok( $xnode->hasChildNodes, ' TODO : Add test name' ); + # TEST + is( $xnode->getAttribute('aaa'),'AAA', ' TODO : Add test name' ); + # TEST + is( $xnode->getAttributeNS('http://ns','bbb'),'BBB', ' TODO : Add test name' ); + + my @cn = $xnode->childNodes; + # TEST + ok( @cn, ' TODO : Add test name' ); + # TEST + is( scalar(@cn), 1, ' TODO : Add test name'); + # TEST + is( $cn[0]->nodeName, "bar", ' TODO : Add test name' ); + # TEST + ok( !$cn[0]->isSameNode( $c1node ), ' TODO : Add test name' ); + + # clone namespaced elements + my $nsnode = $doc->createElementNS( "fooNS", "foo:bar" ); + + my $cnsnode = $nsnode->cloneNode(0); + # TEST + is( $cnsnode->nodeName, "foo:bar", ' TODO : Add test name' ); + # TEST + ok( $cnsnode->localNS(), ' TODO : Add test name' ); + # TEST + is( $cnsnode->namespaceURI(), 'fooNS', ' TODO : Add test name' ); + + # clone namespaced elements (recursive) + my $c2nsnode = $nsnode->cloneNode(1); + # TEST + is( $c2nsnode->toString(), $nsnode->toString(), ' TODO : Add test name' ); + } + + # 1.3 Node Value + my $string2 = "barfoo"; + { + my $doc2 = $parser->parse_string( $string2 ); + my $root = $doc2->documentElement; + # TEST + ok( ! defined($root->nodeValue), ' TODO : Add test name' ); + # TEST + is( $root->textContent, "barfoo", ' TODO : Add test name'); + } + } + + { + my $children = $node->childNodes; + # TEST + ok( defined $children, ' TODO : Add test name' ); + # TEST + is( ref($children), "XML::LibXML::NodeList", ' TODO : Add test name' ); + } + + # 2. (Child) Node Manipulation + + # 2.1 Valid Operations + + { + # 2.1.1 Single Node + + my $inode = $doc->createElement("kungfoo"); # already tested + my $jnode = $doc->createElement("kungfoo"); + my $xn = $node->insertBefore($inode, $rnode); + # TEST + ok( $xn, ' TODO : Add test name' ); + # TEST + ok( $xn->isSameNode($inode), ' TODO : Add test name' ); + + + $node->insertBefore( $jnode, undef ); + my @ta = $node->childNodes(); + $xn = pop @ta; + # TEST + ok( $xn->isSameNode( $jnode ), ' TODO : Add test name' ); + $jnode->unbindNode; + + my @cn = $node->childNodes; + # TEST + is(scalar(@cn), 6, ' TODO : Add test name'); + # TEST + ok( $cn[3]->isSameNode($inode), ' TODO : Add test name' ); + + $xn = $node->removeChild($inode); + # TEST + ok($xn, ' TODO : Add test name'); + # TEST + ok($xn->isSameNode($inode), ' TODO : Add test name'); + + @cn = $node->childNodes; + # TEST + is(scalar(@cn), 5, ' TODO : Add test name'); + # TEST + ok( $cn[3]->isSameNode($rnode), ' TODO : Add test name' ); + + $xn = $node->appendChild($inode); + # TEST + ok($xn, ' TODO : Add test name'); + # TEST + ok($xn->isSameNode($inode), ' TODO : Add test name'); + # TEST + ok($xn->isSameNode($node->lastChild), ' TODO : Add test name'); + + $xn = $node->removeChild($inode); + # TEST + ok($xn, ' TODO : Add test name'); + # TEST + ok($xn->isSameNode($inode), ' TODO : Add test name'); + # TEST + ok($cn[-1]->isSameNode($node->lastChild), ' TODO : Add test name'); + + $xn = $node->replaceChild( $inode, $rnode ); + # TEST + ok($xn, ' TODO : Add test name'); + # TEST + ok($xn->isSameNode($rnode), ' TODO : Add test name'); + + my @cn2 = $node->childNodes; + # TEST + is(scalar(@cn), 5, ' TODO : Add test name'); + # TEST + ok( $cn2[3]->isSameNode($inode), ' TODO : Add test name' ); + } + + { + # insertAfter Tests + my $anode = $doc->createElement("a"); + my $bnode = $doc->createElement("b"); + my $cnode = $doc->createElement("c"); + my $dnode = $doc->createElement("d"); + + $anode->insertAfter( $bnode, undef ); + # TEST + is( $anode->toString(), '', ' TODO : Add test name' ); + + $anode->insertAfter( $dnode, undef ); + # TEST + is( $anode->toString(), '', ' TODO : Add test name' ); + + $anode->insertAfter( $cnode, $bnode ); + # TEST + is( $anode->toString(), '', ' TODO : Add test name' ); + + } + + { + my ($inode, $jnode ); + + $inode = $doc->createElement("kungfoo"); # already tested + $jnode = $doc->createElement("foobar"); + + my $xn = $inode->insertBefore( $jnode, undef); + # TEST + ok( $xn, ' TODO : Add test name' ); + # TEST + ok( $xn->isSameNode( $jnode ), ' TODO : Add test name' ); + } + + { + # 2.1.2 Document Fragment + + my @cn = $doc->documentElement->childNodes; + my $rnode= $doc->documentElement; + + my $frag = $doc->createDocumentFragment; + my $node1= $doc->createElement("kung"); + my $node2= $doc->createElement("foo"); + + $frag->appendChild($node1); + $frag->appendChild($node2); + + my $xn = $node->appendChild( $frag ); + # TEST + ok($xn, ' TODO : Add test name'); + my @cn2 = $node->childNodes; + # TEST + is(scalar(@cn2), 7, ' TODO : Add test name'); + # TEST + ok($cn2[-1]->isSameNode($node2), ' TODO : Add test name'); + # TEST + ok($cn2[-2]->isSameNode($node1), ' TODO : Add test name'); + + $frag->appendChild( $node1 ); + $frag->appendChild( $node2 ); + + @cn2 = $node->childNodes; + # TEST + is(scalar(@cn2), 5, ' TODO : Add test name'); + + $xn = $node->replaceChild( $frag, $cn[3] ); + # TEST + ok($xn, ' TODO : Add test name'); + # TEST + ok($xn->isSameNode($cn[3]), ' TODO : Add test name'); + @cn2 = $node->childNodes; + # TEST + is(scalar(@cn2), 6, ' TODO : Add test name'); + + $frag->appendChild( $node1 ); + $frag->appendChild( $node2 ); + + $xn = $node->insertBefore( $frag, $cn[0] ); + # TEST + ok($xn, ' TODO : Add test name'); + # TEST + ok($node1->isSameNode($node->firstChild), ' TODO : Add test name'); + @cn2 = $node->childNodes; + # TEST + is(scalar(@cn2), 6, ' TODO : Add test name'); + } + + # 2.2 Invalid Operations + + + # 2.3 DOM extensions + { + my $str = "com"; + my $doc = XML::LibXML->new->parse_string( $str ); + my $elem= $doc->documentElement; + # TEST + ok( $elem, ' TODO : Add test name' ); + # TEST + ok( $elem->hasChildNodes, ' TODO : Add test name' ); + $elem->removeChildNodes; + # TEST + is( $elem->hasChildNodes,0, ' TODO : Add test name' ); + $elem->toString; + } +} + +# 3 Standalone With NameSpaces + +{ + my $doc = XML::LibXML::Document->new(); + my $URI ="http://kungfoo"; + my $pre = "foo"; + my $name= "bar"; + + my $elem = $doc->createElementNS($URI, $pre.":".$name); + + # TEST + + ok($elem, ' TODO : Add test name'); + # TEST + is($elem->nodeName, $pre.":".$name, ' TODO : Add test name'); + # TEST + is($elem->namespaceURI, $URI, ' TODO : Add test name'); + # TEST + is($elem->prefix, $pre, ' TODO : Add test name'); + # TEST + is($elem->localname, $name, ' TODO : Add test name' ); + + # TEST + + is( $elem->lookupNamespacePrefix( $URI ), $pre, ' TODO : Add test name'); + # TEST + is( $elem->lookupNamespaceURI( $pre ), $URI, ' TODO : Add test name'); + + my @ns = $elem->getNamespaces; + # TEST + is( scalar(@ns) ,1, ' TODO : Add test name' ); +} + +# 4. Document swtiching + +{ + # 4.1 simple document + my $docA = XML::LibXML::Document->new; + { + my $docB = XML::LibXML::Document->new; + my $e1 = $docB->createElement( "A" ); + my $e2 = $docB->createElement( "B" ); + my $e3 = $docB->createElementNS( "http://kungfoo", "C:D" ); + $e1->appendChild( $e2 ); + $e1->appendChild( $e3 ); + + $docA->setDocumentElement( $e1 ); + } + my $elem = $docA->documentElement; + my @c = $elem->childNodes; + my $xroot = $c[0]->ownerDocument; + # TEST + ok( $xroot->isSameNode($docA), ' TODO : Add test name' ); + + +} + +# 5. libxml2 specials + +{ + my $docA = XML::LibXML::Document->new; + my $e1 = $docA->createElement( "A" ); + my $e2 = $docA->createElement( "B" ); + my $e3 = $docA->createElement( "C" ); + + $e1->appendChild( $e2 ); + my $x = $e2->replaceNode( $e3 ); + my @cn = $e1->childNodes; + # TEST + ok(@cn, ' TODO : Add test name'); + # TEST + is( scalar(@cn), 1, ' TODO : Add test name' ); + # TEST + ok($cn[0]->isSameNode($e3), ' TODO : Add test name'); + # TEST + ok($x->isSameNode($e2), ' TODO : Add test name'); + + $e3->addSibling( $e2 ); + @cn = $e1->childNodes; + # TEST + is( scalar(@cn), 2, ' TODO : Add test name' ); + # TEST + ok($cn[0]->isSameNode($e3), ' TODO : Add test name'); + # TEST + ok($cn[1]->isSameNode($e2), ' TODO : Add test name'); +} + +# 6. implicit attribute manipulation + +{ + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_string( '' ); + my $root = $doc->documentElement; + my $attributes = $root->attributes; + # TEST + ok($attributes, ' TODO : Add test name'); + + my $newAttr = $doc->createAttribute( "kung", "foo" ); + $attributes->setNamedItem( $newAttr ); + + my @att = $root->attributes; + # TEST + ok(@att, ' TODO : Add test name'); + # TEST + is(scalar(@att), 2, ' TODO : Add test name'); + $newAttr = $doc->createAttributeNS( "http://kungfoo", "x:kung", "foo" ); + + $attributes->setNamedItem($newAttr); + @att = $root->attributes; + # TEST + ok(@att, ' TODO : Add test name'); + # TEST + is(scalar(@att), 4, ' TODO : Add test name'); # because of the namespace ... + + $newAttr = $doc->createAttributeNS( "http://kungfoo", "x:kung", "bar" ); + $attributes->setNamedItem($newAttr); + @att = $root->attributes; + # TEST + ok(@att, ' TODO : Add test name'); + # TEST + is(scalar(@att), 4, ' TODO : Add test name'); + # TEST + ok($att[2]->isSameNode($newAttr), ' TODO : Add test name'); + + $attributes->removeNamedItem("x:kung"); + + @att = $root->attributes; + # TEST + ok(@att, ' TODO : Add test name'); + # TEST + is(scalar(@att), 3, ' TODO : Add test name'); + # TEST + is($attributes->length, 3, ' TODO : Add test name'); +} + +# 7. importing and adopting + +{ + my $parser = XML::LibXML->new; + my $doc1 = $parser->parse_string( "bar" ); + my $doc2 = XML::LibXML::Document->new; + + # TEST + + ok( $doc1 && $doc2, ' TODO : Add test name' ); + my $rnode1 = $doc1->documentElement; + # TEST + ok( $rnode1, ' TODO : Add test name' ); + my $rnode2 = $doc2->importNode( $rnode1 ); + # TEST + ok( ! $rnode2->isSameNode( $rnode1 ), ' TODO : Add test name' ) ; + $doc2->setDocumentElement( $rnode2 ); + + my $node = $rnode2->cloneNode(0); + # TEST + ok( $node, ' TODO : Add test name' ); + my $cndoc = $node->ownerDocument; + # TEST + ok( $cndoc, ' TODO : Add test name' ); + # TEST + ok( $cndoc->isSameNode( $doc2 ), ' TODO : Add test name' ); + + my $xnode = XML::LibXML::Element->new("test"); + + my $node2 = $doc2->importNode($xnode); + # TEST + ok( $node2, ' TODO : Add test name' ); + my $cndoc2 = $node2->ownerDocument; + # TEST + ok( $cndoc2, ' TODO : Add test name' ); + # TEST + ok( $cndoc2->isSameNode( $doc2 ), ' TODO : Add test name' ); + + my $doc3 = XML::LibXML::Document->new; + my $node3 = $doc3->adoptNode( $xnode ); + # TEST + ok( $node3, ' TODO : Add test name' ); + # TEST + ok( $xnode->isSameNode( $node3 ), ' TODO : Add test name' ); + # TEST + ok( $doc3->isSameNode( $node3->ownerDocument ), ' TODO : Add test name' ); + + my $xnode2 = XML::LibXML::Element->new("test"); + $xnode2->setOwnerDocument( $doc3 ); # alternate version of adopt node + # TEST + ok( $xnode2->ownerDocument, ' TODO : Add test name' ); + # TEST + ok( $doc3->isSameNode( $xnode2->ownerDocument ), ' TODO : Add test name' ); +} + +{ + # appending empty fragment + my $doc = XML::LibXML::Document->new(); + my $frag = $doc->createDocumentFragment(); + my $root = $doc->createElement( 'foo' ); + my $r = $root->appendChild( $frag ); + # TEST + ok( $r, ' TODO : Add test name' ); +} + +{ + my $doc = XML::LibXML::Document->new('1.0', 'UTF-8'); + my $schema = $doc->createElement('sphinx:schema'); + eval { $schema->appendChild( $schema ) }; + # TEST + like ($@, qr/HIERARCHY_REQUEST_ERR/, + ' Thrown HIERARCHY_REQUEST_ERR exception' + ); +} + +{ + my $doc = XML::LibXML::Document->new('1.0', 'UTF-8'); + my $attr = $doc->createAttribute('test','bar'); + my $ent = $doc->createEntityReference('foo'); + my $text = $doc->createTextNode('baz'); + $attr->appendChild($ent); + $attr->appendChild($text); + # TEST + ok($attr->toString() eq ' test="bar&foo;baz"', ' TODO : Add test name'); +} + +{ + my $string = <<'EOF'; + + + + + + + + + text + +EOF + + # TEST:$count=2; + foreach my $arg_to_parse ($string, \$string) + { + my $doc = XML::LibXML->load_xml(string=>$arg_to_parse); + my $r = $doc->getDocumentElement; + # TEST*$count + ok($r, ' TODO : Add test name'); + my @nonblank = $r->nonBlankChildNodes; + # TEST*$count + is(join(',',map $_->nodeName,@nonblank), 'a,b,#comment,#cdata-section,foo,c,#text', ' TODO : Add test name' ); + # TEST*$count + is($r->firstChild->nodeName, '#text', ' TODO : Add test name'); + + my @all = $r->childNodes; + # TEST*$count + is(join(',',map $_->nodeName,@all), '#text,a,#text,b,#text,#cdata-section,#text,#comment,#text,#cdata-section,#text,foo,#text,c,#text', ' TODO : Add test name' ); + + my $f = $r->firstNonBlankChild; + my $p; + # TEST*$count + is($f->nodeName, 'a', ' TODO : Add test name'); + # TEST*$count + is($f->nextSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + is($f->previousSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + ok( !$f->previousNonBlankSibling, ' TODO : Add test name' ); + + $p = $f; + $f=$f->nextNonBlankSibling; + # TEST*$count + is($f->nodeName, 'b', ' TODO : Add test name'); + # TEST*$count + is($f->nextSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + ok( $f->previousNonBlankSibling->isSameNode($p), ' TODO : Add test name' ); + + $p = $f; + $f=$f->nextNonBlankSibling; + # TEST*$count + ok($f->isa('XML::LibXML::Comment'), ' TODO : Add test name'); + # TEST*$count + is($f->nextSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + ok( $f->previousNonBlankSibling->isSameNode($p), ' TODO : Add test name' ); + + $p = $f; + $f=$f->nextNonBlankSibling; + # TEST*$count + ok($f->isa('XML::LibXML::CDATASection'), ' TODO : Add test name'); + # TEST*$count + is($f->nextSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + ok( $f->previousNonBlankSibling->isSameNode($p), ' TODO : Add test name' ); + + $p = $f; + $f=$f->nextNonBlankSibling; + # TEST*$count + ok($f->isa('XML::LibXML::PI'), ' TODO : Add test name'); + # TEST*$count + is($f->nextSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + ok( $f->previousNonBlankSibling->isSameNode($p), ' TODO : Add test name' ); + + $p = $f; + $f=$f->nextNonBlankSibling; + # TEST*$count + is($f->nodeName, 'c', ' TODO : Add test name'); + # TEST*$count + is($f->nextSibling->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + ok( $f->previousNonBlankSibling->isSameNode($p), ' TODO : Add test name' ); + + $p = $f; + $f=$f->nextNonBlankSibling; + # TEST*$count + is($f->nodeName, '#text', ' TODO : Add test name'); + # TEST*$count + is($f->nodeValue, "\n text\n", ' TODO : Add test name'); + # TEST*$count + ok(!$f->nextSibling, ' TODO : Add test name'); + # TEST*$count + ok( $f->previousNonBlankSibling->isSameNode($p), ' TODO : Add test name' ); + + $f=$f->nextNonBlankSibling; + # TEST*$count + ok(!defined $f, ' TODO : Add test name'); + + } +} + +{ + # RT #94149 + # https://rt.cpan.org/Ticket/Display.html?id=94149 + + my $orig = XML::LibXML::Text->new('Double '); + my $ret = $orig->addSibling(XML::LibXML::Text->new('Free')); + # TEST + is( $ret->textContent, 'Double Free', 'merge text nodes with addSibling' ); +} + diff --git a/t/05text.t b/t/05text.t new file mode 100644 index 0000000..f6a91cd --- /dev/null +++ b/t/05text.t @@ -0,0 +1,323 @@ +# $Id$ + +## +# this test checks the DOM Characterdata interface of XML::LibXML + +use strict; +use warnings; + +use Test::More tests => 59; + +use XML::LibXML; + +my $doc = XML::LibXML::Document->new(); + +{ + # 1. creation + my $foo = "foobar"; + my $textnode = $doc->createTextNode($foo); + # TEST + ok( $textnode, 'creation 1'); + # TEST + is( $textnode->nodeName(), '#text', 'creation 2'); + # TEST + is( $textnode->nodeValue(), $foo, 'creation 3',); + + { + # Test for https://rt.cpan.org/Ticket/Display.html?id=112470 + my @attributes = $textnode->attributes(); + # TEST + is_deeply( + (\@attributes), + [], + '::Text->attributes() returns an empty list in list context (RT#112470)', + ); + } + + # 2. substring + my $tnstr = $textnode->substringData( 1,2 ); + # TEST + is( $tnstr , "oo", 'substring 1'); + $tnstr = $textnode->substringData( 0,3 ); + # TEST + is( $tnstr , "foo", 'substring 2'); + # TEST + is( $textnode->nodeValue(), $foo, 'substring - text node unchanged' ); + + # 3. Expansion + $textnode->appendData( $foo ); + # TEST + is( $textnode->nodeValue(), $foo . $foo, 'expansion 1'); + + $textnode->insertData( 6, "FOO" ); + # TEST + is( $textnode->nodeValue(), $foo."FOO".$foo, 'expansion 2' ); + + $textnode->setData( $foo ); + $textnode->insertData( 6, "FOO" ); + # TEST + is( $textnode->nodeValue(), $foo."FOO", 'expansion 3'); + $textnode->setData( $foo ); + $textnode->insertData( 3, "" ); + # TEST + is( $textnode->nodeValue(), $foo, 'Empty insertion does not change value'); + + # 4. Removal + $textnode->deleteData( 1,2 ); + # TEST + is( $textnode->nodeValue(), "fbar", 'Removal 1'); + $textnode->setData( $foo ); + $textnode->deleteData( 1,10 ); + # TEST + is( $textnode->nodeValue(), "f", 'Removal 2'); + $textnode->setData( $foo ); + $textnode->deleteData( 10,1 ); + # TEST + is( $textnode->nodeValue(), $foo, 'Removal 3'); + $textnode->deleteData( 1,0 ); + # TEST + is( $textnode->nodeValue(), $foo, 'Removal 4'); + $textnode->deleteData( 0,0 ); + # TEST + is( $textnode->nodeValue(), $foo, 'Removal 5'); + $textnode->deleteData( 0,2 ); + # TEST + is( $textnode->nodeValue(), "obar", 'Removal 6'); + + # 5. Replacement + $textnode->setData( "test" ); + $textnode->replaceData( 1,2, "phish" ); + # TEST + is( $textnode->nodeValue(), "tphisht", 'Replacement 1'); + $textnode->setData( "test" ); + $textnode->replaceData( 1,4, "phish" ); + # TEST + is( $textnode->nodeValue(), "tphish", 'Replacement 2'); + $textnode->setData( "test" ); + $textnode->replaceData( 1,0, "phish" ); + # TEST + is( $textnode->nodeValue(), "tphishest", 'Replacement 3'); + + + # 6. XML::LibXML features + $textnode->setData( "test" ); + + $textnode->replaceDataString( "es", "new" ); + # TEST + is( $textnode->nodeValue(), "tnewt", 'replaceDataString() 1'); + + $textnode->replaceDataRegEx( 'n(.)w', '$1s' ); + # TEST + is( $textnode->nodeValue(), "test", 'replaceDataRegEx() 2'); + + $textnode->setData( "blue phish, white phish, no phish" ); + $textnode->replaceDataRegEx( 'phish', 'test' ); + # TEST + is( $textnode->nodeValue(), "blue test, white phish, no phish", + 'replaceDataRegEx 3',); + + # replace them all! + $textnode->replaceDataRegEx( 'phish', 'test', 'g' ); + # TEST + is( $textnode->nodeValue(), "blue test, white test, no test", + 'replaceDataRegEx g',); + + # check if special chars are encoded properly + $textnode->setData( "te?st" ); + $textnode->replaceDataString( "e?s", 'ne\w' ); + # TEST + is( $textnode->nodeValue(), 'tne\wt', ' TODO : Add test name' ); + + # check if "." is encoded properly + $textnode->setData( "h.thrt"); + $textnode->replaceDataString( "h.t", 'new', 1 ); + # TEST + is( $textnode->nodeValue(), 'newhrt', ' TODO : Add test name' ); + + # check if deleteDataString does not delete dots. + $textnode->setData( 'hitpit' ); + $textnode->deleteDataString( 'h.t' ); + # TEST + is( $textnode->nodeValue(), 'hitpit', ' TODO : Add test name' ); + + # check if deleteDataString works + $textnode->setData( 'hitpithit' ); + $textnode->deleteDataString( 'hit' ); + # TEST + is( $textnode->nodeValue(), 'pithit', ' TODO : Add test name' ); + + # check if deleteDataString all works + $textnode->setData( 'hitpithit' ); + $textnode->deleteDataString( 'hit', 1 ); + # TEST + is( $textnode->nodeValue(), 'pit', ' TODO : Add test name' ); + + # check if entities don't get translated + $textnode->setData(q(foo&bar)); + # TEST + is ( $textnode->getData(), q(foo&bar), ' TODO : Add test name' ); +} + +{ + # UTF-8 tests + + my $test_str = "te\xDFt"; + # Latin1 strings still fail. + utf8::upgrade($test_str); + + # 1. creation + my $textnode = $doc->createTextNode($test_str); + # TEST + ok( $textnode, 'UTF-8 creation 1'); + # TEST + is( $textnode->nodeValue(), $test_str, 'UTF-8 creation 2',); + my $foo_str = "\x{0444}oo\x{0431}ar"; + $textnode = $doc->createTextNode($foo_str); + # TEST + ok( $textnode, 'UTF-8 creation 3'); + # TEST + is( $textnode->nodeValue(), $foo_str, 'UTF-8 creation 4',); + + # 2. substring + my $tnstr = $textnode->substringData( 1,2 ); + # TEST + is( $tnstr , "oo", 'UTF-8 substring 1'); + $tnstr = $textnode->substringData( 0,3 ); + # TEST + is( $tnstr , "\x{0444}oo", 'UTF-8 substring 2'); + + # 3. Expansion + $textnode->appendData( $foo_str ); + # TEST + is( $textnode->nodeValue(), $foo_str . $foo_str, 'UTF-8 expansion 1'); + + my $ins_str = "\x{0424}OO"; + $textnode->insertData( 6, $ins_str ); + # TEST + is( $textnode->nodeValue(), $foo_str.$ins_str.$foo_str, + 'UTF-8 expansion 2' ); + + $textnode->setData( $foo_str ); + $textnode->insertData( 6, $ins_str ); + # TEST + is( $textnode->nodeValue(), $foo_str.$ins_str, 'UTF-8 expansion 3'); + + # 4. Removal + $textnode->setData( $foo_str ); + $textnode->deleteData( 1,3 ); + # TEST + is( $textnode->nodeValue(), "\x{0444}ar", 'UTF-8 Removal 1'); + $textnode->setData( $foo_str ); + $textnode->deleteData( 1,10 ); + # TEST + is( $textnode->nodeValue(), "\x{0444}", 'UTF-8 Removal 2'); + $textnode->setData( $foo_str ); + $textnode->deleteData( 6,100 ); + # TEST + is( $textnode->nodeValue(), $foo_str, 'UTF-8 Removal 3'); + + # 5. Replacement + my $phish_str = "ph\x{2160}sh"; + $textnode->setData( $test_str ); + $textnode->replaceData( 1,2, $phish_str ); + # TEST + is( $textnode->nodeValue(), "t".$phish_str."t", 'UTF-8 Replacement 1'); + $textnode->setData( $test_str ); + $textnode->replaceData( 1,4, $phish_str ); + # TEST + is( $textnode->nodeValue(), "t".$phish_str, 'UTF-8 Replacement 2'); + $textnode->setData( $test_str ); + $textnode->replaceData( 1,0, $phish_str ); + # TEST + is( $textnode->nodeValue(), "t".$phish_str."e\xDFt", + 'UTF-8 Replacement 3'); + + # 6. XML::LibXML features + $textnode->setData( $test_str ); + + my $new_str = "n\x{1D522}w"; + $textnode->replaceDataString( "e\xDF", $new_str ); + # TEST + is( $textnode->nodeValue(), "t".$new_str."t", + 'UTF-8 replaceDataString() 1'); + + $textnode->replaceDataRegEx( 'n(.)w', '$1s' ); + # TEST + is( $textnode->nodeValue(), "t\x{1D522}st", 'UTF-8 replaceDataRegEx() 2'); + + $textnode->setData( "blue $phish_str, white $phish_str, no $phish_str" ); + $textnode->replaceDataRegEx( $phish_str, $test_str ); + # TEST + is( $textnode->nodeValue(), + "blue $test_str, white $phish_str, no $phish_str", + 'UTF-8 replaceDataRegEx 3',); + + # replace them all! + $textnode->replaceDataRegEx( $phish_str, $test_str, 'g' ); + # TEST + is( $textnode->nodeValue(), + "blue $test_str, white $test_str, no $test_str", + 'UTF-8 replaceDataRegEx g',); + + # check if deleteDataString works + my $hit_str = "hi\x{1D54B}"; + my $pit_str = "\x{2119}it"; + $textnode->setData( "$hit_str$pit_str$hit_str" ); + $textnode->deleteDataString( $hit_str ); + # TEST + is( $textnode->nodeValue(), "$pit_str$hit_str", 'UTF-8 deleteDataString 1' ); + + # check if deleteDataString all works + $textnode->setData( "$hit_str$pit_str$hit_str" ); + $textnode->deleteDataString( $hit_str, 1 ); + # TEST + is( $textnode->nodeValue(), $pit_str, 'UTF-8 deleteDataString 2' ); +} + +{ + # standalone test + my $node = XML::LibXML::Text->new("foo"); + # TEST + ok($node, ' TODO : Add test name'); + # TEST + is($node->nodeValue, "foo", ' TODO : Add test name' ); +} + +{ + # CDATA node name test + + my $node = XML::LibXML::CDATASection->new("test"); + + # TEST + is( $node->string_value(), "test", ' TODO : Add test name' ); + # TEST + is( $node->nodeName(), "#cdata-section", ' TODO : Add test name' ); +} + +{ + # Comment node name test + + my $node = XML::LibXML::Comment->new("test"); + + # TEST + is( $node->string_value(), "test", ' TODO : Add test name' ); + # TEST + is( $node->nodeName(), "#comment", ' TODO : Add test name' ); +} + +{ + # Document node name test + + my $node = XML::LibXML::Document->new(); + + # TEST + is( $node->nodeName(), "#document", ' TODO : Add test name' ); +} +{ + # Document fragment node name test + + my $node = XML::LibXML::DocumentFragment->new(); + + # TEST + is( $node->nodeName(), "#document-fragment", ' TODO : Add test name' ); +} diff --git a/t/06elements.t b/t/06elements.t new file mode 100644 index 0000000..84fedbb --- /dev/null +++ b/t/06elements.t @@ -0,0 +1,568 @@ +# $Id$ + +## +# this test checks the DOM element and attribute interface of XML::LibXML + +use strict; +use warnings; + +# Should be 187. +use Test::More tests => 191; + +use XML::LibXML; + +my $foo = "foo"; +my $bar = "bar"; +my $nsURI = "http://foo"; +my $prefix = "x"; +my $attname1 = "A"; +my $attvalue1 = "a"; +my $attname2 = "B"; +my $attvalue2 = "b"; +my $attname3 = "C"; + +# TEST:$badnames=4; +my @badnames= ("1A", "<><", "&", "-:"); + +# 1. bound node +{ + my $doc = XML::LibXML::Document->new(); + my $elem = $doc->createElement( $foo ); + # TEST + ok($elem, ' TODO : Add test name'); + # TEST + is($elem->tagName, $foo, ' TODO : Add test name'); + + { + foreach my $name ( @badnames ) { + eval { $elem->setNodeName( $name ); }; + # TEST*$badnames + ok( $@, "setNodeName throws an exception for $name" ); + } + } + + $elem->setAttribute( $attname1, $attvalue1 ); + # TEST + ok( $elem->hasAttribute($attname1), ' TODO : Add test name' ); + # TEST + is( $elem->getAttribute($attname1), $attvalue1, ' TODO : Add test name'); + + my $attr = $elem->getAttributeNode($attname1); + # TEST + ok($attr, ' TODO : Add test name'); + # TEST + is($attr->name, $attname1, ' TODO : Add test name'); + # TEST + is($attr->value, $attvalue1, ' TODO : Add test name'); + + $elem->setAttribute( $attname1, $attvalue2 ); + # TEST + is($elem->getAttribute($attname1), $attvalue2, ' TODO : Add test name'); + # TEST + is($attr->value, $attvalue2, ' TODO : Add test name'); + + my $attr2 = $doc->createAttribute($attname2, $attvalue1); + # TEST + ok($attr2, ' TODO : Add test name'); + + $elem->setAttributeNode($attr2); + # TEST + ok($elem->hasAttribute($attname2), ' TODO : Add test name' ); + # TEST + is($elem->getAttribute($attname2),$attvalue1, ' TODO : Add test name'); + + my $tattr = $elem->getAttributeNode($attname2); + # TEST + ok($tattr->isSameNode($attr2), ' TODO : Add test name'); + + $elem->setAttribute($attname2, ""); + # TEST + ok($elem->hasAttribute($attname2), ' TODO : Add test name' ); + # TEST + is($elem->getAttribute($attname2), "", ' TODO : Add test name'); + + $elem->setAttribute($attname3, ""); + # TEST + ok($elem->hasAttribute($attname3), ' TODO : Add test name' ); + # TEST + is($elem->getAttribute($attname3), "", ' TODO : Add test name'); + + { + foreach my $name ( @badnames ) { + eval {$elem->setAttribute( $name, "X" );}; + # TEST*$badnames + ok( $@, "setAttribute throws an exxception for '$name'" ); + } + + } + + + # 1.1 Namespaced Attributes + + $elem->setAttributeNS( $nsURI, $prefix . ":". $foo, $attvalue2 ); + # TEST + ok( $elem->hasAttributeNS( $nsURI, $foo ), ' TODO : Add test name' ); + # TEST + ok( ! $elem->hasAttribute( $foo ), ' TODO : Add test name' ); + # TEST + ok( $elem->hasAttribute( $prefix.":".$foo ), ' TODO : Add test name' ); + # warn $elem->toString() , "\n"; + $tattr = $elem->getAttributeNodeNS( $nsURI, $foo ); + # TEST + ok($tattr, ' TODO : Add test name'); + # TEST + is($tattr->name, $foo, ' TODO : Add test name'); + # TEST + is($tattr->nodeName, $prefix .":".$foo, ' TODO : Add test name'); + # TEST + is($tattr->value, $attvalue2, ' TODO : Add test name' ); + + $elem->removeAttributeNode( $tattr ); + # TEST + ok( !$elem->hasAttributeNS($nsURI, $foo), ' TODO : Add test name' ); + + + # empty NS + $elem->setAttributeNS( '', $foo, $attvalue2 ); + # TEST + ok( $elem->hasAttribute( $foo ), ' TODO : Add test name' ); + $tattr = $elem->getAttributeNode( $foo ); + # TEST + ok($tattr, ' TODO : Add test name'); + # TEST + is($tattr->name, $foo, ' TODO : Add test name'); + # TEST + is($tattr->nodeName, $foo, ' TODO : Add test name'); + # TEST + ok(!defined($tattr->namespaceURI), ' TODO : Add test name'); + # TEST + is($tattr->value, $attvalue2, ' TODO : Add test name' ); + + # TEST + + ok($elem->hasAttribute($foo) == 1, ' TODO : Add test name'); + # TEST + ok($elem->hasAttributeNS(undef, $foo) == 1, ' TODO : Add test name'); + # TEST + ok($elem->hasAttributeNS('', $foo) == 1, ' TODO : Add test name'); + + $elem->removeAttributeNode( $tattr ); + # TEST + ok( !$elem->hasAttributeNS('', $foo), ' TODO : Add test name' ); + # TEST + ok( !$elem->hasAttributeNS(undef, $foo), ' TODO : Add test name' ); + + # node based functions + my $e2 = $doc->createElement($foo); + $doc->setDocumentElement($e2); + my $nsAttr = $doc->createAttributeNS( $nsURI.".x", $prefix . ":". $foo, $bar); + # TEST + ok( $nsAttr, ' TODO : Add test name' ); + $elem->setAttributeNodeNS($nsAttr); + # TEST + ok( $elem->hasAttributeNS($nsURI.".x", $foo), ' TODO : Add test name' ); + $elem->removeAttributeNS( $nsURI.".x", $foo); + # TEST + ok( !$elem->hasAttributeNS($nsURI.".x", $foo), ' TODO : Add test name' ); + + # warn $elem->toString; + $elem->setAttributeNS( $nsURI, $prefix . ":". $attname1, $attvalue2 ); + # warn $elem->toString; + + + $elem->removeAttributeNS("",$attname1); + # warn $elem->toString; + + # TEST + + ok( ! $elem->hasAttribute($attname1), ' TODO : Add test name' ); + # TEST + ok( $elem->hasAttributeNS($nsURI,$attname1), ' TODO : Add test name' ); + # warn $elem->toString; + + { + foreach my $name ( @badnames ) { + eval {$elem->setAttributeNS( undef, $name, "X" );}; + # TEST*$badnames + ok( $@, "setAttributeNS throws an exception for '$name'"); + } + } +} + +# 2. unbound node +{ + my $elem = XML::LibXML::Element->new($foo); + # TEST + ok($elem, ' TODO : Add test name'); + # TEST + is($elem->tagName, $foo, ' TODO : Add test name'); + + $elem->setAttribute( $attname1, $attvalue1 ); + # TEST + ok( $elem->hasAttribute($attname1), ' TODO : Add test name' ); + # TEST + is( $elem->getAttribute($attname1), $attvalue1, ' TODO : Add test name'); + + my $attr = $elem->getAttributeNode($attname1); + # TEST + ok($attr, ' TODO : Add test name'); + # TEST + is($attr->name, $attname1, ' TODO : Add test name'); + # TEST + is($attr->value, $attvalue1, ' TODO : Add test name'); + + $elem->setAttributeNS( $nsURI, $prefix . ":". $foo, $attvalue2 ); + # TEST + ok( $elem->hasAttributeNS( $nsURI, $foo ), ' TODO : Add test name' ); + # warn $elem->toString() , "\n"; + my $tattr = $elem->getAttributeNodeNS( $nsURI, $foo ); + # TEST + ok($tattr, ' TODO : Add test name'); + # TEST + is($tattr->name, $foo, ' TODO : Add test name'); + # TEST + is($tattr->nodeName, $prefix .":".$foo, ' TODO : Add test name'); + # TEST + is($tattr->value, $attvalue2, ' TODO : Add test name' ); + + $elem->removeAttributeNode( $tattr ); + # TEST + ok( !$elem->hasAttributeNS($nsURI, $foo), ' TODO : Add test name' ); + # warn $elem->toString() , "\n"; +} + +# 3. Namespace handling +# 3.1 Namespace switching +{ + my $elem = XML::LibXML::Element->new($foo); + # TEST + ok($elem, ' TODO : Add test name'); + + my $doc = XML::LibXML::Document->new(); + my $e2 = $doc->createElement($foo); + $doc->setDocumentElement($e2); + my $nsAttr = $doc->createAttributeNS( $nsURI, $prefix . ":". $foo, $bar); + # TEST + ok( $nsAttr, ' TODO : Add test name' ); + + $elem->setAttributeNodeNS($nsAttr); + # TEST + ok( $elem->hasAttributeNS($nsURI, $foo), ' TODO : Add test name' ); + + # TEST + ok( ! defined $nsAttr->ownerDocument, ' TODO : Add test name'); + # warn $elem->toString() , "\n"; +} + +# 3.2 default Namespace and Attributes +{ + my $doc = XML::LibXML::Document->new(); + my $elem = $doc->createElementNS( "foo", "root" ); + $doc->setDocumentElement( $elem ); + + $elem->setNamespace( "foo", "bar" ); + + $elem->setAttributeNS( "foo", "x:attr", "test" ); + $elem->setAttributeNS( undef, "attr2", "test" ); + + # TEST + + is( $elem->getAttributeNS( "foo", "attr" ), "test", ' TODO : Add test name' ); + # TEST + is( $elem->getAttributeNS( "", "attr2" ), "test", ' TODO : Add test name' ); + + # warn $doc->toString; + # actually this doesn't work correctly with libxml2 <= 2.4.23 + $elem->setAttributeNS( "foo", "attr2", "bar" ); + # TEST + is( $elem->getAttributeNS( "foo", "attr2" ), "bar", ' TODO : Add test name' ); + # warn $doc->toString; +} + +# 4. Text Append and Normalization +# 4.1 Normalization on an Element node +{ + my $doc = XML::LibXML::Document->new(); + my $t1 = $doc->createTextNode( "bar1" ); + my $t2 = $doc->createTextNode( "bar2" ); + my $t3 = $doc->createTextNode( "bar3" ); + my $e = $doc->createElement("foo"); + my $e2 = $doc->createElement("bar"); + $e->appendChild( $e2 ); + $e->appendChild( $t1 ); + $e->appendChild( $t2 ); + $e->appendChild( $t3 ); + + my @cn = $e->childNodes; + + # this is the correct behaviour for DOM. the nodes are still + # referred + # TEST + is( scalar( @cn ), 4, ' TODO : Add test name' ); + + $e->normalize; + + @cn = $e->childNodes; + # TEST + is( scalar( @cn ), 2, ' TODO : Add test name' ); + + # TEST + + ok(! defined $t2->parentNode, ' TODO : Add test name'); + # TEST + ok(! defined $t3->parentNode, ' TODO : Add test name'); +} + +# 4.2 Normalization on a Document node +{ + my $doc = XML::LibXML::Document->new(); + my $t1 = $doc->createTextNode( "bar1" ); + my $t2 = $doc->createTextNode( "bar2" ); + my $t3 = $doc->createTextNode( "bar3" ); + my $e = $doc->createElement("foo"); + my $e2 = $doc->createElement("bar"); + $doc->setDocumentElement($e); + $e->appendChild( $e2 ); + $e->appendChild( $t1 ); + $e->appendChild( $t2 ); + $e->appendChild( $t3 ); + + my @cn = $e->childNodes; + + # this is the correct behaviour for DOM. the nodes are still + # referred + # TEST + is( scalar( @cn ), 4, ' TODO : Add test name' ); + + $doc->normalize; + + @cn = $e->childNodes; + # TEST + is( scalar( @cn ), 2, ' TODO : Add test name' ); + + # TEST + + ok(! defined $t2->parentNode, ' TODO : Add test name'); + # TEST + ok(! defined $t3->parentNode, ' TODO : Add test name'); +} + + +# 5. XML::LibXML extensions +{ + my $plainstring = "foo"; + my $stdentstring= "$foo & this"; + + my $doc = XML::LibXML::Document->new(); + my $elem = $doc->createElement( $foo ); + $doc->setDocumentElement( $elem ); + + $elem->appendText( $plainstring ); + # TEST + is( $elem->string_value , $plainstring, ' TODO : Add test name' ); + + $elem->appendText( $stdentstring ); + # TEST + is( $elem->string_value , $plainstring.$stdentstring, ' TODO : Add test name' ); + + $elem->appendTextChild( "foo"); + $elem->appendTextChild( "foo" => "foo&bar" ); + + my @cn = $elem->childNodes; + # TEST + ok( scalar(@cn), ' TODO : Add test name' ); + # TEST + is( scalar(@cn), 3, ' TODO : Add test name' ); + # TEST + ok( !$cn[1]->hasChildNodes, ' TODO : Add test name'); + # TEST + ok( $cn[2]->hasChildNodes, ' TODO : Add test name'); +} + +# 6. XML::LibXML::Attr nodes +{ + my $dtd = <<'EOF'; + + + + + +]> +EOF + my $ns = q(urn:xx); + my $xml_nons = qq(); + my $xml_ns = qq(); + + # TEST:$xml=2; + for my $xml ($xml_nons, $xml_ns) { + my $parser = new XML::LibXML; + $parser->complete_attributes(0); + $parser->expand_entities(0); + my $doc = $parser->parse_string($dtd.$xml); + + # TEST*$xml + + ok ($doc, ' TODO : Add test name'); + my $root = $doc->getDocumentElement; + { + my $attr = $root->getAttributeNode('foo'); + # TEST*$xml + ok ($attr, ' TODO : Add test name'); + # TEST*$xml + is (ref($attr), 'XML::LibXML::Attr', ' TODO : Add test name'); + # TEST*$xml + ok ($root->isSameNode($attr->ownerElement), ' TODO : Add test name'); + # TEST*$xml + is ($attr->value, '"barENT"', ' TODO : Add test name'); + # TEST*$xml + is ($attr->serializeContent, '"bar&ent;"', ' TODO : Add test name'); + # TEST*$xml + is ($attr->toString, ' foo=""bar&ent;""', ' TODO : Add test name'); + } + { + my $attr = $root->getAttributeNodeNS(undef,'foo'); + # TEST*$xml + ok ($attr, ' TODO : Add test name'); + # TEST*$xml + is (ref($attr), 'XML::LibXML::Attr', ' TODO : Add test name'); + # TEST*$xml + ok ($root->isSameNode($attr->ownerElement), ' TODO : Add test name'); + # TEST*$xml + is ($attr->value, '"barENT"', ' TODO : Add test name'); + } + # fixed values are defined + # TEST*$xml + is ($root->getAttribute('fixed'),'foo', ' TODO : Add test name'); + + SKIP: + { + if (XML::LibXML::LIBXML_VERSION() < 20627) + { + skip('skipping for libxml2 < 2.6.27', 1); + } + # TEST*$xml + is($root->getAttributeNS($ns,'ns_fixed'),'ns_foo', 'ns_fixed is ns_foo') + } + + # TEST*$xml + is ($root->getAttribute('a:ns_fixed'),'ns_foo', ' TODO : Add test name'); + + # TEST*$xml + + is ($root->hasAttribute('fixed'),0, ' TODO : Add test name'); + # TEST*$xml + is ($root->hasAttributeNS($ns,'ns_fixed'),0, ' TODO : Add test name'); + # TEST*$xml + is ($root->hasAttribute('a:ns_fixed'),0, ' TODO : Add test name'); + + + # but no attribute nodes correspond to them + # TEST*$xml + ok (!defined $root->getAttributeNode('a:ns_fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNode('fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNode('name'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNode('baz'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS($ns,'foo'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS($ns,'fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS($ns,'ns_fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS(undef,'fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS(undef,'name'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS(undef,'baz'), ' TODO : Add test name'); + } + + # TEST:$xml=2; + { + my @names = ("nons", "ns"); + for my $xml ($xml_nons, $xml_ns) { + my $n = shift(@names); + my $parser = new XML::LibXML; + $parser->complete_attributes(1); + $parser->expand_entities(1); + my $doc = $parser->parse_string($dtd.$xml); + # TEST*$xml + ok ($doc, "Could parse document $n"); + my $root = $doc->getDocumentElement; + { + my $attr = $root->getAttributeNode('foo'); + # TEST*$xml + ok ($attr, "Attribute foo exists for $n"); + # TEST*$xml + isa_ok ($attr, 'XML::LibXML::Attr', + "Attribute is of type XML::LibXML::Attr - $n"); + # TEST*$xml + ok ($root->isSameNode($attr->ownerElement), + "attr owner element is root - $n"); + # TEST*$xml + is ($attr->value, q{"barENT"}, + "attr value is OK - $n"); + # TEST*$xml + is ($attr->serializeContent, + '"barENT"', + "serializeContent - $n"); + # TEST*$xml + is ($attr->toString, ' foo=""barENT""', + "toString - $n"); + } + # fixed values are defined + # TEST*$xml + is ($root->getAttribute('fixed'),'foo', ' TODO : Add test name'); + # TEST*$xml + is ($root->getAttributeNS($ns,'ns_fixed'),'ns_foo', ' TODO : Add test name'); + # TEST*$xml + is ($root->getAttribute('a:ns_fixed'),'ns_foo', ' TODO : Add test name'); + + # and attribute nodes are created + { + my $attr = $root->getAttributeNode('fixed'); + # TEST*$xml + is (ref($attr), 'XML::LibXML::Attr', ' TODO : Add test name'); + # TEST*$xml + is ($attr->value,'foo', ' TODO : Add test name'); + # TEST*$xml + is ($attr->toString, ' fixed="foo"', ' TODO : Add test name'); + } + { + my $attr = $root->getAttributeNode('a:ns_fixed'); + # TEST*$xml + is (ref($attr), 'XML::LibXML::Attr', ' TODO : Add test name'); + # TEST*$xml + is ($attr->value,'ns_foo', ' TODO : Add test name'); + } + { + my $attr = $root->getAttributeNodeNS($ns,'ns_fixed'); + # TEST*$xml + is (ref($attr), 'XML::LibXML::Attr', ' TODO : Add test name'); + # TEST*$xml + is ($attr->value,'ns_foo', ' TODO : Add test name'); + # TEST*$xml + is ($attr->toString, ' a:ns_fixed="ns_foo"', ' TODO : Add test name'); + } + + # TEST*$xml + + ok (!defined $root->getAttributeNode('ns_fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNode('name'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNode('baz'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS($ns,'foo'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS($ns,'fixed'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS(undef,'name'), ' TODO : Add test name'); + # TEST*$xml + ok (!defined $root->getAttributeNodeNS(undef,'baz'), ' TODO : Add test name'); + } + } +} diff --git a/t/07dtd.t b/t/07dtd.t new file mode 100644 index 0000000..8f1b89f --- /dev/null +++ b/t/07dtd.t @@ -0,0 +1,333 @@ +# $Id$ + +use strict; +use warnings; + +# Should be 54. +use Test::More tests => 54; + +use lib './t/lib'; +use TestHelpers qw(slurp); + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); + +my $htmlPublic = "-//W3C//DTD XHTML 1.0 Transitional//EN"; +my $htmlSystem = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"; + +{ + my $doc = XML::LibXML::Document->new; + my $dtd = $doc->createExternalSubset( "html", + $htmlPublic, + $htmlSystem + ); + + # TEST + ok( $dtd->isSameNode( $doc->externalSubset ), ' TODO : Add test name' ); + # TEST + is( $dtd->publicId, $htmlPublic, ' TODO : Add test name' ); + # TEST + is( $dtd->systemId, $htmlSystem, ' TODO : Add test name' ); + # TEST + is( $dtd->getName, 'html', ' TODO : Add test name' ); + +} + +{ + my $doc = XML::LibXML::Document->new; + my $dtd = $doc->createInternalSubset( "html", + $htmlPublic, + $htmlSystem + ); + # TEST + ok( $dtd->isSameNode( $doc->internalSubset ), ' TODO : Add test name' ); + + $doc->setExternalSubset( $dtd ); + # TEST + ok(!defined ($doc->internalSubset), ' TODO : Add test name' ); + # TEST + ok( $dtd->isSameNode( $doc->externalSubset ), ' TODO : Add test name' ); + + # TEST + + is( $dtd->getPublicId, $htmlPublic, ' TODO : Add test name' ); + # TEST + is( $dtd->getSystemId, $htmlSystem, ' TODO : Add test name' ); + + $doc->setInternalSubset( $dtd ); + # TEST + ok(!defined ($doc->externalSubset), ' TODO : Add test name' ); + # TEST + ok( $dtd->isSameNode( $doc->internalSubset ), ' TODO : Add test name' ); + + my $dtd2 = $doc->createDTD( "huhu", + "-//W3C//DTD XHTML 1.0 Transitional//EN", + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" + ); + + $doc->setInternalSubset( $dtd2 ); + # TEST + ok( !defined($dtd->parentNode), ' TODO : Add test name' ); + # TEST + ok( $dtd2->isSameNode( $doc->internalSubset ), ' TODO : Add test name' ); + + + my $dtd3 = $doc->removeInternalSubset; + # TEST + ok( $dtd3->isSameNode($dtd2), ' TODO : Add test name' ); + # TEST + ok( !defined($doc->internalSubset), ' TODO : Add test name' ); + + $doc->setExternalSubset( $dtd2 ); + + $dtd3 = $doc->removeExternalSubset; + # TEST + ok( $dtd3->isSameNode($dtd2), ' TODO : Add test name' ); + # TEST + ok( !defined($doc->externalSubset), ' TODO : Add test name' ); +} + +{ + my $parser = XML::LibXML->new(); + + my $doc = $parser->parse_file( "example/dtd.xml" ); + + # TEST + + ok($doc, ' TODO : Add test name'); + + my $dtd = $doc->internalSubset; + # TEST + is( $dtd->getName, 'doc', ' TODO : Add test name' ); + # TEST + is( $dtd->publicId, undef, ' TODO : Add test name' ); + # TEST + is( $dtd->systemId, undef, ' TODO : Add test name' ); + + my $entity = $doc->createEntityReference( "foo" ); + # TEST + ok($entity, ' TODO : Add test name'); + # TEST + is($entity->nodeType, XML_ENTITY_REF_NODE, ' TODO : Add test name' ); + + # TEST + + ok( $entity->hasChildNodes, ' TODO : Add test name' ); + # TEST + is( $entity->firstChild->nodeType, XML_ENTITY_DECL, ' TODO : Add test name' ); + # TEST + is( $entity->firstChild->nodeValue, " test ", ' TODO : Add test name' ); + + my $edcl = $entity->firstChild; + # TEST + is( $edcl->previousSibling->nodeType, XML_ELEMENT_DECL, ' TODO : Add test name' ); + + { + my $doc2 = XML::LibXML::Document->new; + my $e = $doc2->createElement("foo"); + $doc2->setDocumentElement( $e ); + + my $dtd2 = $doc->internalSubset->cloneNode(1); + # TEST + ok($dtd2, ' TODO : Add test name'); + +# $doc2->setInternalSubset( $dtd2 ); +# warn $doc2->toString; + +# $e->appendChild( $entity ); +# warn $doc2->toString; + } +} + + +{ + my $parser = XML::LibXML->new(); + $parser->validation(1); + $parser->keep_blanks(1); + my $doc=$parser->parse_string(<<'EOF'); + + +]> + + +EOF + + # TEST + ok($doc->validate(), ' TODO : Add test name'); + + # TEST + ok($doc->is_valid(), ' TODO : Add test name'); + +} + +{ + my $parser = XML::LibXML->new(); + $parser->validation(0); + $parser->load_ext_dtd(0); # This should make libxml not try to get the DTD + + my $xml = ' + foo

bar

'; + my $doc = eval { + $parser->parse_string($xml); + }; + + # TEST + ok(!$@, ' TODO : Add test name'); + if ($@) { + warn "Parsing error: $@\n"; + } + + # TEST + ok($doc, ' TODO : Add test name'); +} + +{ + my $bad = 'example/bad.dtd'; + # TEST + ok( -f $bad, ' TODO : Add test name' ); + eval { XML::LibXML::Dtd->new("-//Foo//Test DTD 1.0//EN", 'example/bad.dtd') }; + # TEST + ok ($@, ' TODO : Add test name'); + + undef $@; + my $dtd = slurp($bad); + + # TEST + ok( length($dtd) > 5, ' TODO : Add test name' ); + eval { XML::LibXML::Dtd->parse_string($dtd) }; + # TEST + ok ($@, ' TODO : Add test name'); + + my $xml = "\n"; + + { + my $parser = XML::LibXML->new; + $parser->load_ext_dtd(0); + $parser->validation(0); + my $doc = $parser->parse_string($xml); + # TEST + ok( $doc, ' TODO : Add test name' ); + } + { + my $parser = XML::LibXML->new; + $parser->load_ext_dtd(1); + $parser->validation(0); + undef $@; + eval { $parser->parse_string($xml) }; + # TEST + ok( $@, ' TODO : Add test name' ); + } +} + +{ + # RT #71076: https://rt.cpan.org/Public/Bug/Display.html?id=71076 + + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_string(<<'EOF'); + + +]> + + +EOF + my $dtd = $doc->internalSubset; + + # TEST + ok( !$dtd->hasAttributes, 'hasAttributes' ); + # TEST + is_deeply( [ $dtd->attributes ], [], 'attributes' ); +} + +# Remove DTD nodes + +sub test_remove_dtd { + my ($test_name, $remove_sub) = @_; + + my $parser = XML::LibXML->new; + my $doc = $parser->parse_file('example/dtd.xml'); + my $dtd = $doc->internalSubset; + + $remove_sub->($doc, $dtd); + + # TEST*3 + ok( !$doc->internalSubset, "remove DTD via $test_name" ); +} + +test_remove_dtd( "unbindNode", sub { + my ($doc, $dtd) = @_; + $dtd->unbindNode; +} ); +test_remove_dtd( "removeChild", sub { + my ($doc, $dtd) = @_; + $doc->removeChild($dtd); +} ); +test_remove_dtd( "removeChildNodes", sub { + my ($doc, $dtd) = @_; + $doc->removeChildNodes; +} ); + +# Insert DTD nodes + +sub test_insert_dtd { + my ($test_name, $insert_sub) = @_; + + my $parser = XML::LibXML->new; + my $src_doc = $parser->parse_file('example/dtd.xml'); + my $dtd = $src_doc->internalSubset; + my $doc = $parser->parse_file('example/dtd.xml'); + + $insert_sub->($doc, $dtd); + + # TEST*11 + ok( $doc->internalSubset->isSameNode($dtd), "insert DTD via $test_name" ); +} + +test_insert_dtd( "insertBefore internalSubset", sub { + my ($doc, $dtd) = @_; + $doc->insertBefore($dtd, $doc->internalSubset); +} ); +test_insert_dtd( "insertBefore documentElement", sub { + my ($doc, $dtd) = @_; + $doc->insertBefore($dtd, $doc->documentElement); +} ); +test_insert_dtd( "insertAfter internalSubset", sub { + my ($doc, $dtd) = @_; + $doc->insertAfter($dtd, $doc->internalSubset); +} ); +test_insert_dtd( "insertAfter documentElement", sub { + my ($doc, $dtd) = @_; + $doc->insertAfter($dtd, $doc->documentElement); +} ); +test_insert_dtd( "replaceChild internalSubset", sub { + my ($doc, $dtd) = @_; + $doc->replaceChild($dtd, $doc->internalSubset); +} ); +test_insert_dtd( "replaceChild documentElement", sub { + my ($doc, $dtd) = @_; + $doc->replaceChild($dtd, $doc->documentElement); +} ); +test_insert_dtd( "replaceNode internalSubset", sub { + my ($doc, $dtd) = @_; + $doc->internalSubset->replaceNode($dtd); +} ); +test_insert_dtd( "replaceNode documentElement", sub { + my ($doc, $dtd) = @_; + $doc->documentElement->replaceNode($dtd); +} ); +test_insert_dtd( "appendChild", sub { + my ($doc, $dtd) = @_; + $doc->appendChild($dtd); +} ); +test_insert_dtd( "addSibling internalSubset", sub { + my ($doc, $dtd) = @_; + $doc->internalSubset->addSibling($dtd); +} ); +test_insert_dtd( "addSibling documentElement", sub { + my ($doc, $dtd) = @_; + $doc->documentElement->addSibling($dtd); +} ); + diff --git a/t/08findnodes.t b/t/08findnodes.t new file mode 100644 index 0000000..016c85a --- /dev/null +++ b/t/08findnodes.t @@ -0,0 +1,249 @@ +use strict; +use warnings; + +# Should be 45. +use Test::More tests => 45; + +use XML::LibXML; + +# to test if findnodes works. +# i added findnodes to the node class, so a query can be started +# everywhere. + +my $file = "example/dromeds.xml"; + +# init the file parser +my $parser = XML::LibXML->new(); +my $dom = $parser->parse_file( $file ); + +if ( defined $dom ) { + # get the root document + my $elem = $dom->getDocumentElement(); + + # first very simple path starting at root + my @list = $elem->findnodes( "species" ); + # TEST + is( scalar(@list), 3, ' TODO : Add test name' ); + + # a simple query starting somewhere ... + my $node = $list[0]; + my @slist = $node->findnodes( "humps" ); + # TEST + is( scalar(@slist), 1, ' TODO : Add test name' ); + + # find a single node + @list = $elem->findnodes( "species[\@name='Llama']" ); + # TEST + is( scalar( @list ), 1, ' TODO : Add test name' ); + + # find with not conditions + @list = $elem->findnodes( "species[\@name!='Llama']/disposition" ); + # TEST + is( scalar(@list), 2, ' TODO : Add test name' ); + + + @list = $elem->findnodes( 'species/@name' ); + # warn $elem->toString(); + + # TEST + + ok( scalar @list && $list[0]->toString() eq ' name="Camel"', ' TODO : Add test name' ); + + my $x = XML::LibXML::Text->new( 1234 ); + if( defined $x ) { + # TEST + is( $x->getData(), "1234", ' TODO : Add test name' ); + } + + my $telem = $dom->createElement('test'); + $telem->appendWellBalancedChunk('c'); + + finddoc($dom); + # TEST + ok(1, ' TODO : Add test name'); +} +# TEST + +ok( $dom, ' TODO : Add test name' ); + +# test to make sure that multiple array findnodes() returns +# don't segfault perl; it'll happen after the second one if it does +for (0..3) { + my $doc = XML::LibXML->new->parse_string( +' + +
'); + my @nds = $doc->findnodes("processing-instruction('xsl-stylesheet')"); +} + +my $doc = $parser->parse_string(<<'EOT'); + + + + + +EOT + +my $root = $doc->getDocumentElement; +my @a = $root->findnodes('//a:foo'); +# TEST + +is(@a, 1, ' TODO : Add test name'); + +my @b = $root->findnodes('//b:bar'); +# TEST + +is(@b, 1, ' TODO : Add test name'); + +my @none = $root->findnodes('//b:foo'); +@none = (@none, $root->findnodes('//foo')); +# TEST + +is(@none, 0, ' TODO : Add test name'); + +my @doc = $root->findnodes('document("example/test.xml")'); +# TEST + +ok(@doc, ' TODO : Add test name'); +# warn($doc[0]->toString); + +# this query should result an empty array! +my @nodes = $root->findnodes( "/humpty/dumpty" ); +# TEST + +is( scalar(@nodes), 0, ' TODO : Add test name' ); + + +my $docstring = q{ + +}; + $doc = $parser->parse_string( $docstring ); + $root = $doc->documentElement; + +my @ns = $root->findnodes('namespace::*'); +# TEST + +is(scalar(@ns), 2, ' TODO : Add test name' ); + +# bad xpaths +# TEST:$badxpath=4; +my @badxpath = ( + 'abc:::def', + 'foo///bar', + '...', + '/-', + ); + +foreach my $xp ( @badxpath ) { + my $res; + eval { $res = $root->findnodes( $xp ); }; + # TEST*$badxpath + ok($@, ' TODO : Add test name'); + eval { $res = $root->find( $xp ); }; + # TEST*$badxpath + ok($@, ' TODO : Add test name'); + eval { $res = $root->findvalue( $xp ); }; + # TEST*$badxpath + ok($@, ' TODO : Add test name'); + eval { $res = $root->findnodes( encodeToUTF8( "iso-8859-1", $xp ) ); }; + # TEST*$badxpath + ok($@, ' TODO : Add test name'); + eval { $res = $root->find( encodeToUTF8( "iso-8859-1", $xp ) );}; + # TEST*$badxpath + ok($@, ' TODO : Add test name'); +} + + +{ + # as reported by jian lou: + # 1. getElementByTagName("myTag") is not working is + # "myTag" is a node directly under root. Same problem + # for findNodes("//myTag") + # 2. When I add new nodes into DOM tree by + # appendChild(). Then try to find them by + # getElementByTagName("newNodeTag"), the newly created + # nodes are not returned. ... + # + # this seems not to be a problem by XML::LibXML itself, but newer versions + # of libxml2 (newer is 2.4.27 or later) + # + my $doc = XML::LibXML->createDocument(); + my $root= $doc->createElement( "A" ); + $doc->setDocumentElement($root); + + my $b= $doc->createElement( "B" ); + $root->appendChild( $b ); + + my @list = $doc->findnodes( '//A' ); + # TEST + ok( scalar @list, ' TODO : Add test name' ); + # TEST + ok( $list[0]->isSameNode( $root ), ' TODO : Add test name' ); + + @list = $doc->findnodes( '//B' ); + # TEST + ok( scalar @list, ' TODO : Add test name' ); + # TEST + ok( $list[0]->isSameNode( $b ), ' TODO : Add test name' ); + + + # @list = $doc->getElementsByTagName( "A" ); + # ok( scalar @list ); + # ok( $list[0]->isSameNode( $root ) ); + + @list = $root->getElementsByTagName( 'B' ); + # TEST + ok( scalar @list, ' TODO : Add test name' ); + # TEST + ok( $list[0]->isSameNode( $b ), ' TODO : Add test name' ); +} + +{ + # test potential unbinding-segfault-problem + my $doc = XML::LibXML->createDocument(); + my $root= $doc->createElement( "A" ); + $doc->setDocumentElement($root); + + my $b= $doc->createElement( "B" ); + $root->appendChild( $b ); + my $c= $doc->createElement( "C" ); + $b->appendChild( $c ); + $b= $doc->createElement( "B" ); + $root->appendChild( $b ); + $c= $doc->createElement( "C" ); + $b->appendChild( $c ); + + my @list = $root->findnodes( "B" ); + # TEST + is( scalar(@list) , 2, ' TODO : Add test name' ); + foreach my $node ( @list ) { + my @subnodes = $node->findnodes( "C" ); + $node->unbindNode() if ( scalar( @subnodes ) ); + # TEST*2 + ok(1, ' TODO : Add test name'); + } +} + +{ + # findnode remove problem + + my $xmlstr = "12"; + + my $doc = $parser->parse_string( $xmlstr ); + my $root = $doc->documentElement; + my ( $lastc ) = $root->findnodes( 'b/c[last()]' ); + # TEST + ok( $lastc, ' TODO : Add test name' ); + + $root->removeChild( $lastc ); + # TEST + is( $root->toString(), $xmlstr, ' TODO : Add test name' ); +} + +# --------------------------------------------------------------------------- # +sub finddoc { + my $doc = shift; + return unless defined $doc; + my $rn = $doc->documentElement; + $rn->findnodes("/"); +} diff --git a/t/09xpath.t b/t/09xpath.t new file mode 100644 index 0000000..f4193d1 --- /dev/null +++ b/t/09xpath.t @@ -0,0 +1,254 @@ +use strict; +use warnings; + +# Should be 53 +use Test::More tests => 54; + +use XML::LibXML; + +my $xmlstring = < + + test 1 + + + test 2 + +
+EOSTR + +{ + my $parser = XML::LibXML->new(); + + my $doc = $parser->parse_string( $xmlstring ); + + # TEST + ok($doc, 'Parsing successful.'); + + { + my @nodes = $doc->findnodes( "/foo/bar" ); + # TEST + is ( scalar( @nodes ), 2, 'Two bar nodes' ); + + # TEST + ok( $doc->isSameNode($nodes[0]->ownerDocument), + 'Doc is the same as the owner document.' ); + + my $compiled = XML::LibXML::XPathExpression->new("/foo/bar"); + foreach my $idx (1..3) { + @nodes = $doc->findnodes( $compiled ); + # TEST*3 + is( scalar( @nodes ), 2, "Two nodes for /foo/bar - try No. $idx" ); + } + + # TEST + ok( $doc->isSameNode($nodes[0]->ownerDocument), + 'Same owner as previous one', + ); + + my $n = $doc->createElement( "foobar" ); + + my $p = $nodes[1]->parentNode; + $p->insertBefore( $n, $nodes[1] ); + + # TEST + + ok( $p->isSameNode( $doc->documentElement ), 'Same as document elem' ); + @nodes = $p->childNodes; + # TEST + is( scalar( @nodes ), 6, 'Found child nodes' ); + } + + { + my $result = $doc->find( "/foo/bar" ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::NodeList" ), ' TODO : Add test name' ); + # TEST + is( $result->size, 2, ' TODO : Add test name' ); + + # TEST + + ok( $doc->isSameNode($$result[0]->ownerDocument), ' TODO : Add test name' ); + + $result = $doc->find( XML::LibXML::XPathExpression->new("/foo/bar") ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::NodeList" ), ' TODO : Add test name' ); + # TEST + is( $result->size, 2, ' TODO : Add test name' ); + + # TEST + + ok( $doc->isSameNode($$result[0]->ownerDocument), ' TODO : Add test name' ); + + $result = $doc->find( "string(/foo/bar)" ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::Literal" ), ' TODO : Add test name' ); + # TEST + ok( $result->string_value =~ /test 1/, ' TODO : Add test name' ); + + $result = $doc->find( "string(/foo/bar)" ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::Literal" ), ' TODO : Add test name' ); + # TEST + ok( $result->string_value =~ /test 1/, ' TODO : Add test name' ); + + $result = $doc->find( XML::LibXML::XPathExpression->new("count(/foo/bar)") ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::Number" ), ' TODO : Add test name' ); + # TEST + is( $result->value, 2, ' TODO : Add test name' ); + + $result = $doc->find( "contains(/foo/bar[1], 'test 1')" ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::Boolean" ), ' TODO : Add test name' ); + # TEST + is( $result->string_value, "true", ' TODO : Add test name' ); + + $result = $doc->find( XML::LibXML::XPathExpression->new("contains(/foo/bar[1], 'test 1')") ); + # TEST + ok( $result, ' TODO : Add test name' ); + # TEST + ok( $result->isa( "XML::LibXML::Boolean" ), ' TODO : Add test name' ); + # TEST + is( $result->string_value, "true", ' TODO : Add test name' ); + + $result = $doc->find( "contains(/foo/bar[3], 'test 1')" ); + # TEST + ok( $result == 0, ' TODO : Add test name' ); + + # TEST + + ok( $doc->exists("/foo/bar[2]"), ' TODO : Add test name' ); + # TEST + is( $doc->exists("/foo/bar[3]"), 0, ' TODO : Add test name' ); + # TEST + is( $doc->exists("-7.2"),1, ' TODO : Add test name' ); + # TEST + is( $doc->exists("0"),0, ' TODO : Add test name' ); + # TEST + is( $doc->exists("'foo'"),1, ' TODO : Add test name' ); + # TEST + is( $doc->exists("''"),0, ' TODO : Add test name' ); + # TEST + is( $doc->exists("'0'"),1, ' TODO : Add test name' ); + + my ($node) = $doc->findnodes("/foo/bar[1]" ); + # TEST + ok( $node, ' TODO : Add test name' ); + # TEST + ok ($node->exists("following-sibling::bar"), ' TODO : Add test name'); + } + + { + # test the strange segfault after xpathing + my $root = $doc->documentElement(); + foreach my $bar ( $root->findnodes( 'bar' ) ) { + $root->removeChild($bar); + } + # TEST + ok(1, ' TODO : Add test name'); + # warn $root->toString(); + + $doc = $parser->parse_string( $xmlstring ); + my @bars = $doc->findnodes( '//bar' ); + + foreach my $node ( @bars ) { + $node->parentNode()->removeChild( $node ); + } + # TEST + ok(1, ' TODO : Add test name'); + } +} + +{ + # from #39178 + my $p = XML::LibXML->new; + my $doc = $p->parse_file("example/utf-16-2.xml"); + # TEST + ok($doc, ' TODO : Add test name'); + my @nodes = $doc->findnodes("/cml/*"); + # TEST + ok (@nodes == 2, ' TODO : Add test name'); + # TEST + is ($nodes[1]->textContent, "utf-16 test with umlauts: \x{e4}\x{f6}\x{fc}\x{c4}\x{d6}\x{dc}\x{df}", ' TODO : Add test name'); +} + +{ + # from #36576 + my $p = XML::LibXML->new; + my $doc = $p->parse_html_file("example/utf-16-1.html"); + # TEST + ok($doc, ' TODO : Add test name'); + use utf8; + my @nodes = $doc->findnodes("//p"); + # TEST + ok (@nodes == 1, ' TODO : Add test name'); + + # TEST + _utf16_content_test(\@nodes, 'nodes content is fine.'); +} + +{ + # from #36576 + my $p = XML::LibXML->new; + my $doc = $p->parse_html_file("example/utf-16-2.html"); + # TEST + ok($doc, ' TODO : Add test name'); + my @nodes = $doc->findnodes("//p"); + # TEST + is (scalar(@nodes), 1, 'Found one p'); + # TEST + _utf16_content_test(\@nodes, 'p content is fine.'); +} + +{ + # from #69096 + my $doc = XML::LibXML::Document->createDocument('1.0', 'utf-8'); + my $root = $doc->createElement('root'); + $doc->setDocumentElement($root); + my $e = $doc->createElement("child"); + my $e2 = $doc->createElement("child"); + my $t1 = $doc->createTextNode( "te" ); + my $t2 = $doc->createTextNode( "st" ); + $root->appendChild($e); + $root->appendChild($e2); + $e2->appendChild($t1); + $e2->appendChild($t2); + + $doc->normalize(); + my @cn = $doc->findnodes('//child[text()="test"]'); + # TEST + is( scalar( @cn ), 1, 'xpath testing adjacent text nodes' ); +} + +sub _utf16_content_test +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($nodes_ref, $blurb) = @_; + + SKIP: + { + if (XML::LibXML::LIBXML_RUNTIME_VERSION() < 20700) + { + skip "UTF-16 and HTML broken in libxml2 < 2.7", 1; + } + + is ($nodes_ref->[0]->textContent, + "utf-16 test with umlauts: \x{e4}\x{f6}\x{fc}\x{c4}\x{d6}\x{dc}\x{df}", + $blurb, + ); + } +} diff --git a/t/10ns.t b/t/10ns.t new file mode 100644 index 0000000..b6f43d7 --- /dev/null +++ b/t/10ns.t @@ -0,0 +1,584 @@ +# -*- cperl -*- + +use strict; +use warnings; + +# Should be 137. +use Test::More tests => 137; + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); + +my $parser = XML::LibXML->new(); + +my $xml1 = < +EOX + +my $xml2 = < +EOX + +my $xml3 = < + + + + + + + + + + + + +EOX + +print "# 1. single namespace \n"; + +{ + my $doc1 = $parser->parse_string( $xml1 ); + my $elem = $doc1->documentElement; + # TEST + is($elem->lookupNamespaceURI( "b" ), "http://whatever", ' TODO : Add test name' ); + my @cn = $elem->childNodes; + # TEST + is($cn[0]->lookupNamespaceURI( "b" ), "http://whatever", ' TODO : Add test name' ); + # TEST + is($cn[1]->namespaceURI, "http://whatever", ' TODO : Add test name' ); +} + +print "# 2. multiple namespaces \n"; + +{ + my $doc2 = $parser->parse_string( $xml2 ); + + my $elem = $doc2->documentElement; + # TEST + is($elem->lookupNamespaceURI( "b" ), "http://whatever", ' TODO : Add test name'); + # TEST + is($elem->lookupNamespaceURI( "c" ), "http://kungfoo", ' TODO : Add test name'); + my @cn = $elem->childNodes; + + # TEST + + is($cn[0]->lookupNamespaceURI( "b" ), "http://whatever", ' TODO : Add test name' ); + # TEST + is($cn[0]->lookupNamespaceURI( "c" ), "http://kungfoo", ' TODO : Add test name'); + + # TEST + + is($cn[1]->namespaceURI, "http://whatever", ' TODO : Add test name' ); + # TEST + is($cn[2]->namespaceURI, "http://kungfoo", ' TODO : Add test name' ); +} + +print "# 3. nested names \n"; + +{ + my $doc3 = $parser->parse_string( $xml3 ); + my $elem = $doc3->documentElement; + my @cn = $elem->childNodes; + my @xs = grep { $_->nodeType == XML_ELEMENT_NODE } @cn; + + my @x1 = $xs[1]->childNodes; my @x2 = $xs[2]->childNodes; + + # TEST + + is( $x1[1]->namespaceURI , "http://kungfoo", ' TODO : Add test name' ); + # TEST + is( $x2[1]->namespaceURI , "http://foobar", ' TODO : Add test name' ); + + # namespace scopeing + # TEST + ok( !defined($elem->lookupNamespacePrefix( "http://kungfoo" )), ' TODO : Add test name' ); + # TEST + ok( !defined($elem->lookupNamespacePrefix( "http://foobar" )), ' TODO : Add test name' ); +} + +print "# 4. post creation namespace setting\n"; +{ + my $e1 = XML::LibXML::Element->new("foo"); + my $e2 = XML::LibXML::Element->new("bar:foo"); + my $e3 = XML::LibXML::Element->new("foo"); + $e3->setAttribute( "kung", "foo" ); + my $a = $e3->getAttributeNode("kung"); + + $e1->appendChild($e2); + $e2->appendChild($e3); + # TEST + ok( $e2->setNamespace("http://kungfoo", "bar"), ' TODO : Add test name' ); + # TEST + ok( $a->setNamespace("http://kungfoo", "bar"), ' TODO : Add test name' ); + # TEST + is( $a->nodeName, "bar:kung", ' TODO : Add test name' ); +} + +print "# 5. importing namespaces\n"; + +{ + + my $doca = XML::LibXML->createDocument; + my $docb = XML::LibXML->new()->parse_string( < +EOX + + my $b = $docb->documentElement->firstChild; + + my $c = $doca->importNode( $b ); + + my @attra = $c->attributes; + # TEST + is( scalar(@attra), 1, ' TODO : Add test name' ); + # TEST + is( $attra[0]->nodeType, 18, ' TODO : Add test name' ); + my $d = $doca->adoptNode($b); + + # TEST + + ok( $d->isSameNode( $b ), ' TODO : Add test name' ); + my @attrb = $d->attributes; + # TEST + is( scalar(@attrb), 1, ' TODO : Add test name' ); + # TEST + is( $attrb[0]->nodeType, 18, ' TODO : Add test name' ); +} + +print "# 6. lossless setting of namespaces with setAttribute\n"; +# reported by Kurt George Gjerde +{ + my $doc = XML::LibXML->createDocument; + my $root = $doc->createElementNS('http://example.com', 'document'); + $root->setAttribute('xmlns:xxx', 'http://example.com'); + $root->setAttribute('xmlns:yyy', 'http://yonder.com'); + $doc->setDocumentElement( $root ); + + my $strnode = $root->toString(); + # TEST + ok ( $strnode =~ /xmlns:xxx/ and $strnode =~ /xmlns=/, ' TODO : Add test name' ); +} + +print "# 7. namespaced attributes\n"; +{ + my $doc = XML::LibXML->new->parse_string(<<'EOF'); + +EOF + my $root = $doc->getDocumentElement(); + # namespaced attributes + $root->setAttribute('xxx:attr', 'value'); + # TEST + ok ( $root->getAttributeNode('xxx:attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xxx:attr'), 'value', ' TODO : Add test name' ); + print $root->toString(1),"\n"; + # TEST + ok ( $root->getAttributeNodeNS('http://example.com','attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNS('http://example.com','attr'), 'value', ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNode('xxx:attr')->getNamespaceURI(), 'http://example.com', ' TODO : Add test name'); + + #change encoding to UTF-8 and retest + $doc->setEncoding('UTF-8'); + # namespaced attributes + $root->setAttribute('xxx:attr', 'value'); + # TEST + ok ( $root->getAttributeNode('xxx:attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xxx:attr'), 'value', ' TODO : Add test name' ); + print $root->toString(1),"\n"; + # TEST + ok ( $root->getAttributeNodeNS('http://example.com','attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNS('http://example.com','attr'), 'value', ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNode('xxx:attr')->getNamespaceURI(), + 'http://example.com', ' TODO : Add test name'); +} + +print "# 8. changing namespace declarations\n"; +{ + my $xmlns = 'http://www.w3.org/2000/xmlns/'; + + my $doc = XML::LibXML->createDocument; + my $root = $doc->createElementNS('http://example.com', 'document'); + $root->setAttributeNS($xmlns, 'xmlns:xxx', 'http://example.com'); + $root->setAttribute('xmlns:yyy', 'http://yonder.com'); + $doc->setDocumentElement( $root ); + + # can we get the namespaces ? + # TEST + is ( $root->getAttribute('xmlns:xxx'), 'http://example.com', ' TODO : Add test name'); + # TEST + is ( $root->getAttributeNS($xmlns,'xmlns'), 'http://example.com', ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xmlns:yyy'), 'http://yonder.com', ' TODO : Add test name'); + # TEST + is ( $root->lookupNamespacePrefix('http://yonder.com'), 'yyy', ' TODO : Add test name'); + # TEST + is ( $root->lookupNamespaceURI('yyy'), 'http://yonder.com', ' TODO : Add test name'); + + # can we change the namespaces ? + # TEST + ok ( $root->setAttribute('xmlns:yyy', 'http://newyonder.com'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xmlns:yyy'), 'http://newyonder.com', ' TODO : Add test name'); + # TEST + is ( $root->lookupNamespacePrefix('http://newyonder.com'), 'yyy', ' TODO : Add test name'); + # TEST + is ( $root->lookupNamespaceURI('yyy'), 'http://newyonder.com', ' TODO : Add test name'); + + # can we change the default namespace ? + $root->setAttribute('xmlns', 'http://other.com' ); + # TEST + is ( $root->getAttribute('xmlns'), 'http://other.com', ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespacePrefix('http://other.com'), "", ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI(''), 'http://other.com', ' TODO : Add test name' ); + + # non-existent namespaces + # TEST + is ( $root->lookupNamespaceURI('foo'), undef, ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespacePrefix('foo'), undef, ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xmlns:foo'), undef, ' TODO : Add test name' ); + + # changing namespace declaration URI and prefix + # TEST + ok ( $root->setNamespaceDeclURI('yyy', 'http://changed.com'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xmlns:yyy'), 'http://changed.com', ' TODO : Add test name'); + # TEST + is ( $root->lookupNamespaceURI('yyy'), 'http://changed.com', ' TODO : Add test name'); + eval { $root->setNamespaceDeclPrefix('yyy','xxx'); }; + # TEST + ok ( $@, ' TODO : Add test name' ); # prefix occupied + eval { $root->setNamespaceDeclPrefix('yyy',''); }; + # TEST + ok ( $@, ' TODO : Add test name' ); # prefix occupied + # TEST + ok ( $root->setNamespaceDeclPrefix('yyy', 'zzz'), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI('yyy'), undef, ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI('zzz'), 'http://changed.com', ' TODO : Add test name' ); + # TEST + ok ( $root->setNamespaceDeclURI('zzz',undef ), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI('zzz'), undef, ' TODO : Add test name' ); + + my $strnode = $root->toString(); + # TEST + ok ( $strnode !~ /xmlns:zzz/, ' TODO : Add test name' ); + + # changing the default namespace declaration + # TEST + ok ( $root->setNamespaceDeclURI('','http://test'), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI(''), 'http://test', ' TODO : Add test name' ); + # TEST + is ( $root->getNamespaceURI(), 'http://test', ' TODO : Add test name' ); + + # changing prefix of the default ns declaration + # TEST + ok ( $root->setNamespaceDeclPrefix('','foo'), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI(''), undef, ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI('foo'), 'http://test', ' TODO : Add test name' ); + # TEST + is ( $root->getNamespaceURI(), 'http://test', ' TODO : Add test name' ); + # TEST + is ( $root->prefix(), 'foo', ' TODO : Add test name' ); + + # turning a ns declaration to a default ns declaration + # TEST + ok ( $root->setNamespaceDeclPrefix('foo',''), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI('foo'), undef, ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI(''), 'http://test', ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI(undef), 'http://test', ' TODO : Add test name' ); + # TEST + is ( $root->getNamespaceURI(), 'http://test', ' TODO : Add test name' ); + # TEST + is ( $root->prefix(), undef, ' TODO : Add test name' ); + + # removing the default ns declaration + # TEST + ok ( $root->setNamespaceDeclURI('',undef), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI(''), undef, ' TODO : Add test name' ); + # TEST + is ( $root->getNamespaceURI(), undef, ' TODO : Add test name' ); + + $strnode = $root->toString(); + # TEST + ok ( $strnode !~ /xmlns=/, ' TODO : Add test name' ); + + # namespaced attributes + $root->setAttribute('xxx:attr', 'value'); + # TEST + ok ( $root->getAttributeNode('xxx:attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('xxx:attr'), 'value', ' TODO : Add test name' ); + print $root->toString(1),"\n"; + # TEST + ok ( $root->getAttributeNodeNS('http://example.com','attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNS('http://example.com','attr'), 'value', ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNode('xxx:attr')->getNamespaceURI(), 'http://example.com', ' TODO : Add test name'); + + # removing other xmlns declarations + $root->addNewChild('http://example.com', 'xxx:foo'); + # TEST + ok( $root->setNamespaceDeclURI('xxx',undef), ' TODO : Add test name' ); + # TEST + is ( $root->lookupNamespaceURI('xxx'), undef, ' TODO : Add test name' ); + # TEST + is ( $root->getNamespaceURI(), undef, ' TODO : Add test name' ); + # TEST + is ( $root->firstChild->getNamespaceURI(), undef, ' TODO : Add test name' ); + # TEST + is ( $root->prefix(), undef, ' TODO : Add test name' ); + # TEST + is ( $root->firstChild->prefix(), undef, ' TODO : Add test name' ); + + + print $root->toString(1),"\n"; + # check namespaced attributes + # TEST + is ( $root->getAttributeNode('xxx:attr'), undef, ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNodeNS('http://example.com', 'attr'), undef, ' TODO : Add test name' ); + # TEST + ok ( $root->getAttributeNode('attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttribute('attr'), 'value', ' TODO : Add test name' ); + # TEST + ok ( $root->getAttributeNodeNS(undef,'attr'), ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNS(undef,'attr'), 'value', ' TODO : Add test name' ); + # TEST + is ( $root->getAttributeNode('attr')->getNamespaceURI(), undef, ' TODO : Add test name'); + + + $strnode = $root->toString(); + # TEST + ok ( $strnode !~ /xmlns=/, ' TODO : Add test name' ); + # TEST + ok ( $strnode !~ /xmlns:xxx=/, ' TODO : Add test name' ); + # TEST + ok ( $strnode =~ /setNamespaceDeclPrefix('xxx',undef), ' TODO : Add test name' ); + + # TEST + + is ( $doc->findnodes('/document/foo')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $doc->findnodes('/document[foo]')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $doc->findnodes('/document[*]')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $doc->findnodes('/document[@attr and foo]')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $doc->findvalue('/document/@attr'), 'value', ' TODO : Add test name' ); + + my $xp = XML::LibXML::XPathContext->new($doc); + # TEST + is ( $xp->findnodes('/document/foo')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $xp->findnodes('/document[foo]')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $xp->findnodes('/document[*]')->size(), 1, ' TODO : Add test name' ); + + # TEST + + is ( $xp->findnodes('/document[@attr and foo]')->size(), 1, ' TODO : Add test name' ); + # TEST + is ( $xp->findvalue('/document/@attr'), 'value', ' TODO : Add test name' ); + + # TEST + + is ( $root->firstChild->prefix(), undef, ' TODO : Add test name' ); +} + +print "# 9. namespace reconciliation\n"; +{ + my $doc = XML::LibXML->createDocument( 'http://default', 'root' ); + my $root = $doc->documentElement; + $root->setNamespace( 'http://children', 'child', 0 ); + + $root->appendChild( my $n = $doc->createElementNS( 'http://default', 'branch' )); + # appending an element in the same namespace will + # strip its declaration + # TEST + ok( !defined($n->getAttribute( 'xmlns' )), ' TODO : Add test name' ); + + $n->appendChild( my $a = $doc->createElementNS( 'http://children', 'child:a' )); + $n->appendChild( my $b = $doc->createElementNS( 'http://children', 'child:b' )); + + $n->appendChild( my $c = $doc->createElementNS( 'http://children', 'child:c' )); + # appending $c strips the declaration + # TEST + ok( !defined($c->getAttribute('xmlns:child')), ' TODO : Add test name' ); + + # add another prefix for children + $c->setAttribute( 'xmlns:foo', 'http://children' ); + # TEST + is( $c->getAttribute( 'xmlns:foo' ), 'http://children', ' TODO : Add test name' ); + + $n->appendChild( my $d = $doc->createElementNS( 'http://other', 'branch' )); + # appending an element with a new default namespace + # will leave it declared + # TEST + is( $d->getAttribute( 'xmlns' ), 'http://other', ' TODO : Add test name' ); + + my $doca = XML::LibXML->createDocument( 'http://default/', 'root' ); + $doca->adoptNode( $a ); + $doca->adoptNode( $b ); + $doca->documentElement->appendChild( $a ); + $doca->documentElement->appendChild( $b ); + + # Because the child namespace isn't defined in $doca + # it should get declared on both child nodes $a and $b + # TEST + is( $a->getAttribute( 'xmlns:child' ), 'http://children', ' TODO : Add test name' ); + # TEST + is( $b->getAttribute( 'xmlns:child' ), 'http://children', ' TODO : Add test name' ); + + $doca = XML::LibXML->createDocument( 'http://children', 'child:root' ); + $doca->adoptNode( $a ); + $doca->documentElement->appendChild( $a ); + + # $doca declares the child namespace, so the declaration + # should now get stripped from $a + # TEST + ok( !defined($a->getAttribute( 'xmlns:child' )), ' TODO : Add test name' ); + + $doca->documentElement->removeChild( $a ); + + # $a should now have its namespace re-declared + # TEST + is( $a->getAttribute( 'xmlns:child' ), 'http://children', ' TODO : Add test name' ); + + $doca->documentElement->appendChild( $a ); + + # $doca declares the child namespace, so the declaration + # should now get stripped from $a + # TEST + ok( !defined($a->getAttribute( 'xmlns:child' )), ' TODO : Add test name' ); + + + $doc = XML::LibXML::Document->new; + $n = $doc->createElement( 'didl' ); + $n->setAttribute( "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance" ); + + $a = $doc->createElement( 'dc' ); + $a->setAttribute( "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance" ); + $a->setAttribute( "xsi:schemaLocation"=>"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives +.org/OAI/2.0/oai_dc.xsd" ); + + $n->appendChild( $a ); + + # the declaration for xsi should be stripped + # TEST + ok( !defined($a->getAttribute( 'xmlns:xsi' )), ' TODO : Add test name' ); + + $n->removeChild( $a ); + + # should be a new declaration for xsi in $a + # TEST + is( $a->getAttribute( 'xmlns:xsi' ), 'http://www.w3.org/2001/XMLSchema-instance', ' TODO : Add test name' ); + + $b = $doc->createElement( 'foo' ); + $b->setAttribute( 'xsi:bar', 'bar' ); + $n->appendChild( $b ); + $n->removeChild( $b ); + + # a prefix without a namespace can't be reliably compared, + # so $b doesn't acquire a declaration from $n! + # TEST + ok( !defined($b->getAttribute( 'xmlns:xsi' )), ' TODO : Add test name' ); + + # tests for reconciliation during setAttributeNodeNS + my $attr = $doca->createAttributeNS( + 'http://children', 'child:attr','value' + ); + # TEST + ok($attr, ' TODO : Add test name'); + my $child= $doca->documentElement->firstChild; + # TEST + ok($child, ' TODO : Add test name'); + $child->setAttributeNodeNS($attr); + # TEST + ok ( !defined($child->getAttribute( 'xmlns:child' )), ' TODO : Add test name' ); + + # due to libxml2 limitation, XML::LibXML declares the namespace + # on the root element + $attr = $doca->createAttributeNS('http://other','other:attr','value'); + # TEST + ok($attr, ' TODO : Add test name'); + $child->setAttributeNodeNS($attr); + # + # TEST + ok ( !defined($child->getAttribute( 'xmlns:other' )), ' TODO : Add test name' ); + # TEST + ok ( defined($doca->documentElement->getAttribute( 'xmlns:other' )), ' TODO : Add test name' ); +} + +print "# 10. xml namespace\n"; +{ + my $docOne = XML::LibXML->new->parse_string( + '' + ); + my $docTwo = XML::LibXML->new->parse_string( + '' + ); + + my $inc = $docOne->getElementById('test'); + my $rep = $docTwo->getElementById('foo'); + $inc->parentNode->replaceChild($rep, $inc); + # TEST + is($inc->getAttributeNS('http://www.w3.org/XML/1998/namespace','id'),'test', ' TODO : Add test name'); + # TEST + ok($inc->isSameNode($docOne->getElementById('test')), ' TODO : Add test name'); +} + +print "# 11. empty namespace\n"; +{ + my $doc = XML::LibXML->load_xml(string => $xml1); + my $node = $doc->find('/a/b:c')->[0]; + + # TEST + ok($node->setNamespace(""), 'removing ns from elemenet'); + # TEST + is($node->prefix, undef, 'ns prefix removed from element'); + # TEST + is($node->namespaceURI, undef, 'ns removed from element'); + # TEST + is($node->getName, 'c', 'ns removed from element name'); + + my $attr = $doc->find('/a/x/@b:href')->[0]; + + # TEST + ok($attr->setNamespace("", ""), 'removing ns from attr'); + # TEST + is($attr->prefix, undef, 'ns prefix removed from attr'); + # TEST + is($attr->namespaceURI, undef, 'ns removed from attr'); + # TEST + is($attr->getName, 'href', 'ns removed from attr name'); +} diff --git a/t/11memory.t b/t/11memory.t new file mode 100644 index 0000000..256d925 --- /dev/null +++ b/t/11memory.t @@ -0,0 +1,598 @@ +use strict; +use warnings; + +use lib './t/lib'; +use TestHelpers qw(slurp); + +use Test::More; + +plan skip_all => "These tests are for authors only!" unless $ENV{AUTHOR_TESTING} or $ENV{RELEASE_TESTING}; + +use constant TIMES_THROUGH => $ENV{MEMORY_TIMES} || 100_000; + +if (! (($^O eq 'linux') || ($^O eq 'cygwin')) ) +{ + plan skip_all => 'Only runs on Linux and Cygwin.'; +} +elsif (! $ENV{MEMORY_TEST} ) +{ + plan skip_all => "developers only (set MEMORY_TEST=1 to run these tests)\n"; +} +else +{ + # Should be 25. + plan tests => 25; +} + +use XML::LibXML; +use XML::LibXML::SAX::Builder; +{ + +# require Devel::Peek; + my $peek = 0; + + # TEST + ok(1, 'Start.'); + + # BASELINE + check_mem(1); + + # MAKE DOC IN SUB + { + my $doc = make_doc(); + # TEST + ok($doc, 'Make doc in sub 1.'); + # TEST + ok($doc->toString, 'Make doc in sub 1 - toString().'); + } + check_mem(); + # MAKE DOC IN SUB II + # same test as the first one. if this still leaks, it's + # our problem, otherwise it's perl :/ + { + my $doc = make_doc(); + # TEST + ok($doc, 'Make doc in sub 2 - doc.'); + + # TEST + ok($doc->toString, 'Make doc in sub 2 - toString()'); + } + check_mem(); + + { + my $elem = XML::LibXML::Element->new("foo"); + my $elem2= XML::LibXML::Element->new("bar"); + $elem->appendChild($elem2); + # TEST + ok( $elem->toString, 'appendChild.' ); + } + check_mem(); + + # SET DOCUMENT ELEMENT + { + my $doc2 = XML::LibXML::Document->new(); + make_doc_elem( $doc2 ); + # TEST + ok( $doc2, 'SetDocElem'); + # TEST + ok( $doc2->documentElement, 'SetDocElem documentElement.' ); + } + check_mem(); + + # multiple parsers: + # MULTIPLE PARSERS + XML::LibXML->new(); # first parser + check_mem(1); + + for (1..TIMES_THROUGH) { + my $parser = XML::LibXML->new(); + } + # TEST + ok(1, 'Initialise multiple parsers.'); + + check_mem(); + # multiple parses + for (1..TIMES_THROUGH) { + my $parser = XML::LibXML->new(); + my $dom = $parser->parse_string("foo"); + } + # TEST + ok(1, 'multiple parses'); + + check_mem(); + + # multiple failing parses + # MULTIPLE FAILURES + for (1..TIMES_THROUGH) { + # warn("$_\n") unless $_ % 100; + my $parser = XML::LibXML->new(); + eval { + my $dom = $parser->parse_string("foo"); # Thats meant to be an error, btw! + }; + } + # TEST + ok(1, 'Multiple failures.'); + + check_mem(); + + # building custom docs + my $doc = XML::LibXML::Document->new(); + for (1..TIMES_THROUGH) { + my $elem = $doc->createElement('x'); + + if($peek) { + warn("Doc before elem\n"); + # Devel::Peek::Dump($doc); + warn("Elem alone\n"); + # Devel::Peek::Dump($elem); + } + + $doc->setDocumentElement($elem); + + if ($peek) { + warn("Elem after attaching\n"); + # Devel::Peek::Dump($elem); + warn("Doc after elem\n"); + # Devel::Peek::Dump($doc); + } + } + if ($peek) { + warn("Doc should be freed\n"); + # Devel::Peek::Dump($doc); + } + # TEST + ok(1, 'customDocs'); + check_mem(); + + { + my $doc = XML::LibXML->createDocument; + for (1..TIMES_THROUGH) { + make_doc2( $doc ); + } + } + # TEST + ok(1, 'customDocs No. 2'); + check_mem(); + + # DTD string parsing + + my $dtdstr = slurp('example/test.dtd'); + $dtdstr =~ s/\r//g; + $dtdstr =~ s/[\r\n]*$//; + + # TEST + + ok($dtdstr, '$dtdstr'); + + for ( 1..TIMES_THROUGH ) { + my $dtd = XML::LibXML::Dtd->parse_string($dtdstr); + } + # TEST + ok(1, 'after dtdstr'); + check_mem(); + + # DTD URI parsing + # parse a DTD from a SYSTEM ID + for ( 1..TIMES_THROUGH ) { + my $dtd = XML::LibXML::Dtd->new('ignore', 'example/test.dtd'); + } + # TEST + ok(1, 'DTD URI parsing.'); + check_mem(); + + # Document validation + { + # is_valid() + my $dtd = XML::LibXML::Dtd->parse_string($dtdstr); + my $xml; + eval { + local $SIG{'__WARN__'} = sub { }; + $xml = XML::LibXML->new->parse_file('example/article_bad.xml'); + }; + for ( 1..TIMES_THROUGH ) { + my $good; + eval { + local $SIG{'__WARN__'} = sub { }; + $good = $xml->is_valid($dtd); + }; + } + # TEST + ok(1, 'is_valid()'); + check_mem(); + + print "# validate() \n"; + for ( 1..TIMES_THROUGH ) { + eval { + local $SIG{'__WARN__'} = sub { }; + $xml->validate($dtd); + }; + } + # TEST + ok(1, 'validate()'); + check_mem(); + + } + + print "# FIND NODES \n"; + my $xml=<<'dromeds.xml'; + + + + 1 or 2 + Cranky + + + 1 (sort of) + Aloof + + + (see Llama) + Friendly + + +dromeds.xml + + { + # my $str = ""; + my $str = $xml; + my $doc = XML::LibXML->new->parse_string( $str ); + for ( 1..TIMES_THROUGH ) { + processMessage($xml, '/dromedaries/species' ); +# my @nodes = $doc->findnodes("/foo/bar/foo"); + } + # TEST + ok(1, 'after processMessage'); + check_mem(); + + } + + { + my $str = ""; + my $doc = XML::LibXML->new->parse_string( $str ); + for ( 1..TIMES_THROUGH ) { + my $nodes = $doc->find("/foo/bar/foo"); + } + # TEST + ok(1, '->find.'); + check_mem(); + + } + +# { +# print "# ENCODING TESTS \n"; +# my $string = "test � � is a test string to test iso encoding"; +# my $encstr = encodeToUTF8( "iso-8859-1" , $string ); +# for ( 1..TIMES_THROUGH ) { +# my $str = encodeToUTF8( "iso-8859-1" , $string ); +# } +# ok(1); +# check_mem(); + +# for ( 1..TIMES_THROUGH ) { +# my $str = encodeToUTF8( "iso-8859-2" , "abc" ); +# } +# ok(1); +# check_mem(); +# +# for ( 1..TIMES_THROUGH ) { +# my $str = decodeFromUTF8( "iso-8859-1" , $encstr ); +# } +# ok(1); +# check_mem(); +# } + { + note("NAMESPACE TESTS"); + + my $string = ''; + + my $doc = XML::LibXML->new()->parse_string( $string ); + + for (1..TIMES_THROUGH) { + my @ns = $doc->documentElement()->getNamespaces(); + # warn "ns : " . $_->localname . "=>" . $_->href foreach @ns; + my $prefix = $_->localname foreach @ns; + my $name = $doc->documentElement->nodeName; + } + check_mem(); + # TEST + ok(1, 'namespace tests.'); + } + + { + note('SAX PARSER'); + + my %xmlStrings = ( + "SIMPLE" => "", + "SIMPLE TEXT" => " some text some text some text ", + "SIMPLE COMMENT" => " ", + "SIMPLE CDATA" => " ", + "SIMPLE ATTRIBUTE" => ' ', + "NAMESPACES SIMPLE" => '', + "NAMESPACES ATTRIBUTE" => '', + ); + + my $handler = sax_null->new; + my $parser = XML::LibXML->new; + $parser->set_handler( $handler ); + + check_mem(); + + foreach my $key ( keys %xmlStrings ) { + print "# $key \n"; + for (1..TIMES_THROUGH) { + my $doc = $parser->parse_string( $xmlStrings{$key} ); + } + + check_mem(); + } + # TEST + ok (1, 'SAX PARSER'); + } + + { + note('PUSH PARSER'); + + my %xmlStrings = ( + "SIMPLE" => ["","",""], + "SIMPLE TEXT" => [" ","some text some text some text"," "], + "SIMPLE COMMENT" => [" + + +EOT + + my $expecting = [ + start_document => [ 2, 1 ], + start_element => [ 2, 6 ], + characters => [ 4, 1 ], + comment => [ 4, 17 ], + characters => [ 5, 1 ], + start_cdata => [ 5, 20 ], + characters => [ 5, 20 ], + end_cdata => [ 5, 20 ], + characters => [ 6, 1 ], + end_element => [ 6, 8 ], + end_document => [ 6, 8 ], + ]; + + # TEST + is_deeply( \@stack, $expecting, "Check locator positions" ); +} + + +########### Namespace test ( empty namespaces ) ######## + +{ + my $h = "SAXNS2Tester"; + my $xml = ""; + my @tests = ( +sub { + XML::LibXML::SAX ->new( Handler => $h )->parse_string( $xml ); + # TEST + $SAXNS2Tester_start_element_stacker->test([qw(true)], 'XML::LibXML::SAX'); +}, + +sub { + XML::LibXML::SAX::Parser->new( Handler => $h )->parse_string( $xml ); + # TEST + $SAXNS2Tester_start_element_stacker->test([qw(true)], 'XML::LibXML::SAX::Parser'); +}, +); + + $_->() for @tests; + + +} + + +########### Error Handling ########### +{ + my $xml = 'Text'; + + my $handler = SAXErrorTester->new; + + foreach my $pkg (qw(XML::LibXML::SAX::Parser XML::LibXML::SAX)) { + undef $@; + eval { + $pkg->new(Handler => $handler)->parse_string($xml); + }; + # TEST*2 + ok($@, ' TODO : Add test name'); # We got an error + } + + $handler = SAXErrorCallbackTester->new; + eval { XML::LibXML::SAX->new(Handler => $handler )->parse_string($xml) }; + # TEST + ok($@, ' TODO : Add test name'); # We got an error + # TEST + ok( $handler->{fatal_called}, ' TODO : Add test name' ); + +} + +########### XML::LibXML::SAX::parse_chunk test ########### + +{ + my $chunk = 'LOGOUT'; + my $builder = XML::LibXML::SAX::Builder->new( Encoding => 'UTF-8' ); + my $parser = XML::LibXML::SAX->new( Handler => $builder ); + $parser->start_document(); + $builder->start_element({Name=>'foo'}); + $parser->parse_chunk($chunk); + $parser->parse_chunk($chunk); + $builder->end_element({Name=>'foo'}); + $parser->end_document(); + # TEST + is($builder->result()->documentElement->toString(), ''.$chunk.$chunk.'', ' TODO : Add test name'); +} + + +######## TEST error exceptions ############## +{ + + package MySAXHandler; + use strict; + use warnings; + use parent 'XML::SAX::Base'; + use Carp; + sub start_element { + my( $self, $elm) = @_; + if ( $elm->{LocalName} eq 'TVChannel' ) { + die bless({ Message => "My exception"},"MySAXException"); + } + } +} +{ + use strict; + use warnings; + my $parser = XML::LibXML::SAX->new( Handler => MySAXHandler->new( )) ; + eval { $parser->parse_string( <<'EOF' ) }; + + Moin + +EOF + # TEST + is(ref($@), 'MySAXException', ' TODO : Add test name'); + # TEST + is(ref($@) && $@->{Message}, "My exception", ' TODO : Add test name'); +} +########### Helper class ############# + +package SAXTester; +use Test::More; + +sub new { + my $class = shift; + return bless {}, $class; +} + +sub start_document { + + $SAXTester_start_document_counter->cb()->(); + + return; +} + +sub end_document { + $SAXTester_end_document_counter->cb()->(); + return; +} + +sub start_element { + my ($self, $el) = @_; + + $SAXTester_start_element_stacker->cb()->($el); + + # foreach my $attr (keys %{$el->{Attributes}}) { + # warn("Attr: $attr = $el->{Attributes}->{$attr}\n"); + # } + # warn("start_element: $el->{Name}\n"); + + return; +} + +sub end_element { + my ($self, $el) = @_; + # warn("end_element: $el->{Name}\n"); +} + +sub characters { + my ($self, $chars) = @_; + # warn("characters: $chars->{Data}\n"); +} + +1; + +package SAXNSTester; +use Test::More; + +sub new { + bless {}, shift; +} + +sub start_element { + my ($self, $node) = @_; + + $SAXNSTester_start_element_stacker->cb()->($node); + + return; +} + +sub end_element { + my ($self, $node) = @_; + # warn("end_element: $node->{Name}\n"); +} + +sub start_prefix_mapping { + my ($self, $node) = @_; + + $SAXNSTester_start_prefix_mapping_stacker->cb()->($node); + + return; +} + +sub end_prefix_mapping { + my ($self, $node) = @_; + + $SAXNSTester_end_prefix_mapping_stacker->cb()->($node); + + return; +} + +1; + +package SAXNS2Tester; +use Test::More; + +#sub new { +# my $class = shift; +# return bless {}, $class; +#} + +sub start_element { + my $self = shift; + my ( $elt ) = @_; + + $SAXNS2Tester_start_element_stacker->cb()->($elt); + + return; +} + + +package SAXLocatorTester; +use Test::More; + +sub new { + my ($class, $cb) = @_; + my $self = bless {}, $class; + + for my $method ( qw( + start_document end_document + start_element end_element + start_cdata end_cdata + start_dtd end_dtd + characters + comment + ) ) { + no strict 'refs'; + *$method = sub { $cb->( $_[0], $method, @_[1..$#_]) }; + } + + return $self; +} + +sub set_document_locator { + my ($self, $locator) = @_; + $self->{locator} = $locator; +} + +1; + +package SAXErrorTester; +use Test::More; + +sub new { + bless {}, shift; +} + +sub end_document { + print "End doc: @_\n"; + return 1; # Shouldn't be reached +} + +package SAXErrorCallbackTester; +use Test::More; + +sub fatal_error { + $_[0]->{fatal_called} = 1; +} + +sub start_element { + # test if we can do other stuff + XML::LibXML->new->parse_string(""); + return; +} +sub new { + bless {}, shift; +} + +sub end_document { + print "End doc: @_\n"; + return 1; # Shouldn't be reached +} + + +1; diff --git a/t/15nodelist.t b/t/15nodelist.t new file mode 100644 index 0000000..eed92d3 --- /dev/null +++ b/t/15nodelist.t @@ -0,0 +1,166 @@ + +use strict; +use warnings; + +use Test::More tests => 29; + +use XML::LibXML; +use IO::Handle; + +# TEST +ok(1, ' TODO : Add test name'); + +my $dom = XML::LibXML->new->parse_fh(*DATA); + +# TEST +ok($dom, ' TODO : Add test name'); + +{ + my $nodelist = $dom->documentElement->childNodes; + # TEST + # 0 is #text + is ($nodelist->item(1)->nodeName, 'BBB', 'item is 0-indexed'); +} + +my @nodelist = $dom->findnodes('//BBB'); + +# TEST +is(scalar(@nodelist), 5, ' TODO : Add test name'); + +my $nodelist = $dom->findnodes('//BBB'); +# TEST +is($nodelist->size, 5, ' TODO : Add test name'); + +# TEST +is($nodelist->string_value, "OK", ' TODO : Add test name'); # first node in set + +# TEST +is($nodelist->to_literal, "OKNOT OK", ' TODO : Add test name'); + +# TEST +is($nodelist->to_literal_delimited(','), "OK,,,,NOT OK", 'TODO : Add test name'); + +# TEST +is_deeply([$nodelist->to_literal_list()], ['OK', '', '', '', 'NOT OK'], 'TODO : Add test name'); + +{ + my $other_nodelist = $dom->findnodes('//BBB'); + while ($other_nodelist->to_literal() !~ m/\ANOT OK/) + { + $other_nodelist->shift(); + } + + # This is a test for: + # https://rt.cpan.org/Ticket/Display.html?id=57737 + + # TEST + ok (scalar(($other_nodelist) lt ($nodelist)), "Comparison is OK."); + + # TEST + ok (scalar(($nodelist) gt ($other_nodelist)), "Comparison is OK."); +} + +# TEST +is($dom->findvalue("//BBB"), "OKNOT OK", ' TODO : Add test name'); + +# TEST +is(ref($dom->find("1 and 2")), "XML::LibXML::Boolean", ' TODO : Add test name'); + +# TEST +is(ref($dom->find("'Hello World'")), "XML::LibXML::Literal", ' TODO : Add test name'); + +# TEST +is(ref($dom->find("32 + 13")), "XML::LibXML::Number", ' TODO : Add test name'); + +# TEST +is(ref($dom->find("//CCC")), "XML::LibXML::NodeList", ' TODO : Add test name'); + +my $numbers = XML::LibXML::NodeList->new(1..10); +my $oddify = sub { $_ + ($_%2?0:9) }; # add 9 to even numbers +my @map = $numbers->map($oddify); + +# TEST +is(scalar(@map), 10, 'map called in list context returns list'); + +# TEST +is(join('|',@map), '1|11|3|13|5|15|7|17|9|19', 'mapped data correct'); + +my $map = $numbers->map($oddify); + +# TEST +isa_ok($map => 'XML::LibXML::NodeList', '$map'); + +my @map2 = $map->map(sub { $_ > 10 ? () : ($_,$_,$_) }); + +# TEST +is(join('|',@map2), '1|1|1|3|3|3|5|5|5|7|7|7|9|9|9', 'mapping can add/remove nodes'); + +my @grep = $numbers->grep(sub {$_%2}); +my $grep = $numbers->grep(sub {$_%2}); + +# TEST +is(join('|',@grep), '1|3|5|7|9', 'grep works'); + +# TEST +isa_ok($grep => 'XML::LibXML::NodeList', '$grep'); + +my $shuffled = XML::LibXML::NodeList->new(qw/1 4 2 3 6 5 9 7 8 10/); +my @alphabetical = $shuffled->sort(sub { my ($a, $b) = @_; $a cmp $b }); +my @numeric = $shuffled->sort(sub { my ($a, $b) = @_; $a <=> $b }); + +# TEST +is(join('|',@alphabetical), '1|10|2|3|4|5|6|7|8|9', 'sort works 1'); + +# TEST +is(join('|',@numeric), '1|2|3|4|5|6|7|8|9|10', 'sort works 2'); + +my $reverse = XML::LibXML::NodeList->new; +my $return = $numbers->foreach( sub { $reverse->unshift($_) } ); + +# TEST +is( + blessed_refaddr($return), + blessed_refaddr($numbers), + 'foreach returns $self', + ); + +# TEST +is(join('|',@$reverse), '10|9|8|7|6|5|4|3|2|1', 'foreach works'); + +my $biggest = $shuffled->reduce(sub { $_[0] > $_[1] ? $_[0] : $_[1] }, -1); +my $smallest = $shuffled->reduce(sub { $_[0] < $_[1] ? $_[0] : $_[1] }, 9999); + +# TEST +is($biggest, 10, 'reduce works 1'); + +# TEST +is($smallest, 1, 'reduce works 2'); + +my @reverse = $numbers->reverse; + +# TEST +is(join('|',@reverse), '10|9|8|7|6|5|4|3|2|1', 'reverse works'); + +# modified version of Scalar::Util::PP::refaddr +# only works with blessed references +sub blessed_refaddr { + return undef unless length(ref($_[0])); + my $addr; + if(defined(my $pkg = ref($_[0]))) { + $addr .= bless $_[0], 'Scalar::Util::Fake'; + bless $_[0], $pkg; + } + no warnings 'portable'; + $addr =~ /0x(\w+)/; + hex($1); +} + + +__DATA__ + +OK + + + +NOT OK + diff --git a/t/16docnodes.t b/t/16docnodes.t new file mode 100644 index 0000000..db7bc1f --- /dev/null +++ b/t/16docnodes.t @@ -0,0 +1,72 @@ +use strict; +use warnings; + +use XML::LibXML; +# Should be 11. +use Test::More tests => 11; + +# this test fails under XML-LibXML-1.00 with a segfault after the +# second parsing. it was fixed by putting in code in getChildNodes +# to handle the special case where the node was the document node + + my $input = < + + A B + + A + + B + A B + C + + +EOD + +for my $time (0 .. 2) { + my $parser = XML::LibXML->new(); + my $doc = $parser->parse_string($input); + my @a = $doc->getChildnodes; + # TEST*3 + is(scalar(@a), 1, "1 Child node - time $time"); +} + +my $parser = XML::LibXML->new(); +my $doc = $parser->parse_string($input); +for my $time (0 .. 2) { + my $e = $doc->getFirstChild; + # TEST*3 + isa_ok ($e, 'XML::LibXML::Element', + "first child is an Element - time No. $time" + ); +} + +for my $time (0 .. 2) { + my $e = $doc->getLastChild; + # TEST*3 + isa_ok($e,'XML::LibXML::Element', + "last child is an element - time No. $time" + ); +} + +## +# Test Ticket 7645 +{ + my $in = pack('U', 0x00e4); + my $doc = XML::LibXML::Document->new(); + + my $node = XML::LibXML::Element->new('test'); + $node->setAttribute(contents => $in); + $doc->setDocumentElement($node); + + # TEST + is( $node->serialize(), '', 'Node serialise works.' ); + + $doc->setEncoding('utf-8'); + # Second output + # TEST + is( $node->serialize(), + encodeToUTF8( 'iso-8859-1', '' ), + 'UTF-8 node serialize', + ); +} diff --git a/t/17callbacks.t b/t/17callbacks.t new file mode 100644 index 0000000..e2d9859 --- /dev/null +++ b/t/17callbacks.t @@ -0,0 +1,322 @@ +# $Id$ + +use strict; +use warnings; + +use lib './t/lib'; +use TestHelpers qw(slurp); +use Counter; +use Stacker; + +# Should be 25. +use Test::More tests => 25; +use XML::LibXML; + +sub _create_counter_pair +{ + my ($worker_cb, $predicate_cb) = @_; + + my $non_global_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + return $worker_cb->( + sub { + if (!$predicate_cb->()) + { + $inc_cb->() + } + return; + } + )->(@_); + } + }, + } + ); + + my $global_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + return $worker_cb->( + sub { + if ($predicate_cb->()) + { + $inc_cb->() + } + return; + } + )->(@_); + } + }, + } + ); + + return ($non_global_counter, $global_counter); +} + +my ($open1_non_global_counter, $open1_global_counter) = + _create_counter_pair( + sub { + my $cond_cb = shift; + return sub { + my $fn = shift; + # warn("open: $f\n"); + + if (open my $fh, '<', $fn) + { + $cond_cb->(); + return $fh; + } + else + { + return 0; + } + }; + }, + sub { return defined($XML::LibXML::open_cb); }, + ); + +my $open2_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my ($fn) = @_; + # warn("open2: $_[0]\n"); + + $fn =~ s/([^\d])(\.xml)$/${1}4$2/; # use a different file + my ($ret, $verdict); + if ($verdict = open (my $file, '<', $fn)) + { + $ret = $file; + } + else + { + $ret = 0; + } + + $inc_cb->(); + + return $ret; + }; + }, + } +); + +my ($match1_non_global_counter, $match1_global_counter) = + _create_counter_pair( + sub { + my $cond_cb = shift; + return sub { + $cond_cb->(); + + return 1; + }; + }, + sub { return defined($XML::LibXML::match_cb); }, + ); + +my ($close1_non_global_counter, $close1_global_counter) = + _create_counter_pair( + sub { + my $cond_cb = shift; + return sub { + my ($fh) = @_; + # warn("open: $f\n"); + + $cond_cb->(); + + if ($fh) + { + $fh->close(); + } + + return 1; + }; + }, + sub { return defined($XML::LibXML::close_cb); }, + ); + +my ($read1_non_global_counter, $read1_global_counter) = + _create_counter_pair( + sub { + my $cond_cb = shift; + return sub { + my ($fh) = @_; + # warn "read!"; + my $rv = undef; + my $n = 0; + if ( $fh ) { + $n = $fh->read( $rv , $_[1] ); + if ($n > 0) + { + $cond_cb->(); + } + } + return $rv; + }; + }, + sub { return defined($XML::LibXML::read_cb); }, + ); + +{ + # first test checks if local callbacks work + my $parser = XML::LibXML->new(); + # TEST + ok($parser, 'Parser was initted.'); + + $parser->match_callback( $match1_non_global_counter->cb() ); + $parser->read_callback( $read1_non_global_counter->cb() ); + $parser->open_callback( $open1_non_global_counter->cb() ); + $parser->close_callback( $close1_non_global_counter->cb() ); + + $parser->expand_xinclude( 1 ); + + my $dom = $parser->parse_file("example/test.xml"); + + # TEST + $read1_non_global_counter->test(2, 'read1 for expand_include called twice.'); + # TEST + $close1_non_global_counter->test(2, 'close1 for expand_include called twice.'); + # TEST + $match1_non_global_counter->test(2, 'match1 for expand_include called twice.'); + + # TEST + $open1_non_global_counter->test(2, 'expand_include open1 worked.'); + + # TEST + ok($dom, 'DOM was returned.'); + # warn $dom->toString(); + + my $root = $dom->getDocumentElement(); + + my @nodes = $root->findnodes( 'xml/xsl' ); + # TEST + ok( scalar(@nodes), 'Found nodes.' ); +} + +{ + # test per parser callbacks. These tests must not fail! + + my $parser = XML::LibXML->new(); + my $parser2 = XML::LibXML->new(); + + # TEST + ok($parser, '$parser was init.'); + # TEST + ok($parser2, '$parser2 was init.'); + + $parser->match_callback( $match1_non_global_counter->cb() ); + $parser->read_callback( $read1_non_global_counter->cb() ); + $parser->open_callback( $open1_non_global_counter->cb() ); + $parser->close_callback( $close1_non_global_counter->cb() ); + + $parser->expand_xinclude( 1 ); + + $parser2->match_callback( \&match2 ); + $parser2->read_callback( \&read2 ); + $parser2->open_callback( $open2_counter->cb() ); + $parser2->close_callback( \&close2 ); + + $parser2->expand_xinclude( 1 ); + + my $dom1 = $parser->parse_file( "example/test.xml"); + my $dom2 = $parser2->parse_file("example/test.xml"); + + # TEST + $read1_non_global_counter->test(2, 'read1 for $parser out of ($parser,$parser2)'); + # TEST + $close1_non_global_counter->test(2, 'close1 for $parser out of ($parser,$parser2)'); + + # TEST + $match1_non_global_counter->test(2, 'match1 for $parser out of ($parser,$parser2)'); + # TEST + $open1_non_global_counter->test(2, 'expand_include for $parser out of ($parser,$parser2)'); + # TEST + $open2_counter->test(2, 'expand_include for $parser2 out of ($parser,$parser2)'); + # TEST + ok($dom1, '$dom1 was returned'); + # TEST + ok($dom2, '$dom2 was returned'); + + my $val1 = ( $dom1->findnodes( "/x/xml/text()") )[0]->string_value(); + my $val2 = ( $dom2->findnodes( "/x/xml/text()") )[0]->string_value(); + + $val1 =~ s/^\s*|\s*$//g; + $val2 =~ s/^\s*|\s*$//g; + + # TEST + + is( $val1, "test", ' TODO : Add test name' ); + # TEST + is( $val2, "test 4", ' TODO : Add test name' ); +} + +chdir("example/complex") || die "chdir: $!"; + +my $str = slurp('complex.xml'); + +{ + # tests if callbacks are called correctly within DTDs + my $parser2 = XML::LibXML->new(); + $parser2->expand_xinclude( 1 ); + my $dom = $parser2->parse_string($str); + # TEST + ok($dom, '$dom was init.'); +} + + +$XML::LibXML::match_cb = $match1_global_counter->cb(); +$XML::LibXML::open_cb = $open1_global_counter->cb(); +$XML::LibXML::read_cb = $read1_global_counter->cb(); +$XML::LibXML::close_cb = $close1_global_counter->cb(); + +{ + # tests if global callbacks are working + my $parser = XML::LibXML->new(load_ext_dtd => 1); + # TEST + ok($parser, '$parser was init'); + + # TEST + ok($parser->parse_string($str), 'parse_string returns a true value.'); + + # TEST + $open1_global_counter->test(3, 'open1 for global counter.'); + + # TEST + $match1_global_counter->test(3, 'match1 for global callback.'); + + # TEST + $close1_global_counter->test(3, 'close1 for global callback.'); + + # TEST + $read1_global_counter->test(3, 'read1 for global counter.'); +} + +sub match2 { + # warn "match2: $_[0]\n"; + return 1; +} + +sub close2 { + # warn "close2 $_[0]\n"; + if ( $_[0] ) { + $_[0]->close(); + } + return 1; +} + +sub read2 { + # warn "read2!"; + my $rv = undef; + my $n = 0; + if ( $_[0] ) { + $n = $_[0]->read( $rv , $_[1] ); + # warn "read!" if $n > 0; + } + return $rv; +} + diff --git a/t/18docfree.t b/t/18docfree.t new file mode 100644 index 0000000..9a2cff6 --- /dev/null +++ b/t/18docfree.t @@ -0,0 +1,14 @@ +use strict; +use warnings; + +use Test::More tests => 1; +use XML::LibXML; + +{ + my $doc = XML::LibXML::Document->new(); + $doc = XML::LibXML::Document->new(); +} +# used to get "Attempt to free unreferenced scalar" here +# TEST +pass('docfree Out of scope is OK - no "Attempt to free unreferenced scalar"'); + diff --git a/t/19die_on_invalid_utf8_rt_58848.t b/t/19die_on_invalid_utf8_rt_58848.t new file mode 100644 index 0000000..aa8ad10 --- /dev/null +++ b/t/19die_on_invalid_utf8_rt_58848.t @@ -0,0 +1,53 @@ +# This is a test for: +# https://rt.cpan.org/Ticket/Display.html?id=58848 + +use strict; +use warnings; + +use Test::More tests => 1; + +use XML::LibXML; + +{ + eval { + XML::LibXML->new->parse_file('example/thedieline.rss'); + }; + + my $err = $@; + + # TEST + like ("$err", qr{parser error : Input is not proper UTF-8}, + 'Parser error.', + ); +} + + +=head1 COPYRIGHT & LICENSE + +Copyright 2011 by Shlomi Fish + +This program is distributed under the MIT (X11) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/t/19encoding.t b/t/19encoding.t new file mode 100644 index 0000000..ae11c71 --- /dev/null +++ b/t/19encoding.t @@ -0,0 +1,173 @@ +## +# $Id$ +# +# This should test the XML::LibXML internal encoding/ decoding. +# Since most of the internal encoding code is dependent on +# the perl version the module is built for. only the encodeToUTF8() and +# decodeFromUTF8() functions are supposed to be general, while all the +# magic code is only available for more recent perl version (5.6+) +# +# Added note by Shlomi Fish: we are now perl-5.8.x and above so I removed +# the 5.6.x+ test. + +use strict; +use warnings; + +use Test::More; + +{ + my $tests = 1; + my $basics = 0; + my $magic = 6; + my $step = $basics + $magic; + + $tests += $step; + + if ( defined $ENV{TEST_LANGUAGES} ) { + if ( $ENV{TEST_LANGUAGES} eq "all" ) { + $tests += 2 * $step; + } elsif ( $ENV{TEST_LANGUAGES} eq "EUC-JP" + or $ENV{TEST_LANGUAGES} eq "KOI8-R" ) { + $tests += $step; + } + } + plan tests => $tests; +} + +use XML::LibXML::Common; +use XML::LibXML; + +# TEST +ok(1, 'Loading'); + +my $p = XML::LibXML->new(); + +# encoding tests +# ok there is the UTF16 test still missing + +my $tstr_utf8 = 'test'; +my $tstr_iso_latin1 = "täst"; + +my $domstrlat1 = q{ +täst +}; + +{ + # magic encoding tests + + my $dom_latin1 = XML::LibXML::Document->new('1.0', 'iso-8859-1'); + my $elemlat1 = $dom_latin1->createElement( $tstr_iso_latin1 ); + + $dom_latin1->setDocumentElement( $elemlat1 ); + + # TEST + is( decodeFromUTF8( 'iso-8859-1' ,$elemlat1->toString()), + "<$tstr_iso_latin1/>", ' TODO : Add test name'); + # TEST + is( $elemlat1->toString(0,1), "<$tstr_iso_latin1/>", ' TODO : Add test name'); + + my $elemlat2 = $dom_latin1->createElement( "Öl" ); + # TEST + is( $elemlat2->toString(0,1), "<Öl/>", ' TODO : Add test name'); + + $elemlat1->appendText( $tstr_iso_latin1 ); + + # TEST + is( decodeFromUTF8( 'iso-8859-1' ,$elemlat1->string_value()), + $tstr_iso_latin1, ' TODO : Add test name'); + # TEST + is( $elemlat1->string_value(1), $tstr_iso_latin1, ' TODO : Add test name'); + + # TEST + is( $dom_latin1->toString(), $domstrlat1, ' TODO : Add test name' ); + +} + +exit(0) unless defined $ENV{TEST_LANGUAGES}; + +if ( $ENV{TEST_LANGUAGES} eq 'all' or $ENV{TEST_LANGUAGES} eq "EUC-JP" ) { + # japanese encoding (EUC-JP) + + my $tstr_euc_jp = 'À¸ÇþÀ¸ÊÆÀ¸Íñ'; + my $domstrjp = q{ +<À¸ÇþÀ¸ÊÆÀ¸Íñ>À¸ÇþÀ¸ÊÆÀ¸Íñ +}; + + + { + my $dom_euc_jp = XML::LibXML::Document->new('1.0', 'EUC-JP'); + my $elemjp = $dom_euc_jp->createElement( $tstr_euc_jp ); + + + # TEST + + is( decodeFromUTF8( 'EUC-JP' , $elemjp->nodeName()), + $tstr_euc_jp, ' TODO : Add test name' ); + # TEST + is( decodeFromUTF8( 'EUC-JP' ,$elemjp->toString()), + "<$tstr_euc_jp/>", ' TODO : Add test name'); + # TEST + is( $elemjp->toString(0,1), "<$tstr_euc_jp/>", ' TODO : Add test name'); + + $dom_euc_jp->setDocumentElement( $elemjp ); + $elemjp->appendText( $tstr_euc_jp ); + + # TEST + + is( decodeFromUTF8( 'EUC-JP' ,$elemjp->string_value()), + $tstr_euc_jp, ' TODO : Add test name'); + # TEST + is( $elemjp->string_value(1), $tstr_euc_jp, ' TODO : Add test name'); + + # TEST + + is( $dom_euc_jp->toString(), $domstrjp, ' TODO : Add test name' ); + } + +} + +if ( $ENV{TEST_LANGUAGES} eq 'all' or $ENV{TEST_LANGUAGES} eq "KOI8-R" ) { + # cyrillic encoding (KOI8-R) + + my $tstr_koi8r = 'ÐÒÏÂÁ'; + my $domstrkoi = q{ +<ÐÒÏÂÁ>ÐÒÏÂÁ +}; + + + { + my ($dom_koi8, $elemkoi8); + + $dom_koi8 = XML::LibXML::Document->new('1.0', 'KOI8-R'); + $elemkoi8 = $dom_koi8->createElement( $tstr_koi8r ); + + # TEST + + is( decodeFromUTF8( 'KOI8-R' ,$elemkoi8->nodeName()), + $tstr_koi8r, ' TODO : Add test name' ); + + # TEST + + is( decodeFromUTF8( 'KOI8-R' ,$elemkoi8->toString()), + "<$tstr_koi8r/>", ' TODO : Add test name'); + # TEST + is( $elemkoi8->toString(0,1), "<$tstr_koi8r/>", ' TODO : Add test name'); + + $elemkoi8->appendText( $tstr_koi8r ); + + # TEST + + is( decodeFromUTF8( 'KOI8-R' ,$elemkoi8->string_value()), + $tstr_koi8r, ' TODO : Add test name'); + # TEST + is( $elemkoi8->string_value(1), + $tstr_koi8r, ' TODO : Add test name'); + $dom_koi8->setDocumentElement( $elemkoi8 ); + + # TEST + + is( $dom_koi8->toString(), + $domstrkoi, ' TODO : Add test name' ); + + } +} diff --git a/t/20extras.t b/t/20extras.t new file mode 100644 index 0000000..a5ffcec --- /dev/null +++ b/t/20extras.t @@ -0,0 +1,57 @@ +# $Id$ + +use strict; +use warnings; + +use Test::More tests => 12; + +use XML::LibXML; + +my $string = ""; + +my $parser = XML::LibXML->new(); + +{ + my $doc = $parser->parse_string( $string ); + # TEST + ok($doc, ' TODO : Add test name'); + local $XML::LibXML::skipXMLDeclaration = 1; + # TEST + is( $doc->toString(), $string, ' TODO : Add test name' ); + local $XML::LibXML::setTagCompression = 1; + # TEST + is( $doc->toString(), "", ' TODO : Add test name' ); +} + +{ + local $XML::LibXML::skipDTD = 1; + $parser->expand_entities(0); + my $doc = $parser->parse_file( "example/dtd.xml" ); + # TEST + ok($doc, ' TODO : Add test name'); + my $test = "\nThis is a valid document &foo; !\n"; + # TEST + is( $doc->toString, $test, ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( $string ); + # TEST + ok($doc, ' TODO : Add test name'); + my $dclone = $doc->cloneNode(1); # deep + # TEST + ok( ! $dclone->isSameNode($doc), ' TODO : Add test name' ); + # TEST + ok( $dclone->getDocumentElement(), ' TODO : Add test name' ); + # TEST + ok( $doc->toString() eq $dclone->toString(), ' TODO : Add test name' ); + + my $clone = $doc->cloneNode(); # shallow + # TEST + ok( ! $clone->isSameNode($doc), ' TODO : Add test name' ); + # TEST + ok( ! $clone->getDocumentElement(), ' TODO : Add test name' ); + $doc->getDocumentElement()->unbindNode(); + # TEST + ok( $doc->toString() eq $clone->toString(), ' TODO : Add test name' ); +} diff --git a/t/21catalog.t b/t/21catalog.t new file mode 100644 index 0000000..8fecff7 --- /dev/null +++ b/t/21catalog.t @@ -0,0 +1,30 @@ + +use strict; +use warnings; + +use Test::More tests => 1; + +use XML::LibXML; + +# XML::LibXML->load_catalog( "example/catalog.xml" ); + +# the following document should not be able to get parsed +# if the catalog is not available + +my $doc = XML::LibXML->new( catalog => "example/catalog.xml" )->parse_string(< +
+Something here +12345 +2001-04-01 +XML.com +
Foo
+Here's some leading text +And here is the rest... +
+EOF + +# TEST +ok($doc, 'Doc was parsed with catalog'); diff --git a/t/23rawfunctions.t b/t/23rawfunctions.t new file mode 100644 index 0000000..427401c --- /dev/null +++ b/t/23rawfunctions.t @@ -0,0 +1,23 @@ + +use strict; +use warnings; + +use Test::More tests => 2; + +use XML::LibXML; + +my $doc = XML::LibXML->createDocument; + +my $t1 = $doc->createTextNode( "foo" ); +my $t2 = $doc->createTextNode( "bar" ); + +$t1->addChild( $t2 ); + +eval { + my $v = $t2->nodeValue; +}; +# TEST +ok($@, 'An exception was thrown'); + +# TEST +ok(1, 'End'); diff --git a/t/24c14n.t b/t/24c14n.t new file mode 100644 index 0000000..bccb89e --- /dev/null +++ b/t/24c14n.t @@ -0,0 +1,225 @@ +# -*- cperl -*- +# $Id$ + +## +# these testcases are for xml canonization interfaces. +# + +# should be 23. +use Test::More tests => 23; +use strict; +use warnings; + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); + +my $parser = XML::LibXML->new; + +{ + my $doc = $parser->parse_string( "
" ); + + my $c14n_res = $doc->toStringC14N(); + # TEST + is( $c14n_res, " ", ' TODO : Add test name' ); + + $c14n_res = $doc->toStringC14N(1); + # TEST + is( $c14n_res, " ", ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( 'e&f<]]> ' ); + + my $c14n_res = $doc->toStringC14N(); + # TEST + is( $c14n_res, ' >e&f< ', ' TODO : Add test name' ); + $c14n_res = $doc->toStringC14N(1); + # TEST + is( $c14n_res, ' >e&f< ', ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( '' ); + + my $c14n_res; + $c14n_res = $doc->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( '' ); + + my $c14n_res; + $c14n_res = $doc->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + + +# ----------------------------------------------------------------- # +# The C14N says: remove unused namespaces, libxml2 just orders them +# ----------------------------------------------------------------- # +{ + my $doc = $parser->parse_string( '' ); + + my $c14n_res; + $c14n_res = $doc->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); + + # would be correct, but will not work. + # ok( $c14n_res, '' ); +} + +# ----------------------------------------------------------------- # +# The C14N says: remove redundant namespaces +# ----------------------------------------------------------------- # +{ + my $doc = $parser->parse_string( '' ); + + my $c14n_res; + $c14n_res = $doc->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( '' ); + + my $c14n_res; + $c14n_res = $doc->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( < + +EOX + + my $c14n_res; + $c14n_res = $doc->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + +# canonize with xpath expressions +{ + my $doc = $parser->parse_string( < + +EOX + my $c14n_res; + $c14n_res = $doc->toStringC14N(0, "//d" ); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + +{ + my $doc = $parser->parse_string( < + +EOX + my $rootnode=$doc->documentElement; + my $c14n_res; + $c14n_res = $rootnode->toStringC14N(0, "//*[local-name()='d']"); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); + ($rootnode) = $doc->findnodes("//*[local-name()='d']"); + $c14n_res = $rootnode->toStringC14N(); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); + $rootnode = $doc->documentElement->firstChild; + $c14n_res = $rootnode->toStringC14N(0); + # TEST + is( $c14n_res, '', ' TODO : Add test name' ); +} + +# exclusive canonicalization + +if (20620 > XML::LibXML::LIBXML_VERSION) { + skip("skipping Exclusive C14N tests for libxml2 < 2.6.17") for 15..20; +} else { + my $xml1 = < + + + + +EOX + + my $xml2 = < + + + + +EOX + my $xpath = "(//. | //@* | //namespace::*)[ancestor-or-self::*[name()='n1:elem2']]"; + my $result = qq(\n \n ); + my $result_n0n2 = qq(\n \n ); + my $doc1 = $parser->parse_string( $xml1 ); + my $doc2 = $parser->parse_string( $xml2 ); + + { + my $c14n_res = $doc1->toStringEC14N(0, $xpath); + # TEST + is( $c14n_res, $result, ' TODO : Add test name'); + } + { + my $c14n_res = $doc2->toStringEC14N(0, $xpath); + # TEST + is( $c14n_res, $result, ' TODO : Add test name'); + } + { + my $c14n_res = $doc1->toStringEC14N(0, $xpath,[]); + # TEST + is( $c14n_res, $result, ' TODO : Add test name'); + } + { + my $c14n_res = $doc2->toStringEC14N(0, $xpath,[]); + # TEST + is( $c14n_res, $result, ' TODO : Add test name'); + } + { + my $c14n_res = $doc2->toStringEC14N(0, $xpath,['n1','n3']); + # TEST + is( $c14n_res, $result, ' TODO : Add test name'); + } + { + my $c14n_res = $doc2->toStringEC14N(0, $xpath,['n0','n2']); + # TEST + is( $c14n_res, $result_n0n2, ' TODO : Add test name'); + } + +} + +{ + +my $xml = <<'EOF'; +http://www.behealth.be/webservices/tsa/TSConsultTSBagRequesthttps://www.ehealth.fgov.be/timestampauthority_1_5/timestampauthorityurn:www.sve.man.ac.uk-54690551758351720271010843310http://www.w3.org/2005/08/addressing/anonymoustsa_0406798006_01803002317537321226995312781 +EOF + +my $xpath = q{(//. | //@* | //namespace::*)[ancestor-or-self::x:MessageID]}; +my $xpath2 = q{(//. | //@* | //namespace::*)[ancestor-or-self::*[local-name()='MessageID' and namespace-uri()='http://www.w3.org/2005/08/addressing']]}; + +my $doc = XML::LibXML->load_xml(string=>$xml); +my $xpc = XML::LibXML::XPathContext->new($doc); +$xpc->registerNs(x => "http://www.w3.org/2005/08/addressing"); +my $expect = 'urn:www.sve.man.ac.uk-54690551758351720271010843310'; +# TEST + +is( $doc->toStringEC14N( 0, $xpath2, [qw(soap)] ), $expect, ' TODO : Add test name' ); +# TEST + +is( $doc->toStringEC14N( 0, $xpath, $xpc, [qw(soap)] ), $expect, ' TODO : Add test name' ); +# TEST + +is( $doc->toStringEC14N( 0, $xpath2, $xpc, [qw(soap)] ), $expect, ' TODO : Add test name' ); + +} diff --git a/t/25relaxng.t b/t/25relaxng.t new file mode 100644 index 0000000..93e6188 --- /dev/null +++ b/t/25relaxng.t @@ -0,0 +1,161 @@ +# $Id$ + +## +# Testcases for the RelaxNG interface +# + +use strict; +use warnings; + +use lib './t/lib'; +use TestHelpers qw(slurp); + +use Test::More; + +BEGIN { + use XML::LibXML; + + if ( XML::LibXML::LIBXML_VERSION >= 20510 ) { + plan tests => 17; + } + else { + plan skip_all => 'Skip No RNG Support compiled'; + } +}; + +if ( XML::LibXML::LIBXML_VERSION >= 20510 ) { + +my $xmlparser = XML::LibXML->new(); + +my $file = "test/relaxng/schema.rng"; +my $badfile = "test/relaxng/badschema.rng"; +my $validfile = "test/relaxng/demo.xml"; +my $invalidfile = "test/relaxng/invaliddemo.xml"; +my $demo4 = "test/relaxng/demo4.rng"; +my $netfile = "test/relaxng/net.rng"; + +print "# 1 parse schema from a file\n"; +{ + my $rngschema = XML::LibXML::RelaxNG->new( location => $file ); + # TEST + ok ( $rngschema, ' TODO : Add test name' ); + + eval { $rngschema = XML::LibXML::RelaxNG->new( location => $badfile ); }; + # TEST + ok( $@, ' TODO : Add test name' ); +} + +print "# 2 parse schema from a string\n"; +{ + my $string = slurp($file); + + my $rngschema = XML::LibXML::RelaxNG->new( string => $string ); + # TEST + ok ( $rngschema, ' TODO : Add test name' ); + + $string = slurp($badfile); + + eval { $rngschema = XML::LibXML::RelaxNG->new( string => $string ); }; + # TEST + ok( $@, ' TODO : Add test name' ); +} + +print "# 3 parse schema from a document\n"; +{ + my $doc = $xmlparser->parse_file( $file ); + my $rngschema = XML::LibXML::RelaxNG->new( DOM => $doc ); + # TEST + ok ( $rngschema, ' TODO : Add test name' ); + + $doc = $xmlparser->parse_file( $badfile ); + eval { $rngschema = XML::LibXML::RelaxNG->new( DOM => $doc ); }; + # TEST + ok( $@, ' TODO : Add test name' ); +} + +print "# 4 validate a document\n"; +{ + my $doc = $xmlparser->parse_file( $validfile ); + my $rngschema = XML::LibXML::RelaxNG->new( location => $file ); + + my $valid = 0; + eval { $valid = $rngschema->validate( $doc ); }; + # TEST + is( $valid, 0, ' TODO : Add test name' ); + + $doc = $xmlparser->parse_file( $invalidfile ); + $valid = 0; + eval { $valid = $rngschema->validate( $doc ); }; + # TEST + ok ( $@, ' TODO : Add test name' ); +} + +print "# 5 re-validate a modified document\n"; +{ + my $rng = XML::LibXML::RelaxNG->new(location => $demo4); + my $seed_xml = <<'EOXML'; + + +EOXML + + my $doc = $xmlparser->parse_string($seed_xml); + my $rootElem = $doc->documentElement; + my $bogusElem = $doc->createElement('bogus-element'); + + eval{$rng->validate($doc);}; + # TEST + ok ($@, ' TODO : Add test name'); + + $rootElem->setAttribute('name', 'rootElem'); + eval{ $rng->validate($doc); }; + # TEST + ok (!$@, ' TODO : Add test name'); + + $rootElem->appendChild($bogusElem); + eval{$rng->validate($doc);}; + # TEST + ok ($@, ' TODO : Add test name'); + + $bogusElem->unlinkNode(); + eval{$rng->validate($doc);}; + # TEST + ok (!$@, ' TODO : Add test name'); + + $rootElem->removeAttribute('name'); + eval{$rng->validate($doc);}; + # TEST + ok ($@, ' TODO : Add test name'); + +} + +print "# 6 check that no_network => 1 works\n"; +{ + my $rng = eval { XML::LibXML::RelaxNG->new( location => $netfile, no_network => 1 ) }; + # TEST + like( $@, qr{I/O error : Attempt to load network entity}, 'RNG from file location with external import and no_network => 1 throws an exception.' ); + # TEST + ok( !defined $rng, 'RNG from file location with external import and no_network => 1 is not loaded.' ); +} +{ + my $rng = eval { XML::LibXML::RelaxNG->new( string => <<'EOF', no_network => 1 ) }; + + + + + + + + + + + + +EOF + # TEST + like( $@, qr{I/O error : Attempt to load network entity}, 'RNG from buffer with external import and no_network => 1 throws an exception.' ); + # TEST + ok( !defined $rng, 'RNG from buffer with external import and no_network => 1 is not loaded.' ); +} + + +} # Version >= 20510 test diff --git a/t/26schema.t b/t/26schema.t new file mode 100644 index 0000000..17f641e --- /dev/null +++ b/t/26schema.t @@ -0,0 +1,135 @@ +# $Id$ + +## +# Testcases for the XML Schema interface +# + +use strict; +use warnings; + +use lib './t/lib'; +use TestHelpers qw(slurp); + +use Test::More; + +use XML::LibXML; + +if ( XML::LibXML::LIBXML_VERSION >= 20510 ) { + plan tests => 12; +} +else { + plan skip_all => 'No Schema Support compiled.'; +} + +my $xmlparser = XML::LibXML->new(); + +my $file = "test/schema/schema.xsd"; +my $badfile = "test/schema/badschema.xsd"; +my $validfile = "test/schema/demo.xml"; +my $invalidfile = "test/schema/invaliddemo.xml"; +my $netfile = "test/schema/net.xsd"; + + +# 1 parse schema from a file +{ + my $rngschema = XML::LibXML::Schema->new( location => $file ); + # TEST + ok ( $rngschema, 'Good XML::LibXML::Schema was initialised' ); + + eval { $rngschema = XML::LibXML::Schema->new( location => $badfile ); }; + # TEST + ok( $@, 'Bad XML::LibXML::Schema throws an exception.' ); +} + +# 2 parse schema from a string +{ + my $string = slurp($file); + + my $rngschema = XML::LibXML::Schema->new( string => $string ); + # TEST + ok ( $rngschema, 'RNG Schema initialized from string.' ); + + $string = slurp($badfile); + eval { $rngschema = XML::LibXML::Schema->new( string => $string ); }; + # TEST + ok( $@, 'Bad string schema throws an excpetion.' ); +} + +# 3 validate a document +{ + my $doc = $xmlparser->parse_file( $validfile ); + my $rngschema = XML::LibXML::Schema->new( location => $file ); + + my $valid = 0; + eval { $valid = $rngschema->validate( $doc ); }; + # TEST + is( $valid, 0, 'validate() returns 0 to indicate validity of valid file.' ); + + $doc = $xmlparser->parse_file( $invalidfile ); + $valid = 0; + eval { $valid = $rngschema->validate( $doc ); }; + # TEST + ok ( $@, 'Invalid file throws an excpetion.'); +} + +# 4 validate a node +{ + my $doc = $xmlparser->load_xml(string => <<'EOF'); + + John Smith + + Ola Nordmann + + +EOF + + my $schema = XML::LibXML::Schema->new(string => <<'EOF'); + + + + + + + + + + + + + + + + + + +EOF + + my $nodelist = $doc->findnodes('/shiporder/shipto'); + my $result = 1; + eval { $result = $schema->validate($nodelist->get_node(1)); }; + # TEST + is( $@, '', 'validate() with element doesn\'t throw' ); + # TEST + is( $result, 0, 'validate() with element returns 0' ); +} + +# 5 check that no_network => 1 works +{ + my $schema = eval { XML::LibXML::Schema->new( location => $netfile, no_network => 1 ) }; + # TEST + like( $@, qr{I/O error : Attempt to load network entity}, 'Schema from file location with external import and no_network => 1 throws an exception.' ); + # TEST + ok( !defined $schema, 'Schema from file location with external import and no_network => 1 is not loaded.' ); +} +{ + my $schema = eval { XML::LibXML::Schema->new( string => <<'EOF', no_network => 1 ) }; + + + + +EOF + # TEST + like( $@, qr{I/O error : Attempt to load network entity}, 'Schema from buffer with external import and no_network => 1 throws an exception.' ); + # TEST + ok( !defined $schema, 'Schema from buffer with external import and no_network => 1 is not loaded.' ); +} diff --git a/t/27new_callbacks_simple.t b/t/27new_callbacks_simple.t new file mode 100644 index 0000000..4ec1c32 --- /dev/null +++ b/t/27new_callbacks_simple.t @@ -0,0 +1,225 @@ + +use strict; +use warnings; + +use lib './t/lib'; + +use Counter; + +# $Id$ + +# Should be 14. +use Test::More tests => 14; + +use XML::LibXML; +use IO::File; + +# --------------------------------------------------------------------- # +# simple test +# --------------------------------------------------------------------- # +my $string = <test
+EOF + +my $icb = XML::LibXML::InputCallback->new(); +# TEST +ok($icb, ' TODO : Add test name'); + +my $match_file_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $uri = shift; + if ( $uri =~ /^\/example\// ){ + $inc_cb->(); + return 1; + } + return 0; + } + } + } +); + +my $open_file_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $uri = shift; + open my $file, '<', ".$uri" + or die "Cannot open '.$uri'"; + $inc_cb->(); + return $file; + } + } + } +); + +my $read_file_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $h = shift; + my $buflen = shift; + my $rv = undef; + + $inc_cb->(); + my $n = $h->read( $rv , $buflen ); + + return $rv; + } + } + } +); + +my $close_file_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $h = shift; + $inc_cb->(); + $h->close(); + return 1; + + }; + } + } +); + +my $match_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $uri = shift; + if ( $uri =~ /^\/example\// ){ + $inc_cb->(); + return 1; + } + return 0; + } + } + } +); + +my $open_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $uri = shift; + my $hash = { line => 0, + lines => [ "", "bar", "", "..", "" ], + }; + $inc_cb->(); + + return $hash; + } + } + } +); + +my $close_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $h = shift; + undef $h; + $inc_cb->(); + + return; + } + } + } +); + +my $read_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + sub { + my $h = shift; + my $buflen = shift; + + my $id = $h->{line}; + $h->{line} += 1; + my $rv= $h->{lines}->[$id]; + + $rv = "" unless defined $rv; + + $inc_cb->(); + + return $rv; + } + } + } +); + +$icb->register_callbacks( [ $match_file_counter->cb(), $open_file_counter->cb(), + $read_file_counter->cb(), $close_file_counter->cb() ] ); + +my $parser = XML::LibXML->new(); +$parser->expand_xinclude(1); +$parser->input_callbacks($icb); +my $doc = $parser->parse_string($string); + +# TEST +$match_file_counter->test(1, 'match_file matched once.'); + +# TEST +$open_file_counter->test(1, 'open_file called once.'); + +# TEST +$read_file_counter->test(2, 'read_file called twice.'); + +# TEST +$close_file_counter->test(1, 'close_file called once.'); + +# TEST +ok($doc, ' TODO : Add test name'); +# TEST + +is($doc->string_value(),"test..", ' TODO : Add test name'); + +my $icb2 = XML::LibXML::InputCallback->new(); + +# TEST +ok($icb2, ' TODO : Add test name'); + +$icb2->register_callbacks( [ $match_hash_counter->cb(), $open_hash_counter->cb(), + $read_hash_counter->cb(), $close_hash_counter->cb() ] ); + +$parser->input_callbacks($icb2); +$doc = $parser->parse_string($string); + +# TEST +$match_hash_counter->test(1, 'match_hash matched once.'); + +# TEST +$open_hash_counter->test(1, 'open_hash called once.'); + +# TEST +$read_hash_counter->test(6, 'read_hash called six times.'); + +# TEST +$close_hash_counter->test(1, 'close_hash called once.'); + +# TEST +ok($doc, ' TODO : Add test name'); + +# TEST + +is($doc->string_value(),"testbar..", ' TODO : Add test name'); + diff --git a/t/28new_callbacks_multiple.t b/t/28new_callbacks_multiple.t new file mode 100644 index 0000000..22e86a0 --- /dev/null +++ b/t/28new_callbacks_multiple.t @@ -0,0 +1,597 @@ +# $Id$ + +use strict; +use warnings; + +use lib './t/lib'; + +use Counter; +use Stacker; + +# Should be 56 +use Test::More tests => 56; + +use XML::LibXML; +use IO::File; + +my $read_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my $h = shift; + my $buflen = shift; + + my $id = $h->{line}; + $h->{line} += 1; + my $rv= $h->{lines}->[$id]; + + $rv = "" unless defined $rv; + + $inc_cb->(); + return $rv; + + }; + } + } +); +my $read_file_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my $h = shift; + my $buflen = shift; + my $rv = undef; + + $inc_cb->(); + + my $n = $h->read( $rv , $buflen ); + + return $rv; + }; + } + } +); + +my $close_file_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my $h = shift; + + $inc_cb->(); + $h->close(); + + return 1; + }; + } + } +); + +my $close_xml_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my $dom = shift; + undef $dom; + + $inc_cb->(); + + return 1; + }; + } + } +); + +my $open_xml_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + + return sub { + my $uri = shift; + my $dom = XML::LibXML->new->parse_string(q{barbar}); + + if ($dom) + { + $inc_cb->(); + } + + return $dom; + }; + }, + } +); + +my $close_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my $h = shift; + undef $h; + + $inc_cb->(); + + return 1; + }; + } + } +); + +my $open_hash_counter = Counter->new( + { + gen_cb => sub { + my $inc_cb = shift; + return sub { + my $uri = shift; + my $hash = { line => 0, + lines => [ "", "bar", "", "..", "" ], + }; + + $inc_cb->(); + + return $hash; + }; + } + } +); + +my $open_file_stacker = Stacker->new( + { + gen_cb => sub { + my $push_cb = shift; + return sub { + my $uri = shift; + + if (! open (my $file, '<', ".$uri")) + { + die "Could not open file '.$uri'!"; + } + else + { + + $push_cb->($uri); + + return $file; + } + }; + }, + } +); + +my $match_hash_stacker = Stacker->new( + { + gen_cb => sub { + my $push_cb = shift; + return sub { + my $uri = shift; + + if ( $uri =~ /^\/libxml\// ){ + $push_cb->({ verdict => 1, uri => $uri, }); + return 1; + } + else { + return; + } + }; + }, + } +); + +my $match_file_stacker = Stacker->new( + { + gen_cb => sub { + my $push_cb = shift; + return sub { + my $uri = shift; + + my $verdict = (( $uri =~ /^\/example\// ) ? 1 : 0); + if ($verdict) + { + $push_cb->({ verdict => $verdict, uri => $uri, }); + } + + return $verdict; + }; + }, + } +); + +my $match_hash2_stacker = Stacker->new( + { + gen_cb => sub { + my $push_cb = shift; + return sub { + my $uri = shift; + if ( $uri =~ /^\/example\// ){ + $push_cb->({ verdict => 1, uri => $uri, }); + return 1; + } + else { + return 0; + } + }; + }, + } +); + +my $match_xml_stacker = Stacker->new( + { + gen_cb => sub { + my $push_cb = shift; + return sub { + my $uri = shift; + if ( $uri =~ /^\/xmldom\// ){ + $push_cb->({ verdict => 1, uri => $uri, }); + return 1; + } + else { + return 0; + } + }; + }, + } +); + +my $read_xml_stacker = Stacker->new( + { + gen_cb => sub { + my $push_cb = shift; + return sub { + my $dom = shift; + my $buflen = shift; + + my $tmp = $dom->documentElement->findnodes('tmp')->shift; + my $rv = $tmp ? $dom->toString : ""; + $tmp->unbindNode if($tmp); + + $push_cb->($rv); + + return $rv; + }; + }, + } +); + +# --------------------------------------------------------------------- # +# multiple tests +# --------------------------------------------------------------------- # +{ + my $string = < +test + + + +
+EOF + + my $icb = XML::LibXML::InputCallback->new(); + # TEST + ok($icb, 'XML::LibXML::InputCallback was initialized'); + + $icb->register_callbacks( [ $match_file_stacker->cb, $open_file_stacker->cb(), + $read_file_counter->cb(), $close_file_counter->cb(), ] ); + + $icb->register_callbacks( [ $match_hash_stacker->cb, $open_hash_counter->cb, + $read_hash_counter->cb(), $close_hash_counter->cb ] ); + + $icb->register_callbacks( [ $match_xml_stacker->cb, $open_xml_counter->cb, + $read_xml_stacker->cb, $close_xml_counter->cb] ); + + + my $parser = XML::LibXML->new(); + $parser->expand_xinclude(1); + $parser->input_callbacks($icb); + my $doc = $parser->parse_string($string); # read_hash - 1,1,1,1,1 + + # TEST:$c=0; + my $test_counters = sub { + # TEST:$c++; + $read_hash_counter->test(6, "read_hash() count for multiple tests"); + + # TEST:$c++; + $read_file_counter->test(2, 'read_file() called twice.'); + + # TEST:$c++; + $close_file_counter->test(1, 'close_file() called once.'); + + # TEST:$c++; + $open_file_stacker->test( + [ + '/example/test2.xml', + ], + 'open_file() for URLs.', + ); + + # TEST:$c++; + $match_hash_stacker->test( + [ + { verdict => 1, uri => '/libxml/test2.xml',}, + ], + 'match_hash() for URLs.', + ); + + # TEST:$c++; + $read_xml_stacker->test( + [ + qq{\nbarbar\n}, + '', + ], + 'read_xml() for multiple callbacks', + ); + # TEST:$c++; + $match_xml_stacker->test( + [ + { verdict => 1, uri => '/xmldom/test2.xml', }, + ], + 'match_xml() one.', + ); + + # TEST:$c++; + $match_file_stacker->test( + [ + { verdict => 1, uri => '/example/test2.xml',}, + ], + 'match_file() for multiple_tests', + ); + + # TEST:$c++; + $open_hash_counter->test(1, 'open_hash() : called 1 times'); + # TEST:$c++; + $open_xml_counter->test(1, 'open_xml() : parse_string() successful.',); + # TEST:$c++; + $close_xml_counter->test(1, "close_xml() called once."); + # TEST:$c++; + $close_hash_counter->test(1, "close_hash() called once."); + }; + + # TEST:$test_counters=$c; + + # TEST*$test_counters + $test_counters->(); + + # This is a regression test for: + # https://rt.cpan.org/Ticket/Display.html?id=51086 + my $doc2 = $parser->parse_string($string); + + # TEST*$test_counters + $test_counters->(); + + # TEST + ok ($doc, 'parse_string() returns a doc.'); + # TEST + is ($doc->string_value(), + "\ntest\n..\nbar..\nbarbar\n", + '->string_value()', + ); + + # TEST + ok ($doc2, 'second parse_string() returns a doc.'); + # TEST + is ($doc2->string_value(), + "\ntest\n..\nbar..\nbarbar\n", + q{Second parse_string()'s ->string_value()}, + ); +} + +{ + my $string = < +test + + +
+EOF + + my $icb = XML::LibXML::InputCallback->new(); + + $icb->register_callbacks( [ $match_file_stacker->cb, $open_file_stacker->cb(), + $read_file_counter->cb(), $close_file_counter->cb(), ] ); + + $icb->register_callbacks( [ $match_hash2_stacker->cb, $open_hash_counter->cb, + $read_hash_counter->cb(), $close_hash_counter->cb() ] ); + + + my $parser = XML::LibXML->new(); + $parser->expand_xinclude(1); + $parser->input_callbacks($icb); + my $doc = $parser->parse_string($string); + + # TEST + $read_hash_counter->test(12, "read_hash() count for multiple register_callbacks"); + + # TEST + $open_file_stacker->test( + [ + ], + 'open_file() for URLs.', + ); + + # TEST + $match_hash2_stacker->test( + [ + { verdict => 1, uri => '/example/test2.xml',}, + { verdict => 1, uri => '/example/test3.xml',}, + ], + 'match_hash2() input callbacks' , + ); + + # TEST + $match_file_stacker->test( + [ + ], + 'match_file() input callbacks' , + ); + + # TEST + is ($doc->string_value(), "\ntest\nbar..\nbar..\n", + 'string_value returns fine',); + + # TEST + $open_hash_counter->test(2, 'open_hash() : called 2 times'); + # TEST + $close_hash_counter->test( + 2, "close_hash() called twice on two xincludes." + ); + + $icb->unregister_callbacks( [ $match_hash2_stacker->cb, \&open_hash, + $read_hash_counter->cb(), $close_hash_counter->cb] ); + $doc = $parser->parse_string($string); + + # TEST + $read_file_counter->test(4, 'read_file() called 4 times.'); + + # TEST + $close_file_counter->test(2, 'close_file() called twice.'); + + # TEST + $open_file_stacker->test( + [ + '/example/test2.xml', + '/example/test3.xml', + ], + 'open_file() for URLs.', + ); + + # TEST + $match_hash2_stacker->test( + [ + ], + 'match_hash2() does not match after being unregistered.' , + ); + + # TEST + $match_file_stacker->test( + [ + { verdict => 1, uri => '/example/test2.xml',}, + { verdict => 1, uri => '/example/test3.xml',}, + ], + 'match_file() input callbacks' , + ); + + + # TEST + is($doc->string_value(), + "\ntest\n..\n\n \n \n", + 'string_value() after unregister callbacks', + ); +} + +{ + my $string = < +test + + + +EOF + my $string2 = < +foo..bar + +EOF + + + my $icb = XML::LibXML::InputCallback->new(); + # TEST + ok ($icb, 'XML::LibXML::InputCallback was initialized (No. 2)'); + + my $open_xml2 = sub { + my $uri = shift; + my $parser = XML::LibXML->new; + $parser->expand_xinclude(1); + $parser->input_callbacks($icb); + + my $dom = $parser->parse_string($string2); + # TEST + ok ($dom, 'parse_string() inside open_xml2'); + + return $dom; + }; + + $icb->register_callbacks( [ $match_xml_stacker->cb, $open_xml2, + $read_xml_stacker->cb, $close_xml_counter->cb ] ); + + $icb->register_callbacks( [ $match_hash2_stacker->cb, $open_hash_counter->cb, + $read_hash_counter->cb(), $close_hash_counter->cb ] ); + + my $parser = XML::LibXML->new(); + $parser->expand_xinclude(1); + + $parser->match_callback( $match_file_stacker->cb ); + $parser->open_callback( $open_file_stacker->cb() ); + $parser->read_callback( $read_file_counter->cb() ); + $parser->close_callback( $close_file_counter->cb() ); + + $parser->input_callbacks($icb); + + my $doc = $parser->parse_string($string); + + # TEST + $read_hash_counter->test(6, "read_hash() count for stuff."); + + # TEST + $read_file_counter->test(2, 'read_file() called twice.'); + + # TEST + $close_file_counter->test(1, 'close_file() called once.'); + + # TEST + $open_file_stacker->test( + [ + '/example/test2.xml', + ], + 'open_file() for URLs.', + ); + + # TEST + $match_hash2_stacker->test( + [ + { verdict => 1, uri => '/example/test2.xml',}, + ], + 'match_hash2() input callbacks' , + ); + + # TEST + $read_xml_stacker->test( + [ + qq{\n\nfoo..bar..bar\n\n}, + '', + ], + 'read_xml() No. 2', + ); + # TEST + $match_xml_stacker->test( + [ + { verdict => 1, uri => '/xmldom/test2.xml', }, + ], + 'match_xml() No. 2.', + ); + + # TEST + $match_file_stacker->test( + [ + { verdict => 1, uri => '/example/test2.xml',}, + ], + 'match_file() for inner callback.', + ); + + # TEST + $open_hash_counter->test(1, 'open_hash() : called 1 times'); + + # TEST + $close_xml_counter->test(1, "close_xml() called once."); + + # TEST + $close_hash_counter->test(1, "close_hash() called once."); + + # TEST + is ($doc->string_value(), "\ntest\n..\n\nfoo..bar..bar\n\n", + 'string_value()',); +} + diff --git a/t/29id.t b/t/29id.t new file mode 100644 index 0000000..7acd5ab --- /dev/null +++ b/t/29id.t @@ -0,0 +1,171 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Test::More; +use XML::LibXML; + +{ + if (XML::LibXML::LIBXML_VERSION() >= 20623) { + plan tests => 42; + } + else { + plan skip_all => 'Skipping ID tests on libxml2 <= 2.6.23'; + } +} + +my $parser = XML::LibXML->new; + +my $xml1 = <<'EOF'; + + +]> + +EOF + +my $xml2 = <<'EOF'; + +EOF + +sub _debug { + my ($msg,$n)=@_; + print "$msg\t$$n\n'",(ref $n ? $n->toString : "NULL"),"'\n"; +} + +# TEST:$do_validate=2; +for my $do_validate (0..1) { + my ($n,$doc,$root,$at); + # TEST*$do_validate + ok( $doc = $parser->parse_string($xml1), ' TODO : Add test name' ); + $root = $doc->getDocumentElement; + $n = $doc->getElementById('foo'); + # TEST*$do_validate + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + + # old name + $n = $doc->getElementsById('foo'); + # TEST*$do_validate + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + + $at = $n->getAttributeNode('id'); + # TEST*$do_validate + ok( $at, ' TODO : Add test name' ); + # TEST*$do_validate + ok( $at->isId, ' TODO : Add test name' ); + + $at = $root->getAttributeNode('notid'); + # TEST*$do_validate + ok( $at->isId == 0, ' TODO : Add test name' ); + + # _debug("1: foo: ",$n); + $doc->getDocumentElement->setAttribute('id','bar'); + # TEST + ok( $doc->validate, ' TODO : Add test name' ) if $do_validate; + $n = $doc->getElementById('bar'); + # TEST*$do_validate + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + + # _debug("1: bar: ",$n); + $n = $doc->getElementById('foo'); + # TEST*$do_validate + ok( !defined($n), ' TODO : Add test name' ); + # _debug("1: !foo: ",$n); + + my $test = $doc->createElement('root'); + $root->appendChild($test); + $test->setAttribute('id','new'); + # TEST + ok( $doc->validate, ' TODO : Add test name' ) if $do_validate; + $n = $doc->getElementById('new'); + # TEST*$do_validate + ok( $test->isSameNode( $n ), ' TODO : Add test name' ); + + $at = $n->getAttributeNode('id'); + # TEST*$do_validate + ok( $at, ' TODO : Add test name' ); + # TEST*$do_validate + ok( $at->isId, ' TODO : Add test name' ); + # _debug("1: new: ",$n); +} + +{ + my ($n,$doc,$root,$at); + # TEST + ok( $doc = $parser->parse_string($xml2), ' TODO : Add test name' ); + $root = $doc->getDocumentElement; + + $n = $doc->getElementById('foo'); + # TEST + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + # _debug("1: foo: ",$n); + + $doc->getDocumentElement->setAttribute('xml:id','bar'); + $n = $doc->getElementById('foo'); + # TEST + ok( !defined($n), ' TODO : Add test name' ); + # _debug("1: !foo: ",$n); + + $n = $doc->getElementById('bar'); + # TEST + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + + $at = $n->getAttributeNode('xml:id'); + # TEST + ok( $at, ' TODO : Add test name' ); + # TEST + ok( $at->isId, ' TODO : Add test name' ); + + $n->setAttribute('id','FOO'); + # TEST + ok( $at->isSameNode($n->getAttributeNode('xml:id')), ' TODO : Add test name' ); + + $at = $n->getAttributeNode('id'); + # TEST + ok( $at, ' TODO : Add test name' ); + # TEST + ok( ! $at->isId, ' TODO : Add test name' ); + + $at = $n->getAttributeNodeNS('http://www.w3.org/XML/1998/namespace','id'); + # TEST + ok( $at, ' TODO : Add test name' ); + # TEST + ok( $at->isId, ' TODO : Add test name' ); + # _debug("1: bar: ",$n); + + $doc->getDocumentElement->setAttributeNS('http://www.w3.org/XML/1998/namespace','id','baz'); + $n = $doc->getElementById('bar'); + # TEST + ok( !defined($n), ' TODO : Add test name' ); + # _debug("1: !bar: ",$n); + + $n = $doc->getElementById('baz'); + # TEST + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + # _debug("1: baz: ",$n); + $at = $n->getAttributeNodeNS('http://www.w3.org/XML/1998/namespace','id'); + # TEST + ok( $at, ' TODO : Add test name' ); + # TEST + ok( $at->isId, ' TODO : Add test name' ); + + $doc->getDocumentElement->setAttributeNS('http://www.w3.org/XML/1998/namespace','xml:id','bag'); + $n = $doc->getElementById('baz'); + # TEST + ok( !defined($n), ' TODO : Add test name' ); + # _debug("1: !baz: ",$n); + + $n = $doc->getElementById('bag'); + # TEST + ok( $root->isSameNode( $n ), ' TODO : Add test name' ); + # _debug("1: bag: ",$n); + + $n->removeAttribute('id'); + # TEST + is( $root->toString, '', ' TODO : Add test name' ); +} + +1; diff --git a/t/30keep_blanks.t b/t/30keep_blanks.t new file mode 100644 index 0000000..d2f446e --- /dev/null +++ b/t/30keep_blanks.t @@ -0,0 +1,34 @@ +#!/usr/bin/perl + +# This is a regression test for this bug: +# +# https://rt.cpan.org/Ticket/Display.html?id=76696 +# +# <<< +# Specifying ->keep_blanks(0) has no effect on parse_balanced_chunk anymore. +# The script below used to pass with XML::LibXML 1.69, but is broken since +# 1.70 and also with the newest 1.96. +# >>> +# +# Thanks to SREZIC for the report, the test and a patch. + +use strict; +use warnings; + +use Test::More tests => 1; + +use XML::LibXML; + +my $xml = <<'EOF'; + +EOF + +my $p = XML::LibXML->new; +$p->keep_blanks(0); + +# TEST +is ( + scalar( $p->parse_balanced_chunk($xml)->serialize() ), + "\n", + 'keep_blanks(0) removes the blanks after a roundtrip.', +); diff --git a/t/30xpathcontext.t b/t/30xpathcontext.t new file mode 100644 index 0000000..35ebef0 --- /dev/null +++ b/t/30xpathcontext.t @@ -0,0 +1,363 @@ + +use strict; +use warnings; + +use Test::More tests => 82; + +use XML::LibXML; +use XML::LibXML::XPathContext; + +my $doc = XML::LibXML->new->parse_string(<<'XML'); + +XML + +# test findnodes() in list context +my $xpath = '/*'; +# TEST:$exp=2; +for my $exp ($xpath, XML::LibXML::XPathExpression->new($xpath)) { + my @nodes = XML::LibXML::XPathContext->new($doc)->findnodes($exp); + # TEST*$exp + ok(@nodes == 1, ' TODO : Add test name'); + # TEST*$exp + ok($nodes[0]->nodeName eq 'foo', ' TODO : Add test name'); + # TEST*$exp + is( + (XML::LibXML::XPathContext->new($nodes[0])->findnodes('bar'))[0]->nodeName(), + 'bar', + ' TODO : Add test list', + ); +} + + +# test findnodes() in scalar context +# TEST:$exp=2; +for my $exp ($xpath, XML::LibXML::XPathExpression->new($xpath)) { + my $nl = XML::LibXML::XPathContext->new($doc)->findnodes($exp); + # TEST*$exp + ok($nl->pop->nodeName eq 'foo', ' TODO : Add test name'); + # TEST*$exp + ok(!defined($nl->pop), ' TODO : Add test name'); +} + +# test findvalue() +# TEST +ok(XML::LibXML::XPathContext->new($doc)->findvalue('1+1') == 2, ' TODO : Add test name'); +# TEST + +ok(XML::LibXML::XPathContext->new($doc)->findvalue(XML::LibXML::XPathExpression->new('1+1')) == 2, ' TODO : Add test name'); +# TEST + +ok(XML::LibXML::XPathContext->new($doc)->findvalue('1=2') eq 'false', ' TODO : Add test name'); +# TEST + +ok(XML::LibXML::XPathContext->new($doc)->findvalue(XML::LibXML::XPathExpression->new('1=2')) eq 'false', ' TODO : Add test name'); + +# test find() +# TEST +ok(XML::LibXML::XPathContext->new($doc)->find('/foo/bar')->pop->nodeName eq 'bar', ' TODO : Add test name'); +# TEST + +ok(XML::LibXML::XPathContext->new($doc)->find(XML::LibXML::XPathExpression->new('/foo/bar'))->pop->nodeName eq 'bar', ' TODO : Add test name'); + +# TEST + +ok(XML::LibXML::XPathContext->new($doc)->find('1*3')->value == '3', ' TODO : Add test name'); +# TEST + +ok(XML::LibXML::XPathContext->new($doc)->find('1=1')->to_literal eq 'true', ' TODO : Add test name'); + +my $doc1 = XML::LibXML->new->parse_string(<<'XML'); + +XML + +# test registerNs() +my $compiled = XML::LibXML::XPathExpression->new('/xxx:foo'); +my $xc = XML::LibXML::XPathContext->new($doc1); +$xc->registerNs('xxx', 'http://example.com/foobar'); +# TEST + +ok($xc->findnodes('/xxx:foo')->pop->nodeName eq 'foo', ' TODO : Add test name'); +# TEST + +ok($xc->findnodes($compiled)->pop->nodeName eq 'foo', ' TODO : Add test name'); +# TEST + +ok($xc->lookupNs('xxx') eq 'http://example.com/foobar', ' TODO : Add test name'); +# TEST + +ok($xc->exists('//xxx:bar/@a'), ' TODO : Add test name'); +# TEST + +is($xc->exists('//xxx:bar/@b'),0, ' TODO : Add test name'); +# TEST + +ok($xc->exists('xxx:bar',$doc1->getDocumentElement), ' TODO : Add test name'); + +# test unregisterNs() +$xc->unregisterNs('xxx'); +eval { $xc->findnodes('/xxx:foo') }; +# TEST + +ok($@, ' TODO : Add test name'); +# TEST + +ok(!defined($xc->lookupNs('xxx')), ' TODO : Add test name'); + +eval { $xc->findnodes($compiled) }; +# TEST + +ok($@, ' TODO : Add test name'); +# TEST + +ok(!defined($xc->lookupNs('xxx')), ' TODO : Add test name'); + +# test getContextNode and setContextNode +# TEST +ok($xc->getContextNode->isSameNode($doc1), ' TODO : Add test name'); +$xc->setContextNode($doc1->getDocumentElement); +# TEST + +ok($xc->getContextNode->isSameNode($doc1->getDocumentElement), ' TODO : Add test name'); +# TEST + +ok($xc->findnodes('.')->pop->isSameNode($doc1->getDocumentElement), ' TODO : Add test name'); + +# test xpath context preserves the document +my $xc2 = XML::LibXML::XPathContext->new( + XML::LibXML->new->parse_string(<<'XML')); + +XML +# TEST + +ok($xc2->findnodes('*')->pop->nodeName eq 'foo', ' TODO : Add test name'); + +# test xpath context preserves context node +my $doc2 = XML::LibXML->new->parse_string(<<'XML'); + +XML +my $xc3 = XML::LibXML::XPathContext->new($doc2->getDocumentElement); +$xc3->find('/'); +# TEST + +ok($xc3->getContextNode->toString() eq '', ' TODO : Add test name'); + +# check starting with empty context +my $xc4 = XML::LibXML::XPathContext->new(); +# TEST + +ok(!defined($xc4->getContextNode), ' TODO : Add test name'); +eval { $xc4->find('/') }; +# TEST + +ok($@, ' TODO : Add test name'); +my $cn=$doc2->getDocumentElement; +$xc4->setContextNode($cn); +# TEST + +ok($xc4->find('/'), ' TODO : Add test name'); +# TEST + +ok($xc4->getContextNode->isSameNode($doc2->getDocumentElement), ' TODO : Add test name'); +$cn=undef; +# TEST + +ok($xc4->getContextNode, ' TODO : Add test name'); +# TEST + +ok($xc4->getContextNode->isSameNode($doc2->getDocumentElement), ' TODO : Add test name'); + +# check temporarily changed context node +my ($bar)=$xc4->findnodes('foo/bar',$doc2); +# TEST + +ok($bar->nodeName eq 'bar', ' TODO : Add test name'); +# TEST + +ok($xc4->getContextNode->isSameNode($doc2->getDocumentElement), ' TODO : Add test name'); + +# TEST + +ok($xc4->findnodes('parent::*',$bar)->pop->nodeName eq 'foo', ' TODO : Add test name'); +# TEST + +ok($xc4->getContextNode->isSameNode($doc2->getDocumentElement), ' TODO : Add test name'); + +# testcase for segfault found by Steve Hay +my $xc5 = XML::LibXML::XPathContext->new(); +$xc5->registerNs('pfx', 'http://www.foo.com'); +$doc = XML::LibXML->new->parse_string(''); +$xc5->setContextNode($doc); +$xc5->findnodes('/'); +$xc5->setContextNode(undef); +$xc5->getContextNode(); +$xc5->setContextNode($doc); +$xc5->findnodes('/'); +# TEST + +ok(1, ' TODO : Add test name'); + +# check setting context position and size +# TEST +ok($xc4->getContextPosition() == -1, ' TODO : Add test name'); +# TEST + +ok($xc4->getContextSize() == -1, ' TODO : Add test name'); +eval { $xc4->setContextPosition(4); }; +# TEST + +ok($@, ' TODO : Add test name'); +eval { $xc4->setContextPosition(-4); }; +# TEST + +ok($@, ' TODO : Add test name'); +eval { $xc4->setContextSize(-4); }; +# TEST + +ok($@, ' TODO : Add test name'); +eval { $xc4->findvalue('position()') }; +# TEST + +ok($@, ' TODO : Add test name'); +eval { $xc4->findvalue('last()') }; +# TEST + +ok($@, ' TODO : Add test name'); + +$xc4->setContextSize(0); +# TEST + +ok($xc4->getContextSize() == 0, ' TODO : Add test name'); +# TEST + +ok($xc4->getContextPosition() == 0, ' TODO : Add test name'); +# TEST + +ok($xc4->findvalue('position()')==0, ' TODO : Add test name'); +# TEST + +ok($xc4->findvalue('last()')==0, ' TODO : Add test name'); + +$xc4->setContextSize(4); +# TEST + +ok($xc4->getContextSize() == 4, ' TODO : Add test name'); +# TEST + +ok($xc4->getContextPosition() == 1, ' TODO : Add test name'); +# TEST + +ok($xc4->findvalue('last()')==4, ' TODO : Add test name'); +# TEST + +ok($xc4->findvalue('position()')==1, ' TODO : Add test name'); +eval { $xc4->setContextPosition(5); }; +# TEST + +ok($@, ' TODO : Add test name'); +# TEST + +ok($xc4->findvalue('position()')==1, ' TODO : Add test name'); +# TEST + +ok($xc4->getContextSize() == 4, ' TODO : Add test name'); +$xc4->setContextPosition(4); +# TEST + +ok($xc4->findvalue('position()')==4, ' TODO : Add test name'); +# TEST + +ok($xc4->findvalue('position()=last()'), ' TODO : Add test name'); + +$xc4->setContextSize(-1); +# TEST + +ok($xc4->getContextPosition() == -1, ' TODO : Add test name'); +# TEST + +ok($xc4->getContextSize() == -1, ' TODO : Add test name'); +eval { $xc4->findvalue('position()') }; +# TEST + +ok($@, ' TODO : Add test name'); +eval { $xc4->findvalue('last()') }; +# TEST + +ok($@, ' TODO : Add test name'); + +{ + my $d = XML::LibXML->new()->parse_string(q~~); + { + my $x = XML::LibXML::XPathContext->new; + + # use the document's declaration + # TEST + ok( $x->findvalue('count(/x:a/y:a)',$d->documentElement)==1, ' TODO : Add test name' ); + + $x->registerNs('x', 'http://x1.com'); + # x now maps to http://x1.com, so it won't match the top-level element + # TEST + ok( $x->findvalue('count(/x:a)',$d->documentElement)==0, ' TODO : Add test name' ); + + $x->registerNs('x1', 'http://x.com'); + # x1 now maps to http://x.com + # x1:a will match the first element + # TEST + ok( $x->findvalue('count(/x1:a)',$d->documentElement)==1, ' TODO : Add test name' ); + # but not the second + # TEST + ok( $x->findvalue('count(/x1:a/x1:a)',$d->documentElement)==0, ' TODO : Add test name' ); + # this will work, though + # TEST + ok( $x->findvalue('count(/x1:a/x:a)',$d->documentElement)==1, ' TODO : Add test name' ); + # the same using y for http://x1.com + # TEST + ok( $x->findvalue('count(/x1:a/y:a)',$d->documentElement)==1, ' TODO : Add test name' ); + $x->registerNs('y', 'http://x.com'); + # y prefix remapped + # TEST + ok( $x->findvalue('count(/x1:a/y:a)',$d->documentElement)==0, ' TODO : Add test name' ); + # TEST + ok( $x->findvalue('count(/y:a/x:a)',$d->documentElement)==1, ' TODO : Add test name' ); + $x->registerNs('y', 'http://x1.com'); + # y prefix remapped back + # TEST + ok( $x->findvalue('count(/x1:a/y:a)',$d->documentElement)==1, ' TODO : Add test name' ); + $x->unregisterNs('x'); + # TEST + ok( $x->findvalue('count(/x:a)',$d->documentElement)==1, ' TODO : Add test name' ); + $x->unregisterNs('y'); + # TEST + ok( $x->findvalue('count(/x:a/y:a)',$d->documentElement)==1, ' TODO : Add test name' ); + } +} + +SKIP: +{ + # 37332 + if (XML::LibXML::LIBXML_VERSION() < 20617) { + skip( + 'xpath does not work on nodes without a document in libxml2 < 2.6.17', + 3 + ); + } + my $frag = XML::LibXML::DocumentFragment->new; + my $foo = XML::LibXML::Element->new('foo'); + my $xpc = XML::LibXML::XPathContext->new; + $frag->appendChild($foo); + $foo->appendTextChild('bar', 'quux'); + { + my @n = $xpc->findnodes('./foo', $frag); + # TEST + ok ( @n == 1, ' TODO : Add test name' ); + } + { + my @n = $xpc->findnodes('./foo/bar', $frag); + # TEST + ok ( @n == 1, ' TODO : Add test name' ); + } + { + my @n = $xpc->findnodes('./bar', $foo); + # TEST + ok ( @n == 1, ' TODO : Add test name' ); + } +} diff --git a/t/31xpc_functions.t b/t/31xpc_functions.t new file mode 100644 index 0000000..07b5c48 --- /dev/null +++ b/t/31xpc_functions.t @@ -0,0 +1,180 @@ +# -*- cperl -*- + +use strict; +use warnings; + +use Test::More tests => 32; + +use XML::LibXML; +use XML::LibXML::XPathContext; + +my $doc = XML::LibXML->new->parse_string(<<'XML'); +Bla +XML +# TEST +ok($doc, ' TODO : Add test name'); + +my $xc = XML::LibXML::XPathContext->new($doc); +$xc->registerNs('foo','urn:foo'); + +$xc->registerFunctionNS('copy','urn:foo', + sub { @_==1 ? $_[0] : die "too many parameters"} + ); + +# copy string, real, integer, nodelist +# TEST +ok($xc->findvalue('foo:copy("bar")') eq 'bar', ' TODO : Add test name'); +# TEST + +ok($xc->findvalue('foo:copy(3.14)') < 3.141, ' TODO : Add test name'); # can't use == here because of +# TEST + +ok($xc->findvalue('foo:copy(3.14)') > 3.139, ' TODO : Add test name'); # float math +# TEST + +ok($xc->findvalue('foo:copy(7)') == 7, ' TODO : Add test name'); +# TEST + +ok($xc->find('foo:copy(//*)')->size() == 3, ' TODO : Add test name'); +my ($foo)=$xc->findnodes('(//*)[2]'); +# TEST + +ok($xc->findnodes('foo:copy(//*)[2]')->pop->isSameNode($foo), ' TODO : Add test name'); + +# too many arguments +eval { $xc->findvalue('foo:copy(1,xyz)') }; +# TEST + +ok ($@, ' TODO : Add test name'); + +# without a namespace +$xc->registerFunction('dummy', sub { 'DUMMY' }); +# TEST + +ok($xc->findvalue('dummy()') eq 'DUMMY', ' TODO : Add test name'); + +# unregister it +$xc->unregisterFunction('dummy'); +eval { $xc->findvalue('dummy()') }; +# TEST + +ok ($@, ' TODO : Add test name'); + +# retister by name +sub dummy2 { 'DUMMY2' }; +$xc->registerFunction('dummy2', 'dummy2'); +# TEST + +ok($xc->findvalue('dummy2()') eq 'DUMMY2', ' TODO : Add test name'); + +# unregister +$xc->unregisterFunction('dummy2'); +eval { $xc->findvalue('dummy2()') }; +# TEST + +ok ($@, ' TODO : Add test name'); + + +# a mix of different arguments types +$xc->registerFunction('join', + sub { join shift, + map { (ref($_)&&$_->isa('XML::LibXML::Node')) ? $_->nodeName : $_ } + map { (ref($_)&&$_->isa('XML::LibXML::NodeList')) ? @$_ : $_ } + @_ + }); + +# TEST + +ok($xc->findvalue('join("","a","b","c")') eq 'abc', ' TODO : Add test name'); +# TEST + +ok($xc->findvalue('join("-","a",/foo,//*)') eq 'a-foo-foo-bar-bar', ' TODO : Add test name'); +# TEST + +ok($xc->findvalue('join("-",foo:copy(//*))') eq 'foo-bar-bar', ' TODO : Add test name'); + +# unregister foo:copy +$xc->unregisterFunctionNS('copy','urn:foo'); +eval { $xc->findvalue('foo:copy("bar")') }; +# TEST + +ok ($@, ' TODO : Add test name'); + +# test context reentrance +$xc->registerFunction('test-lock1', sub { $xc->find('string(//node())') }); +$xc->registerFunction('test-lock2', sub { $xc->findnodes('//bar') }); +# TEST + +ok($xc->find('test-lock1()') eq $xc->find('string(//node())'), ' TODO : Add test name'); +# TEST + +ok($xc->find('count(//bar)=2'), ' TODO : Add test name'); +# TEST + +ok($xc->find('count(test-lock2())=count(//bar)'), ' TODO : Add test name'); +# TEST + +ok($xc->find('count(test-lock2()|//bar)=count(//bar)'), ' TODO : Add test name'); +# TEST + +ok($xc->findnodes('test-lock2()[2]')->pop()->isSameNode($xc->findnodes('//bar[2]')), ' TODO : Add test name'); + +$xc->registerFunction('test-lock3', sub { $xc->findnodes('test-lock2(//bar)') }); +# TEST + +ok($xc->find('count(test-lock2())=count(test-lock3())'), ' TODO : Add test name'); +# TEST + +ok($xc->find('count(test-lock3())=count(//bar)'), ' TODO : Add test name'); +# TEST + +ok($xc->find('count(test-lock3()|//bar)=count(//bar)'), ' TODO : Add test name'); + +# function creating new nodes +$xc->registerFunction('new-foo', + sub { + return $doc->createElement('foo'); + }); +# TEST + +ok($xc->findnodes('new-foo()')->pop()->nodeName eq 'foo', ' TODO : Add test name'); +my ($test_node) = $xc->findnodes('new-foo()'); + +$xc->registerFunction('new-chunk', + sub { + XML::LibXML->new->parse_string('')->find('//a') + }); +# TEST + +ok($xc->findnodes('new-chunk()')->size() == 3, ' TODO : Add test name'); +my ($x)=$xc->findnodes('new-chunk()/parent::*'); +# TEST + +ok($x->nodeName() eq 'y', ' TODO : Add test name'); +# TEST + +ok($xc->findvalue('name(new-chunk()/parent::*)') eq 'y', ' TODO : Add test name'); +# TEST + +ok($xc->findvalue('count(new-chunk()/parent::*)=2'), ' TODO : Add test name'); + +my $largedoc=XML::LibXML->new->parse_string(''.('' x 3000).''); +$xc->setContextNode($largedoc); +$xc->registerFunction('pass1', + sub { + [$largedoc->findnodes('(//*)')] + }); +$xc->registerFunction('pass2',sub { $_[0] } ); +$xc->registerVarLookupFunc( sub { [$largedoc->findnodes('(//*)')] }, undef); +$largedoc->toString(); + +# TEST + +ok($xc->find('$a[name()="b"]')->size()==3000, ' TODO : Add test name'); +my @pass1=$xc->findnodes('pass1()'); +# TEST + +ok(@pass1==3001, ' TODO : Add test name'); +# TEST + +ok($xc->find('pass2(//*)')->size()==3001, ' TODO : Add test name'); diff --git a/t/32xpc_variables.t b/t/32xpc_variables.t new file mode 100644 index 0000000..baf3299 --- /dev/null +++ b/t/32xpc_variables.t @@ -0,0 +1,125 @@ +# -*- cperl -*- + +use strict; +use warnings; + +use Test::More tests => 35; + +use XML::LibXML; +use XML::LibXML::XPathContext; + +my $doc = XML::LibXML->new->parse_string(<<'XML'); +Bla +XML + +my %variables = ( + 'a' => XML::LibXML::Number->new(2), + 'b' => "b", + ); + +sub get_variable { + my ($data, $name, $uri)=@_; + return exists($data->{$name}) ? $data->{$name} : undef; +} + +# $c: nodelist +$variables{c} = XML::LibXML::XPathContext->new($doc)->findnodes('//bar'); +# TEST +ok($variables{c}->isa('XML::LibXML::NodeList'), ' TODO : Add test name'); +# TEST +ok($variables{c}->size() == 2, ' TODO : Add test name'); +# TEST +ok($variables{c}->get_node(1)->nodeName eq 'bar', ' TODO : Add test name'); + +# $d: a single element node +$variables{d} = XML::LibXML::XPathContext->new($doc)->findnodes('/*')->pop; +# TEST +ok($variables{d}->nodeName() eq 'foo', ' TODO : Add test name'); + +# $e: a single text node +$variables{e} = XML::LibXML::XPathContext->new($doc)->findnodes('//text()'); +# TEST +ok($variables{e}->get_node(1)->data() eq 'Bla', ' TODO : Add test name'); + +# $f: a single attribute node +$variables{f} = XML::LibXML::XPathContext->new($doc)->findnodes('//@*')->pop; +# TEST +ok($variables{f}->nodeName() eq 'a', ' TODO : Add test name'); +# TEST +ok($variables{f}->value() eq 'b', ' TODO : Add test name'); + +# $f: a single document node +$variables{g} = XML::LibXML::XPathContext->new($doc)->findnodes('/')->pop; +# TEST +ok($variables{g}->nodeType() == XML::LibXML::XML_DOCUMENT_NODE, ' TODO : Add test name'); + +# test registerVarLookupFunc() and getVarLookupData() +my $xc = XML::LibXML::XPathContext->new($doc); +# TEST +ok(!defined($xc->getVarLookupData), ' TODO : Add test name'); +$xc->registerVarLookupFunc(\&get_variable,\%variables); +# TEST +ok(defined($xc->getVarLookupData), ' TODO : Add test name'); +my $h1=$xc->getVarLookupData; +my $h2=\%variables; +# TEST +ok("$h1" eq "$h2", ' TODO : Add test name' ); +# TEST +ok($h1 eq $xc->getVarLookupData, ' TODO : Add test name'); +# TEST +ok(\&get_variable eq $xc->getVarLookupFunc, ' TODO : Add test name'); + +# test values returned by XPath queries +# TEST +ok($xc->find('$a') == 2, ' TODO : Add test name'); +# TEST +ok($xc->find('$b') eq "b", ' TODO : Add test name'); +# TEST +ok($xc->findnodes('//@a[.=$b]')->size() == 1, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('//@a[.=$b]')->size() == 1, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$c')->size() == 2, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$c')->size() == 2, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$c[1]')->pop->isSameNode($variables{c}->get_node(1)), ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$c[@a="b"]')->size() == 1, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$d')->size() == 1, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$d/*')->size() == 2, ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$d')->pop->isSameNode($variables{d}), ' TODO : Add test name'); +# TEST +ok($xc->findvalue('$e') eq 'Bla', ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$e')->pop->isSameNode($variables{e}->get_node(1)), ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$c[@*=$f]')->size() == 1, ' TODO : Add test name'); +# TEST +ok($xc->findvalue('$f') eq 'b', ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$f')->pop->nodeName eq 'a', ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$f')->pop->isSameNode($variables{f}), ' TODO : Add test name'); +# TEST +ok($xc->findnodes('$g')->pop->isSameNode($variables{g}), ' TODO : Add test name'); + +# unregiser variable lookup +$xc->unregisterVarLookupFunc(); +eval { $xc->find('$a') }; +# TEST +ok($@, ' TODO : Add test name'); +# TEST +ok(!defined($xc->getVarLookupFunc()), ' TODO : Add test name'); + +my $foo='foo'; +$xc->registerVarLookupFunc(sub {},$foo); +# TEST +ok($xc->getVarLookupData eq 'foo', ' TODO : Add test name'); +$foo=undef; +# TEST +ok($xc->getVarLookupData eq 'foo', ' TODO : Add test name'); + diff --git a/t/35huge_mode.t b/t/35huge_mode.t new file mode 100644 index 0000000..1ccb334 --- /dev/null +++ b/t/35huge_mode.t @@ -0,0 +1,75 @@ +#!/usr/bin/perl +# +# Having 'XML_PARSE_HUGE' enabled can make an application vulnerable to +# denial of service through entity expansion attacks. This test script +# confirms that huge document mode is disabled by default and that this +# does not adversely affect expansion of sensible entity definitions. +# + +use strict; +use warnings; + +use Test::More; + +use XML::LibXML; + +if (XML::LibXML::LIBXML_VERSION() < 20700) { + plan skip_all => "XML_PARSE_HUGE option not supported for libxml2 < 2.7.0"; +} +else { + plan tests => 5; +} + +my $benign_xml = <<'EOF'; + + +]> +&lol; +EOF + +my $evil_xml = <<'EOF'; + + + + + + + + + + + +]> +&lol9; +EOF + +my($parser, $doc); + +$parser = XML::LibXML->new; +#$parser->set_option(huge => 0); +# TEST +ok(!$parser->get_option('huge'), "huge mode disabled by default"); + +$doc = eval { $parser->parse_string($evil_xml); }; + +# TEST +isnt("$@", "", "exception thrown during parse"); +# TEST +like($@, qr/entity.*loop/si, "exception refers to entity reference loop"); + + +$parser = XML::LibXML->new; + +$doc = eval { $parser->parse_string($benign_xml); }; + +# TEST +is("$@", "", "no exception thrown during parse"); + +my $body = $doc->findvalue( '/lolz' ); +# TEST +is($body, 'haha', 'entity was parsed and expanded correctly'); + +exit; + diff --git a/t/40reader.t b/t/40reader.t new file mode 100644 index 0000000..f08c2ab --- /dev/null +++ b/t/40reader.t @@ -0,0 +1,319 @@ +#!/usr/bin/perl -w +use strict; +use warnings; +use Test::More; + +use XML::LibXML; + +BEGIN{ + if (1000*$] < 5008) { + plan skip_all => "Reader interface only supported in Perl >= 5.8"; + exit; + } elsif (!XML::LibXML::HAVE_READER()) { + plan skip_all => "Reader not supported in this libxml2 build"; + exit; + } else { + plan tests => 100; + } + + use_ok('XML::LibXML::Reader'); +}; + +# TEST*100 +my $file = "test/textReader/countries.xml"; +{ + my $reader = XML::LibXML::Reader->new(location => $file, {expand_entities => 1}); + isa_ok($reader, "XML::LibXML::Reader"); + is($reader->read, 1, "read"); + is($reader->byteConsumed, 488, "byteConsumed"); + is($reader->attributeCount, 0, "attributeCount"); + is($reader->baseURI, $file, "baseURI"); + is($reader->encoding, 'UTF-8', "encoding"); + is($reader->localName, 'countries', "localName"); + is($reader->name, 'countries', "name"); + is($reader->prefix, undef, "prefix"); + is($reader->value, undef, "value"); + is($reader->xmlLang, undef, "xmlLang"); + is($reader->xmlVersion, '1.0', "xmlVersion"); + $reader->read; + $reader->read; + $reader->read; # skipping to country node + is($reader->name, 'country', "skipping to country"); + is($reader->depth, "1", "depth"); + is($reader->getAttribute("acronym"), "AL", "getAttribute"); + is($reader->getAttributeNo(0), "AL", "getAttributeNo"); + is($reader->getAttributeNs("acronym", undef), "AL", "getAttributeNs"); + is($reader->lineNumber, "20", "lineNumber"); + is($reader->columnNumber, "1", "columnNumber"); + ok($reader->hasAttributes, "hasAttributes"); + ok(! $reader->hasValue, "hasValue"); + ok(! $reader->isDefault, "isDefault"); + ok(! $reader->isEmptyElement, "isEmptyElement"); + ok(! $reader->isNamespaceDecl, "isNamespaceDecl"); + ok(! $reader->isValid, "isValid"); + is($reader->localName, "country", "localName"); + is($reader->lookupNamespace(undef), undef, "lookupNamespace"); + + ok($reader->moveToAttribute("acronym"), "moveToAttribute"); + ok($reader->moveToAttributeNo(0), "moveToAttributeNo"); + ok($reader->moveToAttributeNs("acronym", undef), "moveToAttributeNs"); + + ok($reader->moveToElement, "moveToElement"); + ok($reader->moveToFirstAttribute, "moveToFirstAttribute"); + ok($reader->moveToNextAttribute, "moveToNextAttribute"); + ok($reader->readAttributeValue, "attributeValue"); + + $reader->moveToElement; + is($reader->name, "country", "name"); + is($reader->namespaceURI, undef, "namespaceURI"); + + ok($reader->nextSibling, "nextSibling"); + is($reader->nodeType, XML_READER_TYPE_SIGNIFICANT_WHITESPACE, "nodeType"); + is($reader->prefix, undef, "prefix"); + + is($reader->readInnerXml, "", "readInnerXml"); + is($reader->readOuterXml, "\n", "readOuterXml"); + ok($reader->readState, "readState"); + + is($reader->getParserProp('expand_entities'), 1, "getParserProp"); + + ok($reader->standalone, "standalone"); + is($reader->value, "\n", "value"); + is($reader->xmlLang, undef, "xmlLang"); + + + ok($reader->close, "close"); +} + + +# FD interface +for my $how (qw(FD IO)) { +# my $fd; + open my $fd, '<', $file or die "cannot open $file: $!\n"; + my $reader = XML::LibXML::Reader->new($how => $fd, URI => $file); + isa_ok($reader, "XML::LibXML::Reader"); + $reader->read; + $reader->read; + is($reader->name, "countries","name in fd"); + $reader->read; + $reader->read; + $reader->read; + close $fd; +} + +# scalar interface +{ + open my $fd, '<', $file or die "cannot open $file: $!\n"; + my $doc; + { + local $/; + $doc = <$fd>; + } + close $fd; + my $reader = XML::LibXML::Reader->new(string => $doc, URI => $file); + isa_ok($reader, "XML::LibXML::Reader"); + $reader->read; + $reader->read; + is($reader->name, "countries","name in string"); +} + +# DOM +{ + my $DOM = XML::LibXML->new->parse_file($file); + my $reader = XML::LibXML::Reader->new(DOM => $DOM); + isa_ok($reader, "XML::LibXML::Reader"); + $reader->read; + $reader->read; + is($reader->name, "countries","name in string"); + ok($reader->document,"document"); + ok($reader->document->isSameNode($DOM),"document is DOM"); +} + +# Expand +{ + my ($node1,$node2, $node3); + my $xml = <<'EOF'; + + text1 +
text2 xx foo x + xx preserved yy FOO + + + + + + +EOF + { + my $reader = XML::LibXML::Reader->new(string => $xml); + $reader->preservePattern('//PP'); + $reader->preservePattern('//x:ZZ',{ x => "foo"}); + + isa_ok($reader, "XML::LibXML::Reader"); + $reader->nextElement; + is($reader->name, "root","root node"); + $reader->nextElement; + $node1 = $reader->copyCurrentNode(1); + is($node1->nodeName, "AA","deep copy node"); + $reader->next; + ok($reader->nextElement("DD"),"next named element"); + is($reader->name, "DD","name"); + is($reader->readOuterXml, "
","readOuterXml"); + ok($reader->read,"read"); + is($reader->name, "BB","name"); + $node2 = $reader->copyCurrentNode(0); + is($node2->nodeName, "BB","shallow copy node"); + $reader->nextElement; + is($reader->name, "CC","nextElement"); + $reader->nextSibling; + is( $reader->nodeType(), XML_READER_TYPE_TEXT, "text node" ); + is( $reader->value,"foo", "text content" ); + $reader->skipSiblings; + is( $reader->nodeType(), XML_READER_TYPE_END_ELEMENT, "end element type" ); + $reader->nextElement; + is($reader->name, "EE","name"); + ok($reader->nextSiblingElement("ZZ","foo"),"namespace"); + is($reader->namespaceURI, "foo","namespaceURI"); + $reader->nextElement; + $node3= $reader->preserveNode; + is( $reader->readOuterXml(), $node3->toString(),"outer xml"); + ok($node3,"preserve node"); + $reader->finish; + my $doc = $reader->document; + ok($doc, "document"); + ok($doc->documentElement, "doc root element"); + is($doc->documentElement->toString,q(preserved), + "preserved content"); + } + ok($node1->hasChildNodes,"copy w/ child nodes"); + ok($node1->toString(),q( text1 )); + ok(!defined $node2->firstChild, "copy w/o child nodes"); + ok($node2->toString(),q()); + ok($node3->toString(),q()); +} + +{ + my $bad_xml = <<'EOF'; + + + + foo + + + foo + + +EOF + my $reader = XML::LibXML::Reader->new( + string => $bad_xml, + URI => "mystring.xml" + ); + eval { $reader->finish }; + my $Err = $@; + use Data::Dumper; + # print Dumper($Err); + # print $Err; + ok((defined($Err) and $Err =~ /in mystring.xml at line 3:|mystring.xml:5:/), + 'caught the error'); +} + +{ + my $rng = "test/relaxng/demo.rng"; + for my $RNG ($rng, XML::LibXML::RelaxNG->new(location => $rng)) { + { + my $reader = XML::LibXML::Reader->new( + location => "test/relaxng/demo.xml", + RelaxNG => $RNG, + ); + ok($reader->finish, "validate using ".(ref($RNG) ? 'XML::LibXML::RelaxNG' : 'RelaxNG file')); + } + { + my $reader = XML::LibXML::Reader->new( + location => "test/relaxng/invaliddemo.xml", + RelaxNG => $RNG, + ); + eval { $reader->finish }; + print $@; + ok($@, "catch validation error for a ".(ref($RNG) ? 'XML::LibXML::RelaxNG' : 'RelaxNG file')); + } + + } +} + +SKIP: { + if ((!XML::LibXML::HAVE_SCHEMAS) + or (XML::LibXML::LIBXML_DOTTED_VERSION eq '2.9.4') + ) + { + skip "https://github.com/shlomif/libxml2-2.9.4-reader-schema-regression", 4; + } + my $xsd = "test/schema/schema.xsd"; + for my $XSD ($xsd, XML::LibXML::Schema->new(location => $xsd)) { + { + my $reader = XML::LibXML::Reader->new( + location => "test/schema/demo.xml", + Schema => $XSD, + ); + ok($reader->finish, "validate using ".(ref($XSD) ? 'XML::LibXML::Schema' : 'Schema file')); + } + { + my $reader = XML::LibXML::Reader->new( + location => "test/schema/invaliddemo.xml", + Schema => $XSD, + ); + eval { $reader->finish }; + ok($@, "catch validation error for ".(ref($XSD) ? 'XML::LibXML::Schema' : 'Schema file')); + } + } +} + +# Patterns +{ + my ($node1,$node2, $node3); + my $xml = <<'EOF'; + + text1 +
text2 xx foo x + xx preserved yy FOO + + + + + + +EOF + my $pattern = XML::LibXML::Pattern->new('//inner|CC|/root/y:ZZ',{y=>'foo'}); + ok($pattern); + { + my $reader = XML::LibXML::Reader->new(string => $xml); + ok($reader); + my $matches=''; + while ($reader->read) { + if ($reader->matchesPattern($pattern)) { + $matches.=$reader->nodePath.','; + } + } + ok($matches,'/root/AA/inner,/root/BB/CC,/root/*,'); + } + { + my $reader = XML::LibXML::Reader->new(string => $xml); + ok($reader); + my $matches=''; + while ($reader->nextPatternMatch($pattern)) { + $matches.=$reader->nodePath.','; + } + ok($matches,'/root/AA/inner,/root/BB/CC,/root/*,'); + } + { + my $dom = XML::LibXML->new->parse_string($xml); + ok($dom); + my $matches=''; + for my $node ($dom->findnodes('//node()|@*')) { + if ($pattern->matchesNode($node)) { + $matches.=$node->nodePath.','; + } + } + ok($matches,'/root/AA/inner,/root/BB/CC,/root/*,'); + } +} + diff --git a/t/40reader_mem_error.t b/t/40reader_mem_error.t new file mode 100644 index 0000000..c717ef8 --- /dev/null +++ b/t/40reader_mem_error.t @@ -0,0 +1,367 @@ +#!/usr/bin/perl + +# This code used to generate a memory error in valgrind/etc. +# Testing it. + +use strict; +use warnings; + +use Test::More; + +use utf8; + +use XML::LibXML; + +BEGIN { + if (!XML::LibXML::HAVE_READER()) { + plan skip_all => 'Reader not supported in this libxml2 build'; + exit; + } + else { + plan tests => 2; + } +} + +package Test::XML::Ordered; + +use XML::LibXML::Reader; + +use Test::More; + +use parent 'Exporter'; + +use vars '@EXPORT_OK'; + +@EXPORT_OK = (qw(is_xml_ordered)); + +sub new +{ + my $class = shift; + my $self = {}; + + bless $self, $class; + + $self->_init(@_); + + return $self; +} + +sub _got +{ + return shift->{got_reader}; +} + +sub _expected +{ + return shift->{expected_reader}; +} + +sub _init +{ + my ($self, $args) = @_; + + $self->{got_reader} = + XML::LibXML::Reader->new(@{$args->{got_params}}); + $self->{expected_reader} = + XML::LibXML::Reader->new(@{$args->{expected_params}}); + + $self->{diag_message} = $args->{diag_message}; + + $self->{got_end} = 0; + $self->{expected_end} = 0; + + return; +} + +sub _got_end +{ + return shift->{got_end}; +} + +sub _expected_end +{ + return shift->{expected_end}; +} + +sub _read_got +{ + my $self = shift; + + if ($self->_got->read() <= 0) + { + $self->{got_end} = 1; + } + + return; +} + +sub _read_expected +{ + my $self = shift; + + if ($self->_expected->read() <= 0) + { + $self->{expected_end} = 1; + } + + return; +} + +sub _next_elem +{ + my $self = shift; + + $self->_read_got(); + $self->_read_expected(); + + return; +} + +sub _ns +{ + my $elem = shift; + my $ns = $elem->namespaceURI(); + + return defined($ns) ? $ns : ""; +} + +sub _compare_loop +{ + my $self = shift; + + my $calc_prob = sub { + my $args = shift; + + if (!exists($args->{param})) + { + die "No 'param' specified."; + } + return + { + verdict => 0, + param => $args->{param}, + } + }; + + NODE_LOOP: + while ((!$self->_got_end()) && (!$self->_expected_end())) + { + my $type = $self->_got->nodeType(); + my $exp_type = $self->_expected->nodeType(); + + if ($type == XML_READER_TYPE_SIGNIFICANT_WHITESPACE()) + { + $self->_read_got(); + redo NODE_LOOP; + } + elsif ($exp_type == XML_READER_TYPE_SIGNIFICANT_WHITESPACE()) + { + $self->_read_expected(); + redo NODE_LOOP; + } + elsif ($type != $exp_type) + { + return $calc_prob->({param => "nodeType"}); + } + elsif ($type == XML_READER_TYPE_TEXT()) + { + my $got_text = $self->_got->value(); + my $expected_text = $self->_expected->value(); + + foreach my $t ($got_text, $expected_text) + { + $t =~ s{\A\s+}{}ms; + $t =~ s{\s+\z}{}ms; + $t =~ s{\s+}{ }ms; + } + if ($got_text ne $expected_text) + { + return $calc_prob->({param => "text"}); + } + } + elsif ($type == XML_READER_TYPE_ELEMENT()) + { + if ($self->_got->name() ne $self->_expected->name()) + { + return $calc_prob->({param => "element_name"}); + } + if (_ns($self->_got) ne _ns($self->_expected)) + { + return $calc_prob->({param => "mismatch_ns"}); + } + } + } + continue + { + $self->_next_elem(); + } + + return { verdict => 1}; +} + +sub _get_diag_message +{ + my ($self, $status_struct) = @_; + + if ($status_struct->{param} eq "nodeType") + { + return + "Different Node Type!\n" + . "Got: " . $self->_got->nodeType() . " at line " . $self->_got->lineNumber() + . "\n" + . "Expected: " . $self->_expected->nodeType() . " at line " . $self->_expected->lineNumber() + ; + } + elsif ($status_struct->{param} eq "text") + { + return + "Texts differ: Got at " . $self->_got->lineNumber(). " with value <<@{[$self->_got->value()]}>> ; Expected at ". $self->_expected->lineNumber() . " with value <<@{[$self->_expected->value()]}>>."; + } + elsif ($status_struct->{param} eq "element_name") + { + return + "Got name: " . $self->_got->name(). " at " . $self->_got->lineNumber() . + " ; " . + "Expected name: " . $self->_expected->name() . " at " .$self->_expected->lineNumber(); + } + elsif ($status_struct->{param} eq "mismatch_ns") + { + return + "Got Namespace: " . _ns($self->_got). " at " . $self->_got->lineNumber() . + " ; " . + "Expected Namespace: " . _ns($self->_expected) . " at " .$self->_expected->lineNumber(); + } + + else + { + die "Unknown param"; + } +} + +sub compare +{ + local $Test::Builder::Level = $Test::Builder::Level+1; + + my $self = shift; + + $self->_next_elem(); + + my $status_struct = $self->_compare_loop(); + my $verdict = $status_struct->{verdict}; + + if (!$verdict) + { + diag($self->_get_diag_message($status_struct)); + } + + return ok($verdict, $self->{diag_message}); +} + +sub is_xml_ordered +{ + local $Test::Builder::Level = $Test::Builder::Level+1; + + my ($got_params, $expected_params, $message) = @_; + + my $comparator = + Test::XML::Ordered->new( + { + got_params => $got_params, + expected_params => $expected_params, + diag_message => $message, + } + ); + + return $comparator->compare(); +} + +my $xml_source = <<'EOF'; + + + + + David vs. Goliath - Part I + + +
+

David vs. Goliath - Part I

+
+

The Top Section

+

+ King David and Goliath were standing by each other. +

+

+ David said unto Goliath: "I will shoot you. I swear I will" +

+
+

Goliath's Response

+

+ Goliath was not amused. +

+

+ He said to David: "Oh, really. David, the red-headed!". +

+

+ David started listing Goliath's disadvantages: +

+
+
+
+ + +EOF + +my $final_source = <<'EOF'; + + + + + David vs. Goliath - Part I + + +
+

David vs. Goliath - Part I

+
+

The Top Section

+

+ King David and Goliath were standing by each other. +

+

+ David said unto Goliath: "I will shoot you. I swear I will" +

+
+

Goliath's Response

+

+ Goliath was not amused. +

+

+ He said to David: "Oh, really. David, the red-headed!". +

+

+ David started listing Goliath's disadvantages: +

+
+
+
+ + +EOF + +SKIP: { + # RT #84564 + # https://bugzilla.gnome.org/show_bug.cgi?id=447899 + if (XML::LibXML::LIBXML_RUNTIME_VERSION() < 20704) { + skip('Known double-free with libxml2 < 2.7.4', 1); + } + + my @common = (validation => 0, load_ext_dtd => 0, no_network => 1); + # TEST + Test::XML::Ordered::is_xml_ordered( + [ string => $final_source, @common,], + [ string => $xml_source, @common,], + "foo", + ); +} + +# TEST +ok (1, "Finished"); diff --git a/t/41xinclude.t b/t/41xinclude.t new file mode 100644 index 0000000..76f4911 --- /dev/null +++ b/t/41xinclude.t @@ -0,0 +1,43 @@ +#!/usr/bin/perl -w +use strict; +use warnings; +use XML::LibXML; +use Test::More tests => 7; + +# tests for bug #24953: External entities not expanded in included file (XInclude) + +my $parser = XML::LibXML->new; +my $file = 'test/xinclude/test.xml'; +{ + $parser->expand_xinclude(0); + $parser->expand_entities(1); + # TEST + ok (scalar ($parser->parse_file($file)->toString() !~ /IT WORKS/), ' TODO : Add test name'); +} +{ + $parser->expand_xinclude(1); + $parser->expand_entities(0); + # TEST + ok (scalar($parser->parse_file($file)->toString() !~ /IT WORKS/), ' TODO : Add test name'); +} +{ + $parser->expand_xinclude(1); + $parser->expand_entities(1); + # TEST + ok (scalar($parser->parse_file($file)->toString() =~ /IT WORKS/), ' TODO : Add test name'); +} +{ + $parser->expand_xinclude(0); + my $doc = $parser->parse_file($file); + # TEST + ok( $doc->process_xinclude({expand_entities=>0}), ' TODO : Add test name' ); + # TEST + ok( scalar($doc->toString() !~ /IT WORKS/), ' TODO : Add test name' ); +} +{ + my $doc = $parser->parse_file($file); + # TEST + ok( $doc->process_xinclude({expand_entities=>1}), ' TODO : Add test name' ); + # TEST + ok( scalar($doc->toString() =~ /IT WORKS/), ' TODO : Add test name' ); +} diff --git a/t/42common.t b/t/42common.t new file mode 100644 index 0000000..8c348e4 --- /dev/null +++ b/t/42common.t @@ -0,0 +1,93 @@ +use strict; +use warnings; + +# Should be 12. +use Test::More tests => 12; + +use XML::LibXML::Common qw( :libxml :encoding ); + +use constant TEST_STRING_GER => "Hänsel und Gretel"; +use constant TEST_STRING_GER2 => "täst"; +use constant TEST_STRING_UTF => 'test'; +use constant TEST_STRING_JP => 'À¸ÇþÀ¸ÊÆÀ¸Íñ'; + +# TEST +ok(1, 'Loading'); + +######################### + +# TEST +is (XML_ELEMENT_NODE, 1, 'XML_ELEMENT_NODE is 1.' ); + +# encoding(); + +# TEST +is (decodeFromUTF8( + 'iso-8859-1', encodeToUTF8('iso-8859-1', TEST_STRING_GER2 ) + ), + TEST_STRING_GER2, + 'Roundup trip from UTF-8 to ISO-8859-1 and back.', +); + +# TEST +is ( decodeFromUTF8( + 'UTF-8' , encodeToUTF8('UTF-8', TEST_STRING_UTF ) + ), + TEST_STRING_UTF, + 'Rountrip trip through UTF-8', +); + + +my $u16 = + decodeFromUTF8( 'UTF-16', encodeToUTF8('UTF-8', TEST_STRING_UTF ) ) + ; + +# TEST +is ( length($u16), 2*length(TEST_STRING_UTF), + 'UTF-16 String is twice as long.' +); + +my $u16be = decodeFromUTF8( 'UTF-16BE', + encodeToUTF8('UTF-8', TEST_STRING_UTF ) ); +# TEST +is ( length($u16be), 2*length(TEST_STRING_UTF), + 'UTF-16BE String is twice as long.' +); + +my $u16le = decodeFromUTF8( 'UTF-16LE', + encodeToUTF8('UTF-8', TEST_STRING_UTF ) ); +# TEST +is ( length($u16le), 2*length(TEST_STRING_UTF), + 'UTF-16LE String is twice as long.' +); + +# Bad encoding name tests. +eval { + my $str = encodeToUTF8( "foo" , TEST_STRING_GER2 ); +}; +# TEST +ok( $@, 'Exception was thrown.' ); + +# TEST +is (encodeToUTF8( 'UTF-16' , '' ), '', 'Encoding empty string to UTF-8'); + +# TEST +ok (!defined(encodeToUTF8( 'UTF-16' , undef )), + 'encoding undef to UTF-8 is undefined' +); + +# TEST +is (decodeFromUTF8( 'UTF-16' , '' ), '', 'decodeFromUTF8 of empty string'); + +# TEST +ok (!defined(decodeFromUTF8( 'UTF-16' , undef )), 'decodeFromUTF8 of undef.'); + +# here should be a test to test badly encoded strings. but for some +# reasons i am unable to create an appropriate test :( + +# uncomment these lines if your system is capable to handel not only i +# so latin 1 +#ok( decodeFromUTF8('EUC-JP', +# encodeToUTF8('EUC-JP', +# TEST_STRING_JP ) ), +# TEST_STRING_JP ); diff --git a/t/43options.t b/t/43options.t new file mode 100644 index 0000000..d46fe23 --- /dev/null +++ b/t/43options.t @@ -0,0 +1,229 @@ +# -*- cperl -*- + +use strict; +use warnings; + +use Test::More tests => 291; + +use XML::LibXML; + +# TEST:$all=23 +my @all = qw( + recover + expand_entities + load_ext_dtd + complete_attributes + validation + suppress_errors + suppress_warnings + pedantic_parser + no_blanks + expand_xinclude + xinclude + no_network + clean_namespaces + no_cdata + no_xinclude_nodes + old10 + no_base_fix + huge + oldsax + line_numbers + URI + base_uri + gdome +); + +# TEST:$old=8 +my %old = map { $_=> 1 } qw( +recover +pedantic_parser +line_numbers +load_ext_dtd +complete_attributes +expand_xinclude +clean_namespaces +no_network +); + + +{ + my $p = XML::LibXML->new(); + for my $opt (@all) { + my $ret = 0; + # TEST*$all + ok( + ($p->get_option($opt)||0) == $ret + , + "Testing option $opt", + ); + } + # TEST + ok(! $p->option_exists('foo'), ' TODO : Add test name'); + + # TEST + ok( $p->keep_blanks() == 1, ' TODO : Add test name' ); + # TEST + ok( $p->set_option(no_blanks => 1) == 1, ' TODO : Add test name'); + # TEST + ok( ! $p->keep_blanks(), ' TODO : Add test name' ); + # TEST + ok( $p->keep_blanks(1) == 1, ' TODO : Add test name' ); + # TEST + ok( ! $p->get_option('no_blanks'), ' TODO : Add test name' ); + + my $uri = 'http://foo/bar'; + + # TEST + ok( $p->set_option(URI => $uri) eq $uri, ' TODO : Add test name'); + # TEST + ok ($p->base_uri() eq $uri, ' TODO : Add test name'); + # TEST + ok ($p->base_uri($uri.'2') eq $uri.'2', ' TODO : Add test name'); + # TEST + ok( $p->get_option('URI') eq $uri.'2', ' TODO : Add test name'); + # TEST + ok( $p->get_option('base_uri') eq $uri.'2', ' TODO : Add test name'); + # TEST + ok( $p->set_option(base_uri => $uri) eq $uri, ' TODO : Add test name'); + # TEST + ok( $p->set_option(URI => $uri) eq $uri, ' TODO : Add test name'); + # TEST + ok ($p->base_uri() eq $uri, ' TODO : Add test name'); + + # TEST + ok( ! $p->recover_silently(), ' TODO : Add test name' ); + $p->set_option(recover => 1); + + # TEST + ok( $p->recover_silently() == 0, ' TODO : Add test name' ); + $p->set_option(recover => 2); + # TEST + ok( $p->recover_silently() == 1, ' TODO : Add test name' ); + # TEST + ok( $p->recover_silently(0) == 0, ' TODO : Add test name' ); + # TEST + ok( $p->get_option('recover') == 0, ' TODO : Add test name' ); + # TEST + ok( $p->recover_silently(1) == 1, ' TODO : Add test name' ); + # TEST + ok( $p->get_option('recover') == 2, ' TODO : Add test name' ); + + # TEST + ok( $p->expand_entities() == 0, 'expand_entities should default to false' ); + # TEST + ok( $p->load_ext_dtd() == 0, 'load_ext_dtd should default to false' ); + $p->load_ext_dtd(1); + # TEST + ok( $p->load_ext_dtd() == 1, 'load_ext_dtd should be true after being set to true' ); + $p->load_ext_dtd(0); + $p->expand_entities(1); + # TEST + ok( $p->expand_entities() == 1, 'expand_entities should be true after being set to true' ); + # TEST + ok( $p->load_ext_dtd() == 1, 'load_ext_dtd should be true after expand_entities is set to true' ); + $p->expand_entities(0); + # TEST + ok( $p->expand_entities() == 0, 'expand_entities should be false after being set to false' ); +} + +{ + my $XML = <<'EOT'; + + +]> + + + example.com + XXE + + &xxe; + example.com + XXE here + + + +EOT + + my $sys_line = <<'EOT'; +&xxe; +EOT + + chomp ($sys_line); + + my $parser = XML::LibXML->new( + expand_entities => 0, + load_ext_dtd => 0, + no_network => 1, + expand_xinclude => 0, + ); + my $XML_DOC = $parser->load_xml( string => $XML, ); + + # TEST + ok (scalar($XML_DOC->toString() =~ m{\Q$sys_line\E}), + "expand_entities is preserved after _clone()/etc." + ); +} + +{ + my $p = XML::LibXML->new(map { $_=>1 } @all); + for my $opt (@all) { + # TEST*$all + ok($p->get_option($opt)==1, ' TODO : Add test name'); + # TEST*$old + if ($old{$opt}) + { + ok($p->$opt()==1, ' TODO : Add test name') + } + } + + for my $opt (@all) { + # TEST*$all + ok($p->option_exists($opt), ' TODO : Add test name'); + # TEST*$all + ok($p->set_option($opt,0)==0, ' TODO : Add test name'); + # TEST*$all + ok($p->get_option($opt)==0, ' TODO : Add test name'); + # TEST*$all + ok($p->set_option($opt,1)==1, ' TODO : Add test name'); + # TEST*$all + ok($p->get_option($opt)==1, ' TODO : Add test name'); + if ($old{$opt}) { + # TEST*$old + ok($p->$opt()==1, ' TODO : Add test name'); + # TEST*$old + ok($p->$opt(0)==0, ' TODO : Add test name'); + # TEST*$old + ok($p->$opt()==0, ' TODO : Add test name'); + # TEST*$old + ok($p->$opt(1)==1, ' TODO : Add test name'); + } + + } +} + +{ + my $p = XML::LibXML->new(map { $_=>0 } @all); + for my $opt (@all) { + # TEST*$all + ok($p->get_option($opt)==0, ' TODO : Add test name'); + # TEST*$old + if ($old{$opt}) + { + ok($p->$opt()==0, ' TODO : Add test name'); + } + } +} + +{ + my $p = XML::LibXML->new({map { $_=>1 } @all}); + for my $opt (@all) { + # TEST*$all + ok($p->get_option($opt)==1, ' TODO : Add test name'); + # TEST*$old + if ($old{$opt}) + { + ok($p->$opt()==1, ' TODO : Add test name'); + } + } +} diff --git a/t/44extent.t b/t/44extent.t new file mode 100644 index 0000000..7f8836d --- /dev/null +++ b/t/44extent.t @@ -0,0 +1,82 @@ +# Test file created outside of h2xs framework. +# Run this like so: `perl 44extent.t' +# pajas@ufal.mff.cuni.cz 2009/09/24 13:18:43 + +######################### + +# change 'tests => 1' to 'tests => last_test_to_print'; + +use warnings; +use strict; + +use Test::More; + +use XML::LibXML; + +use IO::Handle; + +STDOUT->autoflush(1); +STDERR->autoflush(1); + +if (XML::LibXML::LIBXML_VERSION() < 20627) +{ + plan skip_all => "skipping for libxml2 < 2.6.27"; +} +else +{ + plan tests => 7; +} + +my $parser = XML::LibXML->new({ + expand_entities => 1, + ext_ent_handler => \&handler, +}); + +sub handler { + return join(",",@_); +} + +my $xml = <<'EOF'; + + + +]> + +
&a; + &b; + +EOF +my $xml_out = $xml; +$xml_out =~ s{&a;}{file:/dev/null,//foo/bar/b}; +$xml_out =~ s{&b;}{file:///dev/null,}; + +my $doc = $parser->parse_string($xml); + +# TEST +is( $doc->toString(), $xml_out, ' TODO : Add test name' ); + +my $xml_out2 = $xml; $xml_out2 =~ s{&[ab];}{}g; + +$parser->set_option( ext_ent_handler => sub { return '' } ); +$doc = $parser->parse_string($xml); +# TEST +is( $doc->toString(), $xml_out2, ' TODO : Add test name' ); + +$parser->set_option( ext_ent_handler=>sub{ '' } ); +$parser->set_options({ + expand_entities => 0, + recover => 2, +}); +$doc = $parser->parse_string($xml); +# TEST +is( $doc->toString(), $xml, ' TODO : Add test name' ); + +# TEST:$el=2; +foreach my $el ($doc->findnodes('/root/*')) { + # TEST*$el + ok ($el->hasChildNodes, ' TODO : Add test name'); + # TEST*$el + ok ($el->firstChild->nodeType == XML_ENTITY_REF_NODE, ' TODO : Add test name'); +} + diff --git a/t/45regex.t b/t/45regex.t new file mode 100644 index 0000000..54428e6 --- /dev/null +++ b/t/45regex.t @@ -0,0 +1,53 @@ +######################### + +use strict; +use warnings; + +use Test::More tests => 13; + +use XML::LibXML; + +{ + my $regex = '[0-9]{5}(-[0-9]{4})?'; + my $re = XML::LibXML::RegExp->new($regex); + + # TEST + ok( $re, 'Regex object was initted.'); + # TEST + ok( ! $re->matches('00'), 'Does not match 00' ); + # TEST + ok( ! $re->matches('00-'), 'Does not match 00-' ); + # TEST + ok( $re->matches('12345'), 'Matches 12345' ); + # TEST + ok( !$re->matches('123456'), 'Does not match 123456' ); + + # TEST + ok( $re->matches('12345-1234'), 'Matches 12345-1234'); + # TEST + ok( ! $re->matches(' 12345-1234'), 'Does not match leading space'); + # TEST + ok( ! $re->matches(' 12345-12345'), 'Leading space No. 2' ); + # TEST + ok( ! $re->matches('12345-1234 '), 'Trailing space' ); + + # TEST + ok( $re->isDeterministic, 'Regex is deterministic' ); +} + +{ + my $nondet_regex = '(bc)|(bd)'; + my $nondet_re = XML::LibXML::RegExp->new($nondet_regex); + + # TEST + ok( $nondet_re, 'Non deterministic re was initted' ); + # TEST + ok( ! $nondet_re->isDeterministic, 'It is not deterministic' ); +} + +{ + my $bad_regex = '[0-9]{5}(-[0-9]{4}?'; + eval { XML::LibXML::RegExp->new($bad_regex); }; + # TEST + ok( $@, 'An exception was thrown on bad regex' ); +} diff --git a/t/46err_column.t b/t/46err_column.t new file mode 100644 index 0000000..7597dc2 --- /dev/null +++ b/t/46err_column.t @@ -0,0 +1,40 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +# Bug #66642 for XML-LibXML: $err->column() incorrectly maxed out as 80 +# https://rt.cpan.org/Public/Bug/Display.html?id=66642 . + +use Test::More tests => 1; + +use XML::LibXML qw(); + +eval { + XML::LibXML->new()->parse_string( +'' + ) +}; + +SKIP: +{ + my $err = $@; + # This is a fix for: + # https://rt.cpan.org/Ticket/Display.html?id=69070 + # << t/46err_column.t is broken on centos/RHEL 4 >> + + # On this system, libxml is as follows: + # libxml2-devel-2.6.16-12.8 + + if (! ref($err)) + { + skip('parse_string returned a string - not an XML::LibXML::Error object - probably an old libxml2', + 1 + ); + } + # TEST + is ($err->column(), 203, "Column is OK."); +} diff --git a/t/47load_xml_callbacks.t b/t/47load_xml_callbacks.t new file mode 100644 index 0000000..1447539 --- /dev/null +++ b/t/47load_xml_callbacks.t @@ -0,0 +1,43 @@ +#!/usr/bin/perl + +# Fix the handling of XML::LibXML::InputCallbacks at load_xml(). +# - https://rt.cpan.org/Ticket/Display.html?id=58190 +# - The problem was that the input callbacks were not cloned in +# _clone(). + +use strict; +use warnings; + +use Test::More tests => 3; + +use XML::LibXML; + +{ + my $got_open = 0; + my $got_read = 0; + my $got_close = 0; + + my $input_callbacks = XML::LibXML::InputCallback->new(); + $input_callbacks->register_callbacks([ + sub { 1 }, + sub { $got_open = 1; open my $fh, '<', shift; return $fh; }, + sub { $got_read = 1; my $buffer; read(shift, $buffer, shift); return $buffer; }, + sub { $got_close = 1; close shift }, + ]); + + my $xml_parser = XML::LibXML->new(); + $xml_parser->input_callbacks($input_callbacks); + + my $TEST_FILENAME = 'example/dromeds.xml'; + + $xml_parser->load_xml(location => $TEST_FILENAME); + + # TEST + ok ($got_open, 'load_xml() encountered the open InputCallback'); + + # TEST + ok ($got_read, 'load_xml() encountered the read InputCallback'); + + # TEST + ok ($got_close, 'load_xml() encountered the close InputCallback'); +} diff --git a/t/48_RH5_double_free_rt83779.t b/t/48_RH5_double_free_rt83779.t new file mode 100644 index 0000000..4931a3f --- /dev/null +++ b/t/48_RH5_double_free_rt83779.t @@ -0,0 +1,100 @@ + +use strict; +use warnings; +use Scalar::Util qw(blessed); + +=head1 DESCRIPTION + +Double free on RHEL-5-x86_64. + +See L. + +=cut + +use constant HAS_LEAKTRACE => eval{ require Test::LeakTrace }; +use Test::More HAS_LEAKTRACE ? (tests => 6) : (skip_all => 'Test::LeakTrace is required.'); +use Test::LeakTrace; +use XML::LibXML::Reader; + +my $xml = <<'EOF'; + + + David vs. Goliath - Part I + + + + +EOF + +my $xml_decl = <<'EOF'; + +EOF + +{ + my $r = XML::LibXML::Reader->new(string => $xml); + my @nodes; + while ($r->read) { + push @nodes, $r->name; + } + # TEST + is( + join(',', @nodes), + 'html,#text,head,#text,title,#text,title,#text,head,#text,body,#text,body,#text,html', + 'Check reader' + ); +} + +{ + my $r = XML::LibXML::Reader->new(string => $xml); + while ($r->read) { + $r->preserveNode(); + } + # TEST + is( + $r->document->toString(), + $xml_decl . $xml, + 'Check reader with using preserveNode' + ); +} + +{ + my $r = XML::LibXML::Reader->new(string => $xml); + my $copy; + while ($r->read) { + $copy = $r->copyCurrentNode() if $r->name eq 'body'; + } + # TEST + is( + $copy->toString(), + '', + 'Check reader with using copyCurrentNode' + ); +} + +# TEST +no_leaks_ok { + my $r = XML::LibXML::Reader->new(string => $xml); + while ($r->read) { + # nothing + } +} 'Check reader, without leaks'; + +# TEST +no_leaks_ok { + my $node; + { + my $r = XML::LibXML::Reader->new(string => $xml); + while ($r->read) { + $node ||= $r->preserveNode(); + } + my $doc = $r->document(); + } +} 'Check reader with using preserveNode, without leaks'; + +# TEST +no_leaks_ok { + my $r = XML::LibXML::Reader->new(string => $xml); + while ($r->read) { + my $copy = $r->copyCurrentNode(); + } +} 'Check reader with using copyCurrentNode, without leaks'; diff --git a/t/48_SAX_Builder_rt_91433.t b/t/48_SAX_Builder_rt_91433.t new file mode 100644 index 0000000..808c0ec --- /dev/null +++ b/t/48_SAX_Builder_rt_91433.t @@ -0,0 +1,153 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use Test::More tests => 1; + +use lib './t/lib'; +use TestHelpers qw(eq_or_diff); + +BEGIN { + $XML::SAX::ParserPackage = "XML::LibXML::SAX"; +} + +use XML::SAX::ParserFactory; + +my @got_warnings; +local $SIG{__WARN__} = sub { + my ($warning) = @_; + + if ($warning =~ /\AUse of uninitialized value/) + { + push @got_warnings, $warning; + } +}; + +my $metadataHandler = innerSAX->new(); +my $oaiHandler = outerSAX->new(metadataHandler => $metadataHandler, + oaiNS => "http://www.openarchives.org/OAI/2.0/"); + +my $parser = XML::SAX::ParserFactory->parser(Handler => $oaiHandler); + +$parser->parse_string(<<'END_OF_XML'); + +2013-12-16T20:19:20Zhttp://services.d-nb.de/oai/repository
oai:dnb.de/authorities/10456014622013-12-16T18:47:23Zauthorities
+ + + + 1045601462 + Kencena, Rain + + Rain + Kencena + + + + ca. 20. / 21. Jh. + + +
+END_OF_XML + +# TEST +eq_or_diff( + \@got_warnings, + [], + "No warnings were generated.", +); + + +package outerSAX; +use parent qw(XML::SAX::Base); + +sub new { + my ($class, %opts) = @_; + my $self = bless \%opts, ref($class) || $class; + $self->set_handler( undef ); + return $self; +} + +sub start_element { + my ($self, $element) = @_; + + return $self->SUPER::start_element($element) unless $element->{NamespaceURI} eq $self->{oaiNS}; + + if ( $element->{LocalName} eq 'metadata' ) { + $self->{ OLD_Handler } = $self->get_handler(); + $self->set_handler( $self->{metadataHandler} ); + } + else { + return $self->SUPER::start_element($element)}; +} + +sub end_element { + my ($self, $element) = @_; + + return $self->SUPER::end_element($element) unless $element->{NamespaceURI} eq $self->{oaiNS}; + + if ( $element->{LocalName} eq 'metadata' ) { + $self->set_handler( $self->{OLD_Handler} ); + } + else { + $self->SUPER::end_element($element); + } +} + + +package innerSAX; +use parent qw(XML::SAX::Base); +use XML::LibXML::SAX::Builder; + +sub new { + my ($class, %opts) = @_; + my $self = bless \%opts, ref($class) || $class; + $self->{'tagStack'} = []; + return $self; +} + +sub start_element { + my ($self, $element) = @_; + + unless ( $self->{'tagStack'}[0] ) { + my $builder = XML::LibXML::SAX::Builder->new() + or die "cannot instantiate SAX builder"; + $self->set_handler($builder); + $self->SUPER::start_document(); # i.e. $builder->start_document(); + # DEBUG ME: warnings occur here + $self->SUPER::start_element($element); + } + else { + $self->SUPER::start_element($element)}; + + push(@{$self->{'tagStack'}}, $element->{Name}); +} + +sub end_element { + my ($self, $element) = @_; + $self->SUPER::end_element($element); + pop (@{$self->{'tagStack'}}); + + unless ( $self->{'tagStack'}[0] ) { + my $hdl = $self->get_handler(); + $self->set_handler(undef); + + # Convert fragment to document, do something with it + # (in real life: XSLT) + my $fragment = $hdl->done(); + my $child = $fragment->firstChild(); + while ($child && $child->nodeName eq "#text") + { + $child = $child->nextSibling; + } + my $tempdoc = XML::LibXML::Document->createDocument() + or die "cannot create new Document"; + $tempdoc->addChild($child) + or die "cannot addChild"; + # Removing because it was converted into a test script. + # print $tempdoc->toString; + } +} + +1; + diff --git a/t/48_gh_pr63_detect_undef_values.t b/t/48_gh_pr63_detect_undef_values.t new file mode 100644 index 0000000..cb5f935 --- /dev/null +++ b/t/48_gh_pr63_detect_undef_values.t @@ -0,0 +1,34 @@ + +use strict; +use warnings; + +=head1 DESCRIPTION + +L + +This test program + + use warnings; + use XML::LibXML; + + my $test = XML::LibXML::Text->new({}->{bar}); + +produces the following warning: + + $ perl ~/test.pl + Use of uninitialized value in subroutine entry at /home/sven/test.pl line 4. + +This apparently happens, because Sv2C tries to catch undef values by comparing the memory location of the scalar in question to &PL_sv_undef. While PL_sv_undef certainly is an undef value, not all undef values share its memory location. The added commit fixes this, by using SvOK to correctly detect all undef values. + +=cut + +use Test::More tests => 1; + +use XML::LibXML; + +$SIG{__WARN__} = sub { die "warning " . shift . "!"; }; + +my $test = XML::LibXML::Text->new( {}->{bar} ); + +# TEST +pass("success"); diff --git a/t/48_memleak_rt_83744.t b/t/48_memleak_rt_83744.t new file mode 100644 index 0000000..b6ceb75 --- /dev/null +++ b/t/48_memleak_rt_83744.t @@ -0,0 +1,27 @@ + +use strict; +use warnings; + +=head1 DESCRIPTION + +XPathContext memory leak on registerFunction. + +See L. + +=cut + +use constant HAS_LEAKTRACE => eval{ require Test::LeakTrace }; +use Test::More HAS_LEAKTRACE ? (tests => 2) : (skip_all => 'Test::LeakTrace is required for memory leak tests.'); +use Test::LeakTrace; + +# TEST +no_leaks_ok { + use XML::LibXML::XPathContext; +} 'load XPathContext without leaks'; + +# TEST +no_leaks_ok { + my $context = XML::LibXML::XPathContext->new(); + $context->registerFunction('match-font', sub {1;}); + $context->unregisterFunction('match-font'); +} 'register an XPath function and unregister it, without leaks'; diff --git a/t/48_reader_undef_warning_on_empty_str_rt106830.t b/t/48_reader_undef_warning_on_empty_str_rt106830.t new file mode 100644 index 0000000..f3d3f52 --- /dev/null +++ b/t/48_reader_undef_warning_on_empty_str_rt106830.t @@ -0,0 +1,79 @@ +# This is a test for: +# https://rt.cpan.org/Ticket/Display.html?id=106830 + +=head1 DESCRIPTION + +XML::LibXML::Reader emits a warning on empty string. + +=head1 THANKS. + +Rich. + +=cut + +use strict; +use warnings; + +use Test::More tests => 2; + +use lib './t/lib'; +use TestHelpers ( qw(eq_or_diff) ); + +use XML::LibXML::Reader; + +{ + my @warnings; + + local $SIG{__WARN__} = sub { push @warnings, [@_] }; + + my $empty_xml_doc = ''; + my $xml_reader = XML::LibXML::Reader->new(string => $empty_xml_doc); + + # TEST + SKIP: + { + if (XML::LibXML::LIBXML_VERSION() >= 20905) + { + skip 'libxml2 accepts empty strings since 2.9.5 version', 1; + } + ok (scalar(!defined($xml_reader)), 'xml_reader is undef', ); + } + + # TEST + eq_or_diff( + \@warnings, + [], + 'no warnigns were emitted.' + ); +} + + +=head1 COPYRIGHT & LICENSE + +Copyright 2015 by Shlomi Fish + +This program is distributed under the MIT (Expat) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/t/48_removeChild_crashes_rt_80395.t b/t/48_removeChild_crashes_rt_80395.t new file mode 100644 index 0000000..a0e3563 --- /dev/null +++ b/t/48_removeChild_crashes_rt_80395.t @@ -0,0 +1,36 @@ +#!/usr/bin/perl + +# See: +# +# https://rt.cpan.org/Public/Bug/Display.html?id=80395 + +use strict; +use warnings; + +use Test::More tests => 1; + +use XML::LibXML; + +my $xml = < +]> + + &myent; + +EOF + +my $dom = XML::LibXML->load_xml (string => $xml, + expand_entities => 0); +my $root = $dom->documentElement; + +my @nodes = $root->childNodes; +foreach my $node (@nodes) { + next if $node->nodeType != XML_ELEMENT_NODE; + next if $node->nodeName ne 'elem'; + + $root->removeChild ($node); +} + +# TEST +ok(1, "Code did not crash."); diff --git a/t/48_replaceNode_DTD_nodes_rT_80521.t b/t/48_replaceNode_DTD_nodes_rT_80521.t new file mode 100644 index 0000000..e70ef29 --- /dev/null +++ b/t/48_replaceNode_DTD_nodes_rT_80521.t @@ -0,0 +1,24 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Test::More tests => 1; + +use XML::LibXML; + +my $xml = <<'EOF'; + +]> + +EOF + +my $src = XML::LibXML->load_xml (string => $xml); +my $dest = XML::LibXML->load_xml (string => $xml); +my $src_dtd = $src->firstChild; +my $dest_dtd = $dest->firstChild; +$dest_dtd->replaceNode($src_dtd); + +# TEST +ok(1, "Did not crash."); diff --git a/t/48_rt123379_setNamespace.t b/t/48_rt123379_setNamespace.t new file mode 100644 index 0000000..bf396f7 --- /dev/null +++ b/t/48_rt123379_setNamespace.t @@ -0,0 +1,23 @@ +use strict; +use warnings; + +use XML::LibXML; +use Test::More tests => 8; + +# TEST +ok(my $doc = XML::LibXML::Document->new(), 'new document'); +# TEST +ok(my $elm = $doc->createElement('D:element'), 'create element'); +# TEST +ok($elm->setAttribute('xmlns:D', 'attribute'), 'set attribute'); +$doc->setDocumentElement($elm); # XXX does not return true if successful +# TEST +ok(my $str = $doc->toString(0), 'to string'); +# TEST +ok(my $par = XML::LibXML->new(), 'new parser'); +# TEST +ok( eval { $par->parse_string($str) } , 'parse string'); +# TEST +is($@, "", 'parse error'); +# TEST +like($str, qr{}, 'xml element'); diff --git a/t/48_rt55000.t b/t/48_rt55000.t new file mode 100644 index 0000000..e53a1fc --- /dev/null +++ b/t/48_rt55000.t @@ -0,0 +1,60 @@ + +use strict; +use warnings; + +=head1 DESCRIPTION + +If an element contains both a default namespace declaration and a second +namespace declaration, adding an attribute using the default namespace +declaration will cause that attribute to have the other prefix. + +OS Version: FreeBSD 6.3-RELEASE +Perl Version: v5.8.8 +LibXML Version: 1.70 + +See L . + +=cut + +use Test::More tests => 6; + +use XML::LibXML; + +my $xml_string = <<'XML'; + + + +XML + +my $parser = XML::LibXML->new; +my $doc = $parser->parse_string($xml_string); +my $root = $doc->documentElement(); +$root->setAttributeNS("uri", "prefix:attribute", "text"); +$root->setAttributeNS("uri", "second", "text"); + +my $string = $doc->toString(1); + +# TEST +unlike ($string, qr/[^\w:]attribute="text"/, + "Not placed as an unprefixed attribute"); +# TEST +unlike ($string, qr/\bwrong:attribute="text"/, + "Not placed in the wrong namespace"); + +# TEST +like ($string, qr/\bprefix:attribute="text"/, + "Placed in the right namespace"); + +# TEST +unlike ($string, qr/[^\w:]second="text"/, + "Not placed as an unprefixed attribute"); + +# TEST +unlike ($string, qr/\bwrong:second="text"/, + "Not placed in the wrong namespace"); + +# TEST +like ($string, qr/\bprefix:second="text"/, + "Placed in the right namespace"); + diff --git a/t/48_rt93429_recover_2_in_html_parsing.t b/t/48_rt93429_recover_2_in_html_parsing.t new file mode 100644 index 0000000..c1c06be --- /dev/null +++ b/t/48_rt93429_recover_2_in_html_parsing.t @@ -0,0 +1,33 @@ +#!/usr/bin/perl + +# Test for: +# https://rt.cpan.org/Ticket/Display.html?id=93429 +# +# Contributed by Nick Wellnhofer. + +use strict; +use warnings; + +use Test::More tests => 1; + +use XML::LibXML; + +{ + my $err_html = ''; + + my $parser = XML::LibXML->new(); + + my $buf = ''; + open(my $fh, '>', \$buf); + + { + local *STDERR = $fh; + $parser->load_html( string => $err_html, recover => 2, ); + } + + close($fh); + + # TEST + is($buf, '', 'No warning emitted on load_html with recover => 2.'); +} + diff --git a/t/48importing_nodes_IDs_rt_69520.t b/t/48importing_nodes_IDs_rt_69520.t new file mode 100644 index 0000000..5255d6b --- /dev/null +++ b/t/48importing_nodes_IDs_rt_69520.t @@ -0,0 +1,79 @@ +# This is a test for: +# https://rt.cpan.org/Public/Bug/Display.html?id=69520 + +=head1 DESCRIPTION + +IDs of elements is lost when importing nodes from another document. +When call method 'importNode' executed function 'xmlNodeCopy' from the +library libxml2, which does not import IDs. +Propose to replace the call "xmlNodeCopy" on "xmlDocNodeCopy" in the file +"dom.c". + +=head1 THANKS. + +Yuriy Ustushenko . + +=cut + +use strict; +use warnings; + +use Test::More tests => 4; + +use XML::LibXML; + +{ + my $doc = XML::LibXML->load_xml(string => <<'EOT'); + + item1 + +EOT + + my $elem = $doc->getElementById('id1'); + # TEST + ok ($elem, 'Orig doc has id1'); + + # TEST + is ($elem->textContent(), 'item1', 'Content of orig doc elem id1'); + + my $doc2 = XML::LibXML->createDocument( "1.0", "UTF-8" ); + $doc2->setDocumentElement( $doc2->importNode( $doc->documentElement() ) ); + + my $elem2 = $doc2->getElementById('id1'); + # TEST + ok ($elem2, 'Doc2 after importNode has id1'); + + # TEST + is ($elem2->textContent(), 'item1', 'Doc2 after importNode has id1'); +} + + +=head1 COPYRIGHT & LICENSE + +Copyright 2011 by Shlomi Fish + +This program is distributed under the MIT (X11) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/t/49_load_html.t b/t/49_load_html.t new file mode 100644 index 0000000..70d2660 --- /dev/null +++ b/t/49_load_html.t @@ -0,0 +1,106 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use utf8; + +use lib './t/lib'; +use TestHelpers qw(utf8_slurp eq_or_diff); + +use Test::More tests => 3; + +use XML::LibXML; + +# This is a check for: +# https://rt.cpan.org/Ticket/Display.html?id=53270 + +{ + my $content = utf8_slurp('example/yahoo-finance-html-with-errors.html'); + + my $parser = XML::LibXML->new; + + $parser->set_option('recover', 1); + $parser->set_option('suppress_errors', 1); + + my @warnings; + + local $SIG{__WARN__} = sub { + my $warning = shift; + push @warnings, $warning; + }; + my $dom = $parser->load_html(string => $content); + + # TEST + eq_or_diff( + \@warnings, + [], + 'suppress_errors worked.', + ); +} + +{ + # These are tests for https://rt.cpan.org/Ticket/Display.html?id=58024 : + # <<< + # In XML::LibXML, warnings are not suppressed when specifying the recover + # or recover_silently flags as per the following excerpt from the manpage: + # >>> + + my $txt = <<'EOS'; + +EOS + + { + my $buf = ''; + open my $fh, '>', \$buf; + # redirect STDERR there + local *STDERR = $fh; + + XML::LibXML->new(recover => 1)->load_html( string => $txt ); + close($fh); + + # TEST + like ($buf, qr/htmlParseEntityRef:/, 'warning emitted'); + } + { + my $buf = ''; + open my $fh, '>', \$buf; + local *STDERR = $fh; + XML::LibXML->new(recover => 2)->load_html( string => $txt ); + close($fh); + # TEST + is ($buf, '', 'No warning emitted.'); + } +} + +=head1 COPYRIGHT & LICENSE + +Copyright 2011 by Shlomi Fish + +This program is distributed under the MIT (X11) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/t/49callbacks_returning_undef.t b/t/49callbacks_returning_undef.t new file mode 100644 index 0000000..ad0741f --- /dev/null +++ b/t/49callbacks_returning_undef.t @@ -0,0 +1,103 @@ + +# This is a bug fix for: +# https://rt.cpan.org/Ticket/Display.html?id=70321 +# +# When the match callback returns 1 and the open callback returns undef, then the +# read callback (inside the XS code) warnings about: +# "Use of uninitialized value in subroutine entry at". +# +# This is due to the value returned being undef and processed by SvPV. + +use strict; +use warnings; + +use lib './t/lib'; + +use Test::More; +use File::Spec; + +BEGIN +{ + # Part of the fix for https://rt.cpan.org/Ticket/Display.html?id=86665 + delete $ENV{'XML_CATALOG_FILES'}; +} + +use XML::LibXML; + +if (! eval { require URI::file; } ) +{ + plan skip_all => "URI::file is not available."; +} +elsif ( URI->VERSION() < 1.35 ) +{ + plan skip_all => "URI >= 1.35 is not available (".URI->VERSION.")."; +} +else +{ + plan tests => 1; +} + +sub _escape_html +{ + my $string = shift; + $string =~ s{&}{&}gso; + $string =~ s{<}{<}gso; + $string =~ s{>}{>}gso; + $string =~ s{"}{"}gso; + return $string; +} + + +my $uri = URI::file->new( + File::Spec->rel2abs( + File::Spec->catfile( + File::Spec->curdir(), "t", "data", "callbacks_returning_undef.xml" + ) + ) +); + +my $esc_path = _escape_html("$uri"); + +my $string = <<"EOF"; + + +]> + + metaWeblog.newPost + + + Entity test: &foo; + + + +EOF + +my $icb = XML::LibXML::InputCallback->new(); + +my $match_ret = 1; +$icb->register_callbacks( [ + sub { + my $uri = shift; + # skip for XML catalogs in /etc/xml/ + return 0 if $uri =~ m{^file:///etc/xml/}; + my $to_ret = $match_ret; $match_ret = 0; return $to_ret; + }, + sub { return undef; }, + undef, + undef + ] +); + +my $parser = XML::LibXML->new(); +$parser->input_callbacks($icb); +my $num_warnings = 0; +{ + local $^W = 1; + local $SIG{__WARN__} = sub { + $num_warnings++; + }; + my $doc = $parser->parse_string($string); +} +# TEST +is ($num_warnings, 0, "No warnings were recorded."); diff --git a/t/49global_extent.t b/t/49global_extent.t new file mode 100644 index 0000000..d2f4524 --- /dev/null +++ b/t/49global_extent.t @@ -0,0 +1,44 @@ +use strict; +use warnings; + +use Test::More; +use XML::LibXML; + +if (XML::LibXML::LIBXML_VERSION() < 20627) { + plan skip_all => "skipping for libxml2 < 2.6.27"; +} +else +{ + plan tests => 1; +} + +sub handler { + return "ENTITY:" . join(",",@_); +} + +# global entity loader +XML::LibXML::externalEntityLoader(\&handler); + +my $parser = XML::LibXML->new({ + expand_entities => 1, +}); + +my $xml = <<'EOF'; + + + +]> + + &a; + &b; + +EOF +my $xml_out = $xml; +$xml_out =~ s{&a;}{ENTITY:file:/dev/null,//foo/bar/b}; +$xml_out =~ s{&b;}{ENTITY:file:///dev/null,}; + +my $doc = $parser->parse_string($xml); + +# TEST +is( $doc->toString(), $xml_out ); diff --git a/t/50devel.t b/t/50devel.t new file mode 100644 index 0000000..c71d13c --- /dev/null +++ b/t/50devel.t @@ -0,0 +1,93 @@ +use Test::More; +BEGIN { plan tests => 18 }; + +use warnings; +use strict; + +BEGIN {$ENV{'DEBUG_MEMORY'} = 1;} +use XML::LibXML; +use XML::LibXML::Devel qw(:all); + +$|=1; + +# Base line +{ + my $doc = XML::LibXML::Document->new(); + + my $raw; + my $mem_before = mem_used(); + { + my $node = $doc->createTextNode("Hello"); + + $raw = node_from_perl($node); + refcnt_inc($raw); + } + cmp_ok(mem_used(), '>', $mem_before); + is(refcnt_dec($raw), 1); + is(mem_used(), $mem_before); + + # Next group of checks - multiple nodes + my ($rawT, $rawN); + $mem_before = mem_used(); + { + my $node = XML::LibXML::Element->new( 'text' ); + my $text = $doc->createTextNode( "Hello" ); + + $rawN = node_from_perl($node); + $rawT = node_from_perl($text); + + refcnt_inc($rawN); + refcnt_inc($rawT); + + $node->appendChild($text); + + # Done by appendChild + # fix_owner($rawT, $rawN); + } + cmp_ok(mem_used(), '>', $mem_before); + is(refcnt_dec($rawN), 2); + is(refcnt_dec($rawT), 1); + is(mem_used(), $mem_before); + + # The owner node remains until the last node is gone + my ($rawR, $rawD); + $mem_before = mem_used(); + { + my $dom = XML::LibXML->load_xml(string => <<'EOT'); + + + Hello + +EOT + my ($root) = $dom->getElementsByTagName('test'); + $rawR = node_from_perl($root); + $rawD = node_from_perl($dom); + + is(refcnt($rawR), 1); + is(refcnt($rawD), 2); + + my ($node) = $dom->getElementsByTagName('text'); + $rawN = node_from_perl($node); + + is(refcnt($rawN), 1); + is(refcnt($rawR), 1); + is(refcnt($rawD), 3); + + refcnt_inc($rawN); + + is(refcnt($rawD), 3); + + my $child = $node->firstChild; + + is(refcnt($rawD), 4); + } + cmp_ok(mem_used(), '>', $mem_before); + # $rawR's proxy node is no longer accessible + # but $rawD still has one + is(refcnt($rawD), 1); + is(refcnt_dec($rawN), 1); + is(mem_used(), $mem_before); + +} + + diff --git a/t/51_parse_html_string_rt87089.t b/t/51_parse_html_string_rt87089.t new file mode 100644 index 0000000..366bdbb --- /dev/null +++ b/t/51_parse_html_string_rt87089.t @@ -0,0 +1,33 @@ + +use strict; +use warnings; + +=head1 DESCRIPTION + +Getting wrong result when parsing HTML string as a scalar reference. + +See L . + +=cut + +use Test::More tests => 2; + +use XML::LibXML; + +my $parser = XML::LibXML->new(); + +# Parse HTML string as scalar +{ + my $dom = $parser->load_html(string => ''); + # TEST + is ($dom->toStringHTML, "\n\n", + "Parse HTML string as scalar"); +} + +# Parse HTML string as scalar reference +{ + my $dom = $parser->load_html(string => \''); + # TEST + is ($dom->toStringHTML, "\n\n", + "Parse HTML string as scalar reference"); +} diff --git a/t/60error_prev_chain.t b/t/60error_prev_chain.t new file mode 100644 index 0000000..e48215c --- /dev/null +++ b/t/60error_prev_chain.t @@ -0,0 +1,77 @@ +# This test script checks for: +# +# https://rt.cpan.org/Ticket/Display.html?id=56671 . +# +# It makes sure an error chain cannot be too long, because if it is it consumes +# a lot of RAM. + +use strict; +use warnings; + +no warnings 'recursion'; + +use Test::More; + +use XML::LibXML; + +{ + my $parser = XML::LibXML->new(); + $parser->validation(0); + $parser->load_ext_dtd(0); + + eval + { + local $^W = 0; + $parser->parse_file('example/JBR-ALLENtrees.htm'); + }; + + my $err = $@; + my $count = 0; + + if( $err && !ref($err) ) { + plan skip_all => 'The local libxml library does not support errors as objects to $@'; + } + plan tests => 1; + + while (defined($err) && $count < 200) + { + $err = $err->_prev(); + } + continue + { + $count++; + } + + # TEST + ok ((!$err), "Reached the end of the chain."); +} + +=head1 COPYRIGHT & LICENSE + +Copyright 2011 by Shlomi Fish + +This program is distributed under the MIT (X11) License: +L + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +=cut diff --git a/t/60struct_error.t b/t/60struct_error.t new file mode 100644 index 0000000..1b156d3 --- /dev/null +++ b/t/60struct_error.t @@ -0,0 +1,56 @@ +# $Id: 29_struct_errors.t,v 1.1.2.2 2006/06/22 14:34:47 pajas Exp $ +# First version of the new structured error test suite + +use strict; +use warnings; + +use Test::More; +use XML::LibXML; + +if (! XML::LibXML::HAVE_STRUCT_ERRORS() ) +{ + plan skip_all => 'Does not have struct errors - skipping'; +} +else +{ + plan tests => 7; +} + +use XML::LibXML::Error; +use XML::LibXML::ErrNo; + +{ + my $p = XML::LibXML->new(); + my $xmlstr = ''; + + eval { + my $doc = $p->parse_string( $xmlstr ); + }; + my $err = $@; + # TEST + ok (defined($err), 'Error is defined.'); + # TEST + isa_ok ($err, "XML::LibXML::Error", '$err is an XML::LibXML::Error'); + # TEST + is ($err->domain(), "parser", 'domain'); + # TEST + is ($err->line(), 1, 'line'); + # TEST + ok ($err->code == XML::LibXML::ErrNo::ERR_TAG_NAME_MISMATCH, ' TODO : Add test name'); + + my $fake_err = XML::LibXML::Error->new('fake error'); + my $domain_num = @XML::LibXML::Error::error_domains; # too big + $fake_err->{domain} = $domain_num; # white-box test + # TEST + is($fake_err->domain, "domain_$domain_num", + '$err->domain is reasonable on unknown domain'); + { + my $warnings = 0; + local $SIG{__WARN__} = sub { $warnings++; warn "@_\n" }; + my $s = $fake_err->as_string; + # TEST + is($warnings, 0, + 'No warnings when stringifying unknown-domain error', + ); + } +} diff --git a/t/61error.t b/t/61error.t new file mode 100644 index 0000000..2235da2 --- /dev/null +++ b/t/61error.t @@ -0,0 +1,35 @@ + +use strict; +use warnings; + +use Test::More; +use XML::LibXML; + +use XML::LibXML::Error; + +if (! XML::LibXML::HAVE_STRUCT_ERRORS() ) { + plan skip_all => 'XML::LibXML does not have struct errrors.'; +} +else +{ + plan tests => 3; +} + +my $p = XML::LibXML->new(); + +my $xmlstr = < +EOX + +eval { + my $doc = $p->parse_string( $xmlstr ); +}; + +my $err = $@; +# TEST +isa_ok ($err, "XML::LibXML::Error", 'Exception is of type error.'); +# TEST +is ($err->domain(), 'parser', 'Error is in the parser domain'); +# TEST +is ($err->line(), 1, 'Error is on line 1.'); +# warn "se: ", $@; diff --git a/t/62overload.t b/t/62overload.t new file mode 100644 index 0000000..98b9032 --- /dev/null +++ b/t/62overload.t @@ -0,0 +1,50 @@ +# -*- cperl -*- + +use strict; +use warnings; + +use Test::More tests => 12; + +use XML::LibXML; + +my $e1 = XML::LibXML::Element->new('test1'); +$e1->setAttribute('attr' => 'value1'); + +my $e2 = XML::LibXML::Element->new('test2'); +$e2->setAttribute('attr' => 'value2'); + +my $h1 = \%{ $e1 }; +my $h2 = \%{ $e2 }; + +# TEST +isnt $h1,$h2, 'different references'; + +# TEST +is $h1->{attr}, 'value1', 'affr for el 1'; +# TEST +is $h2->{attr}, 'value2', 'affr for el 2'; + +# TEST +is "$e1", '', 'stringify for el 1'; +# TEST +is "$e2", '', 'stringify for el 2'; + +# TEST +cmp_ok 0+$e1, '>', 1, 'num for el 1'; +# TEST +cmp_ok 0+$e2, '>', 1, 'num for el 2'; + +# TEST +isnt 0+$e1,0+$e2, 'num for e1 and e2 differs'; + +my $e3 = $e1; + +# TEST +ok $e3 eq $e1, 'eq'; +# TEST +ok $e3 == $e1, '=='; + +# TEST +ok $e1 ne $e2, 'ne'; +# TEST +ok $e1 != $e2, '!='; diff --git a/t/71overloads.t b/t/71overloads.t new file mode 100644 index 0000000..e67138d --- /dev/null +++ b/t/71overloads.t @@ -0,0 +1,217 @@ +use strict; +use warnings; +use Test::More tests => 25; +use XML::LibXML; + +my $root = XML::LibXML->load_xml( IO => \*DATA )->documentElement; + +# TEST +ok( + tied %$root, + 'elements can be hash dereffed to a tied hash', + ); + +# TEST +isa_ok( + tied %$root, + 'XML::LibXML::AttributeHash', + 'tied %$element', + ); + +# TEST +ok( + exists $root->{'attr1'}, + 'EXISTS non-namespaced', + ); + +# TEST +is( + $root->{'attr1'}, + 'foo', + 'FETCH non-namespaced', + ); + +$root->{attr1} = 'bar'; +# TEST +is( + $root->getAttribute('attr1'), + 'bar', + 'STORE non-namespaced', + ); + +$root->{attr11} = 'baz'; +# TEST +is( + $root->getAttribute('attr11'), + 'baz', + 'STORE (and create) non-namespaced', + ); + +delete $root->{attr11}; +# TEST +ok( + !$root->hasAttribute('attr11'), + 'DELETE non-namespaced', + ); + +my $fail = 1; +while (my ($k, $v) = each %$root) +{ + if ($k eq 'attr1') + { + $fail = 0; + # TEST + pass('FIRSTKEY/NEXTKEY non-namespaced'); + } +} + +if ($fail) +{ + fail('FIRSTKEY/NEXTKEY non-namespaced'); +} + +# TEST +ok( + exists $root->{'{http://localhost/}attr2'}, + 'EXISTS namespaced', + ); + +# TEST +is( + $root->{'{http://localhost/}attr2'}, + 'bar', + 'FETCH namespaced', + ); + +$root->{'{http://localhost/}attr2'} = 'quux'; +# TEST +is( + $root->getAttributeNS('http://localhost/', 'attr2'), + 'quux', + 'STORE namespaced', + ); + +$root->{'{http://localhost/}attr22'} = 'quuux'; +# TEST +is( + $root->getAttributeNS('http://localhost/', 'attr22'), + 'quuux', + 'STORE (and create) namespaced', + ); + +$root->{'{http://localhost/another}attr22'} = 'xyzzy'; +# TEST +is( + $root->getAttributeNS('http://localhost/another', 'attr22'), + 'xyzzy', + 'STORE (and create) namespaced, in new namespace', + ); + +delete $root->{'{http://localhost/another}attr22'}; +# TEST +ok( + !$root->hasAttributeNS('http://localhost/another', 'attr22'), + 'DELETE namespaced', + ); + +my $fail2 = 1; +while (my ($k, $v) = each %$root) +{ + if ($k eq '{http://localhost/}attr22') + { + $fail2 = 0; + # TEST + pass('FIRSTKEY/NEXTKEY namespaced'); + } +} + +if ($fail2) +{ + fail('FIRSTKEY/NEXTKEY namespaced'); +} + +# TEST +like( + $root->toStringEC14N, + qr{}, + '!!! toStringEC14N', + ); + +# These are tests for: +# https://rt.cpan.org/Ticket/Display.html?id=75257 +# https://rt.cpan.org/Ticket/Display.html?id=75293 +# https://rt.cpan.org/Ticket/Display.html?id=75259 +# (Three duplicate reports for the same problem.) + +# TEST +is_deeply( + [($root == $root)], + [1], + '== comparison', +); + +# TEST +is_deeply( + [($root eq $root)], + [1], + 'eq comparison', +); + +# TEST +is_deeply( + [($root == 'not-root')], + [''], + '== negative comparison', +); + +# TEST +is_deeply( + [($root == 'not-root')], + [''], + '== negative comparison', +); + +# TEST +is_deeply( + [!($root != 'not-root')], + [''], + '!== negative comparison', +); + +# TEST +is_deeply( + [($root eq 'not-root')], + [''], + 'eq negative comparison', +); + +# TEST +is_deeply( + [!($root ne 'not-root')], + [''], + 'eq negative comparison', +); + +{ + my $doc = XML::LibXML->load_xml( string => <<'EOT' )->documentElement; + + + + +EOT + + my ($bar_elem) = $doc->findnodes('//bar'); + my ($baz_elem) = $doc->findnodes('//baz'); + + # TEST + is_deeply([$bar_elem == $baz_elem], [''], + '== comparison between two differenet nodes' + ); + + # TEST + is_deeply([$bar_elem eq $baz_elem], [''], + 'eq comparison between two differenet nodes' + ); +} +__DATA__ + diff --git a/t/72destruction.t b/t/72destruction.t new file mode 100644 index 0000000..d69efdf --- /dev/null +++ b/t/72destruction.t @@ -0,0 +1,48 @@ +use strict; +use warnings; +use Test::More; +use Scalar::Util; +use XML::LibXML; + +if (defined (&Scalar::Util::weaken)) +{ + plan tests => 1; +} +else +{ + plan skip_all => 'Need Scalar::Util::weaken'; +} + +my $is_destroyed; +BEGIN +{ + no warnings 'once', 'redefine'; + my $old = \&XML::LibXML::Element::DESTROY; + *XML::LibXML::Element::DESTROY = sub + { + $is_destroyed++; + $old->(@_); + }; +} + +# Create element... +my $root = XML::LibXML->load_xml( IO => \*DATA )->documentElement; + +# allow %hash to go out of scope quickly. +{ + my %hash = %$root; + # assignment to ensure block is not optimized away + $hash{foo} = 'phooey'; +} + +# Destroy element... +undef($root); + +# Touch the fieldhash... +my %other = %{ XML::LibXML->load_xml( string => '' )->documentElement }; + +# TEST +ok($is_destroyed, "does not leak memory"); + +__DATA__ + diff --git a/t/80registryleak.t b/t/80registryleak.t new file mode 100644 index 0000000..63f4a86 --- /dev/null +++ b/t/80registryleak.t @@ -0,0 +1,24 @@ + +use strict; +use warnings; + +use Test::More tests => 2; +use XML::LibXML; + +my $p = XML::LibXML->new(); +# TEST +ok($p, 'Parser was initialized.'); + +my $xml = < + +EOX + +{ +my $doc = $p->parse_string($xml); +my $root = $doc->documentElement; +my $child = $root->firstChild; +} + +# TEST +is (scalar(XML::LibXML::_leaked_nodes()), 0, '0 leaked nodes'); diff --git a/t/90shared_clone_failed_rt_91800.t b/t/90shared_clone_failed_rt_91800.t new file mode 100644 index 0000000..eefb624 --- /dev/null +++ b/t/90shared_clone_failed_rt_91800.t @@ -0,0 +1,50 @@ +use strict; +use warnings; + +use Test::More; +use Config; + +BEGIN +{ + my $will_run = 0; + if ( $Config{useithreads} ) + { + if ($ENV{THREAD_TEST}) + { + require threads; + require threads::shared; + $will_run = 1; + } + else + { + plan skip_all => "optional (set THREAD_TEST=1 to run these tests)"; + } + } + else + { + plan skip_all => "no ithreads in this Perl"; + } + + if ($will_run) + { + plan tests => 3; + } +} + +use XML::LibXML qw(:threads_shared); + +# TEST +ok(1, 'Loaded'); + +my $p = XML::LibXML->new(); + +# TEST +ok($p, 'Parser initted.'); + +{ + my $doc = $p->parse_string(qq{bar}); + my $cloned = threads::shared::shared_clone($doc); + + # TEST + ok(1, "Shared clone"); +} diff --git a/t/90stack.t b/t/90stack.t new file mode 100644 index 0000000..574b50c --- /dev/null +++ b/t/90stack.t @@ -0,0 +1,19 @@ +# -*- cperl -*- + +## +# This test checks that the XS code handles the perl stack correctly +# when the module loads. This failed in 5.19.6+. +# +# See: https://rt.cpan.org/Ticket/Display.html?id=92606 . + +use Test::More tests => 1; + +for (1) { + for (1,0) { + require XML::LibXML; + } +} + +# If we get this far, then all is fine. +# TEST +pass("Loading XML::LibXML works inside multiple foreach loops"); diff --git a/t/90threads.t b/t/90threads.t new file mode 100644 index 0000000..e52348e --- /dev/null +++ b/t/90threads.t @@ -0,0 +1,448 @@ +# -*- cperl -*- + +use strict; +use warnings; + +use lib './t/lib'; +use TestHelpers qw(utf8_slurp); + +use Test::More; +use Config; +use constant MAX_THREADS => 10; +use constant MAX_LOOP => 50; +# use constant PLAN => 24; +BEGIN +{ + my $will_run = 0; + if ( $Config{useithreads} ) + { + if ($ENV{THREAD_TEST}) + { + require threads; + require threads::shared; + $will_run = 1; + } + else + { + plan skip_all => "optional (set THREAD_TEST=1 to run these tests)"; + } + } + else + { + plan skip_all => "no ithreads in this Perl"; + } + + if ($will_run) + { + plan tests => 24; + } +} + +use XML::LibXML qw(:threads_shared); +# TEST +ok(1, 'Loaded'); + +my $p = XML::LibXML->new(); +# TEST +ok($p, 'Parser initted.'); + +{ +for(1..MAX_THREADS) +{ + threads->new(sub {}); +} +$_->join for(threads->list); +# TEST +ok(1, "Simple spawn threads with a parser in scope"); +} + +{ + my $grammar = <<'EOF'; + + + + + +EOF + my $r = XML::LibXML::RelaxNG->new(string=>$grammar); + for(1..MAX_THREADS) { + threads->new(sub { XML::LibXML::RelaxNG->new(string=>$grammar) }); + } + $_->join for(threads->list); + # TEST + ok(1, "RelaxNG"); +} + +{ + eval { XML::LibXML->new->parse_string('foo') }; + for(1..40) { + threads->new(sub { eval { XML::LibXML->new->parse_string('foo') } for(1..1000); 1; }); + } + $_->join for(threads->list); + # TEST + ok(1, "XML error\n"); +} + + +{ + my $doc=XML::LibXML::Document->new; + $doc->setDocumentElement($doc->createElement('root')); + $doc->getDocumentElement->setAttribute('foo','bar'); +# threads->new(sub { +# for (1..100000) { +# # a dictionary of $doc +# my $el =$doc->createElement('foo'.$_); +# $el->setAttribute('foo','bar'); +# } +# return; +# }); + for my $t_no (1..40) { + threads->new(sub { + for (1..1000) { + $doc->getDocumentElement; + } + return; + }); + } + $_->join for(threads->list); +} +# TEST +ok(1, "accessing document elements without lock"); +{ + my @docs=map { + my $doc = XML::LibXML::Document->new; + $doc->setDocumentElement($doc->createElement('root')); + $doc->getDocumentElement->setAttribute('foo','bar'); + $doc } 1..40; + for my $t_no (1..40) { + threads->new(sub { + my $doc=$docs[$t_no-1]; + for (1..10000) { + # a dictionary of $doc + my $el =$doc->createElement('foo'.$_); + $el->setAttribute('foo','bar'); + $doc->getDocumentElement->getAttribute('foo'); + $el->getAttribute('foo'); + } + return; + }); + } + $_->join for(threads->list); +} +# TEST +ok(1, "operating on different documents without lock\n"); + +# operating on the same document with a lock +{ + my $lock : shared; + my $doc=XML::LibXML::Document->new; + for my $t_no (1..40) { + threads->new(sub { + for (1..10000) { + lock $lock; # must lock since libxml2 uses + # a dictionary of $doc + my $el =$doc->createElement('foo'); + $el->setAttribute('foo','bar'); + $el->getAttribute('foo'); + } + return; + }); + } + $_->join for(threads->list); +} + + +my $xml = < + +EOF + +{ +my $doc = $p->parse_string( $xml ); +for(1..MAX_THREADS) +{ + threads->new(sub {}); +} +$_->join for(threads->list); +} +# TEST +ok(1, "Spawn threads with a document in scope"); + + +{ +my $waitfor : shared; +{ +lock $waitfor; +my $doc = $p->parse_string($xml); +for(1..MAX_THREADS) +{ + threads->new(sub { lock $waitfor; $doc->toString; }); +} +} +$_->join for(threads->list); +# TEST +ok(1, "Spawn threads that use document that has gone out of scope from where it was created"); +} + +{ +for(1..MAX_THREADS) +{ + threads->new(sub { $p->parse_string($xml) for 1..MAX_LOOP; 1; }); +} +$_->join for(threads->list); +# TEST +ok(1, "Parse a correct XML document"); +} + +my $xml_bad = < + +EOF + + +{ +for(1..MAX_THREADS) +{ + threads->new(sub { eval { my $x = $p->parse_string($xml_bad)} for(1..MAX_LOOP); 1; }); +} +$_->join for(threads->list); +# TEST +ok(1, "Parse a bad XML document\n"); +} + + +my $xml_invalid = < + +]> + +EOF + +{ +for(1..MAX_THREADS) +{ + threads->new(sub { + for (1..MAX_LOOP) { + my $x = $p->parse_string($xml_invalid); + die if $x->is_valid; + eval { $x->validate }; + die unless $@; + } + 1; + }); +} +$_->join for(threads->list); +# TEST +ok(1, "Parse an invalid XML document"); +} + +my $rngschema = < + + + + + + + +EOF + +{ +for(1..MAX_THREADS) +{ + threads->new( + sub { + for (1..MAX_LOOP) { + my $x = $p->parse_string($xml); + eval { XML::LibXML::RelaxNG->new( string => $rngschema )->validate( $x ) }; + die unless $@; + }; 1; + }); +} +$_->join for(threads->list); +# TEST +ok(1, "test RNG validation errors are thread safe"); +} + +my $xsdschema = < + + + + + +EOF + +{ +for(1..MAX_THREADS) +{ + threads->new( + sub { + for (1..MAX_LOOP) { + my $x = $p->parse_string($xml); + eval { XML::LibXML::Schema->new( string => $xsdschema )->validate( $x ) }; + die unless $@; + }; 1; + }); +} +$_->join for(threads->list); +# TEST +ok(1, "test Schema validation errors are thread safe"); +} + +my $bigfile = "docs/libxml.dbk"; +$xml = utf8_slurp($bigfile); +# TEST +ok($xml, 'bigfile was slurped fine.'); +sub use_dom +{ + my $d = shift; + my @nodes = $d->getElementsByTagName("title",1); + for(@nodes) + { + my $title = $_->toString; + } + die unless $nodes[0]->toString eq 'XML::LibXML'; +} + +{ +for(1..MAX_THREADS) { + threads->new(sub { my $dom = do { $p->parse_string($xml); }; use_dom($dom) for 1..5; 1; }); +} +$_->join for(threads->list); +# TEST +ok(1, 'Joined all threads.'); +} + +{ +package MyHandler; + +use parent 'XML::SAX::Base'; + +sub AUTOLOAD +{ +} +} + +use XML::LibXML::SAX; +$p = XML::LibXML::SAX->new( + Handler=>MyHandler->new(), +); +# TEST +ok($p, 'XML::LibXML::SAX was initted.'); + +{ +for(1..MAX_THREADS) +{ + threads->new(sub { $p->parse_string($xml) for (1..5); 1; }); +} +$_->join for threads->list; + +# TEST +ok(1, 'After XML::LibXML::SAX - join.'); +} + +$p = XML::LibXML->new( + Handler=>MyHandler->new(), +); +$p->parse_chunk($xml); +$p->parse_chunk("",1); + +{ +for(1..MAX_THREADS) +{ + threads->new(sub { +$p = XML::LibXML->new(); +$p->parse_chunk($xml); +use_dom($p->parse_chunk("",1)); +1; +}); +} +$_->join for(threads->list); +# TEST +ok(1, 'XML::LibXML thread.'); +} + +$p = XML::LibXML->new(); +# parse a big file using the same parser +{ +for(1..MAX_THREADS) +{ + threads->new(sub { +open my $fh, '<', $bigfile + or die "Cannot open '$bigfile'!"; +my $doc = $p->parse_fh($fh); +close $fh; +2; +}); +} +my @results = $_->join for(threads->list); +# TEST +ok(1, 'threads->join after opening bigfile.'); +} + +# create elements +{ +my @n = map XML::LibXML::Element->new('bar'.$_), 1..1000; +for(1..MAX_THREADS) +{ + threads->new(sub { + push @n, map XML::LibXML::Element->new('foo'.$_), 1..1000; +1; +}); +} +$_->join for(threads->list); +# TEST +ok(1, 'create elements'); +} + +{ +my $e = XML::LibXML::Element->new('foo'); +for(1..MAX_THREADS) { + threads->new(sub { + if ($_[0]==1) { + my $d = XML::LibXML::Document->new(); + $d->setDocumentElement($d->createElement('root')); + $d->documentElement->appendChild($e); + } + 1; + },$_); +} +$_->join for(threads->list); +# TEST +ok(1, "docfrag"); +} + +{ +my $e = XML::LibXML::Element->new('foo'); +my $d = XML::LibXML::Document->new(); +$d->setDocumentElement($d->createElement('root')); +for(1..MAX_THREADS) { + threads->new(sub { + if ($_[0]==1) { + $d->documentElement->appendChild($e); + } + 1; + },$_); +} +$_->join for(threads->list); +# TEST +ok(1, "docfrag2"); +} + +{ +my $e = XML::LibXML::Element->new('foo'); +for(1..MAX_THREADS) { + threads->new(sub { + if ($_[0]==1) { + XML::LibXML::Element->new('root')->appendChild($e); + } + 1; + },$_); +} +$_->join for(threads->list); +# TEST +ok(1, "docfrag3"); +} + diff --git a/t/91unique_key.t b/t/91unique_key.t new file mode 100644 index 0000000..fd5a0bb --- /dev/null +++ b/t/91unique_key.t @@ -0,0 +1,86 @@ +# -*- cperl -*- +# $Id$ + +## +# This test checks that unique_key works correctly. +# it relies on the success of t/01basic.t, t/02parse.t, +# t/04node.t and namespace tests (not done yet) + +use Test::More tests => 31; + +use XML::LibXML; +use XML::LibXML::Common qw(:libxml); +use strict; +use warnings; +my $xmlstring = q{bar}; + +my $parser = XML::LibXML->new(); +my $doc = $parser->parse_string( $xmlstring ); + +my $foo = $doc->documentElement; + +# TEST:$num_children=5; +my @children_1 = $foo->childNodes; +my @children_2 = $foo->childNodes; + +# TEST +ok($children_1[0]->can('unique_key'), 'unique_key method available') + or exit -1; + +# compare unique keys between all nodes in the above tiny document. +# Different nodes should have different keys; same nodes should have the same keys. +for my $c1(0..4){ + for my $c2(0..4){ + if($c1 == $c2){ + # TEST*$num_children + ok($children_1[$c1]->unique_key == $children_2[$c2]->unique_key, + 'Key for ' . $children_1[$c1]->nodeName . + ' matches key from same node'); + }else{ + # TEST*($num_children)*($num_children-1) + ok($children_1[$c1]->unique_key != $children_2[$c2]->unique_key, + 'Key for ' . $children_1[$c1]->nodeName . + ' does not match key for' . $children_2[$c2]->nodeName); + } + } +} + +my $foo_default_ns = XML::LibXML::Namespace->new('foo.com'); +my $foo_ns = XML::LibXML::Namespace->new('foo.com','foo'); +my $bar_default_ns = XML::LibXML::Namespace->new('bar.com'); +my $bar_ns = XML::LibXML::Namespace->new('bar.com','bar'); + +# TEST +is( + XML::LibXML::Namespace->new('foo.com')->unique_key, + XML::LibXML::Namespace->new('foo.com')->unique_key, + 'default foo ns key matches itself' +); + +# TEST +isnt( + XML::LibXML::Namespace->new('foo.com', 'foo')->unique_key, + XML::LibXML::Namespace->new('foo.com', 'bar')->unique_key, + q[keys for ns's with different prefixes don't match] +); + +# TEST +isnt( + XML::LibXML::Namespace->new('foo.com', 'foo')->unique_key, + XML::LibXML::Namespace->new('foo.com')->unique_key, + q[key for prefixed ns doesn't match key for default ns] +); + +# TEST +isnt( + XML::LibXML::Namespace->new('foo.com', 'foo')->unique_key, + XML::LibXML::Namespace->new('bar.com', 'foo')->unique_key, + q[keys for ns's with different URI's don't match] +); + +# TEST +isnt( + XML::LibXML::Namespace->new('foo.com', 'foo')->unique_key, + XML::LibXML::Namespace->new('bar.com', 'bar')->unique_key, + q[keys for ns's with different URI's and prefixes don't match] +); diff --git a/t/cpan-changes.t b/t/cpan-changes.t new file mode 100644 index 0000000..5eb4a73 --- /dev/null +++ b/t/cpan-changes.t @@ -0,0 +1,13 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Test::More; + +plan skip_all => "These tests are for authors only!" unless $ENV{AUTHOR_TESTING} or $ENV{RELEASE_TESTING}; + +eval 'use Test::CPAN::Changes 0.27'; +plan skip_all => 'Test::CPAN::Changes 0.27 required for this test' if $@; + +changes_ok(); diff --git a/t/data/callbacks_returning_undef.xml b/t/data/callbacks_returning_undef.xml new file mode 100644 index 0000000..9ec81b2 --- /dev/null +++ b/t/data/callbacks_returning_undef.xml @@ -0,0 +1 @@ + diff --git a/t/data/chinese.xml b/t/data/chinese.xml new file mode 100644 index 0000000..a8de33f --- /dev/null +++ b/t/data/chinese.xml @@ -0,0 +1,4 @@ + + 主題 + é—œéµè©ž + diff --git a/t/lib/Collector.pm b/t/lib/Collector.pm new file mode 100644 index 0000000..2f09aaa --- /dev/null +++ b/t/lib/Collector.pm @@ -0,0 +1,73 @@ +package Collector; + +use strict; +use warnings; + +sub new +{ + my $class = shift; + + my $self = bless {}, $class; + + $self->_init(@_); + + return $self; +} + +sub _init +{ + my $self = shift; + my $args = shift; + + $self->_reset; + + $self->_callback( $args->{gen_cb}->($self->_calc_op_callback()) ); + + $self->_init_returned_cb; + + return; +} + +sub _callback +{ + my $self = shift; + + if (@_) + { + $self->{_callback} = shift; + } + + return $self->{_callback}; +} + +sub _returned_cb +{ + my $self = shift; + + if (@_) + { + $self->{_returned_cb} = shift; + } + + return $self->{_returned_cb}; +} + +sub _init_returned_cb +{ + my $self = shift; + + $self->_returned_cb( + sub { + return $self->_callback()->(@_); + } + ); + + return; +} + +sub cb +{ + return shift->_returned_cb(); +} + +1; diff --git a/t/lib/Counter.pm b/t/lib/Counter.pm new file mode 100644 index 0000000..6871038 --- /dev/null +++ b/t/lib/Counter.pm @@ -0,0 +1,60 @@ +package Counter; + +use strict; +use warnings; + +use parent 'Collector'; + +sub _counter +{ + my $self = shift; + + if (@_) + { + $self->{_counter} = shift; + } + + return $self->{_counter}; +} + + +sub _increment +{ + my $self = shift; + + $self->_counter($self->_counter + 1); + + return; +} + +sub _reset +{ + my $self = shift; + + $self->_counter(0); + + return; +} + +sub _calc_op_callback { + my $self = shift; + + return sub { + return $self->_increment(); + }; +} + +sub test +{ + my ($self, $value, $blurb) = @_; + + local $Test::Builder::Level = $Test::Builder::Level + 1; + + Test::More::is ($self->_counter(), $value, $blurb); + + $self->_reset; + + return; +} + +1; diff --git a/t/lib/Stacker.pm b/t/lib/Stacker.pm new file mode 100644 index 0000000..d238332 --- /dev/null +++ b/t/lib/Stacker.pm @@ -0,0 +1,64 @@ +package Stacker; + +use strict; +use warnings; + +use TestHelpers qw(eq_or_diff); + +use parent 'Collector'; + +sub _stack +{ + my $self = shift; + + if (@_) + { + $self->{_stack} = shift; + } + + return $self->{_stack}; +} + +sub _push +{ + my $self = shift; + my $item = shift; + + push @{$self->_stack()}, $item; + + return; +} + +sub _reset +{ + my $self = shift; + + $self->_stack([]); + + return; +} + +sub _calc_op_callback { + my $self = shift; + + return sub { + my $item = shift; + + return $self->_push($item); + }; +} + +sub test +{ + my ($self, $value, $blurb) = @_; + + local $Test::Builder::Level = $Test::Builder::Level + 1; + + eq_or_diff ($self->_stack(), $value, $blurb); + + $self->_reset; + + return; +} + +1; diff --git a/t/lib/TestHelpers.pm b/t/lib/TestHelpers.pm new file mode 100644 index 0000000..286519e --- /dev/null +++ b/t/lib/TestHelpers.pm @@ -0,0 +1,62 @@ +package TestHelpers; + +use strict; +use warnings; + +our @EXPORT_OK = (qw(slurp utf8_slurp eq_or_diff)); + +use parent 'Exporter'; + +use Test::More (); + +sub slurp +{ + my $filename = shift; + + open my $in, "<", $filename + or die "Cannot open '$filename' for slurping - $!"; + + local $/; + my $contents = <$in>; + + close($in); + + return $contents; +} + +sub utf8_slurp +{ + my $filename = shift; + + open my $in, '<', $filename + or die "Cannot open '$filename' for slurping - $!"; + + binmode $in, ':utf8'; + + local $/; + my $contents = <$in>; + + close($in); + + return $contents; +} + +my $_eq_or_diff_ref; + +if (eval "require Test::Differences; 1;" && (!$@)) +{ + $_eq_or_diff_ref = \&Test::Differences::eq_or_diff; +} +else +{ + $_eq_or_diff_ref = \&Test::More::is_deeply; +} + +sub eq_or_diff +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + return $_eq_or_diff_ref->(@_); +} + +1; diff --git a/t/pod-files-presence.t b/t/pod-files-presence.t new file mode 100644 index 0000000..02b069d --- /dev/null +++ b/t/pod-files-presence.t @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Test::More; +use File::Spec; + +if ( ! $ENV{AUTHOR_TESTING} ) { + plan skip_all => "only for AUTHORS"; +} else { + plan tests => 3; +} + +sub _is_present +{ + my $path = shift; + + my $fn = File::Spec->catfile( File::Spec->curdir(), @$path ); + + return ( ( -e $fn ) and ( ( -s $fn ) > 0 ) ); +} + +{ + # TEST*3 + foreach my $path ( + [qw#lib XML LibXML DOM.pod#], + [qw#lib XML LibXML Document.pod#], + [qw#lib XML LibXML Parser.pod#], + ) + { + if ( !ok( scalar( _is_present($path) ), "Path [@$path] exists." ) ) + { + diag('Perhaps you should run "make docs"'); + } + } +} diff --git a/t/pod.t b/t/pod.t new file mode 100644 index 0000000..6eb37b9 --- /dev/null +++ b/t/pod.t @@ -0,0 +1,9 @@ +#!perl -T + +use Test::More; + +plan skip_all => "These tests are for authors only!" unless $ENV{AUTHOR_TESTING} or $ENV{RELEASE_TESTING}; + +eval "use Test::Pod 1.14"; +plan skip_all => "Test::Pod 1.14 required for testing POD" if $@; +all_pod_files_ok(); diff --git a/t/release-kwalitee.t b/t/release-kwalitee.t new file mode 100644 index 0000000..9e67534 --- /dev/null +++ b/t/release-kwalitee.t @@ -0,0 +1,12 @@ +#!perl + +use strict; +use warnings; +use Test::More; # needed to provide plan. + +plan skip_all => "These tests are for authors only!" unless $ENV{AUTHOR_TESTING} or $ENV{RELEASE_TESTING}; + +eval { require Test::Kwalitee }; +plan skip_all => "Test::Kwalitee required for testing kwalitee: $@" if $@; + +eval "use Test::Kwalitee"; diff --git a/t/style-trailing-space.t b/t/style-trailing-space.t new file mode 100644 index 0000000..8040b9d --- /dev/null +++ b/t/style-trailing-space.t @@ -0,0 +1,32 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Test::More; + +plan skip_all => "These tests are for authors only!" unless $ENV{AUTHOR_TESTING} or $ENV{RELEASE_TESTING}; + +eval "use Test::TrailingSpace"; +if ($@) +{ + plan skip_all => "Test::TrailingSpace required for trailing space test."; +} +else +{ + plan tests => 1; +} + +# TODO: add .pod, .PL, the README/Changes/TODO/etc. documents and possibly +# some other stuff. +my $finder = Test::TrailingSpace->new( + { + root => '.', + filename_regex => qr/(?:\.(?:t|pm|pl|xs|c|h|txt|pod|PL)|README|Changes|TODO|LICENSE)\z/, + }, +); + +# TEST +$finder->no_trailing_space( + "No trailing space was found." +); diff --git a/test/relaxng/badschema.rng b/test/relaxng/badschema.rng new file mode 100644 index 0000000..78918ee --- /dev/null +++ b/test/relaxng/badschema.rng @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/test/relaxng/demo.rng b/test/relaxng/demo.rng new file mode 100644 index 0000000..7c9ee1b --- /dev/null +++ b/test/relaxng/demo.rng @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/relaxng/demo.xml b/test/relaxng/demo.xml new file mode 100644 index 0000000..6b0cac1 --- /dev/null +++ b/test/relaxng/demo.xml @@ -0,0 +1 @@ +hello \ No newline at end of file diff --git a/test/relaxng/demo2.rng b/test/relaxng/demo2.rng new file mode 100644 index 0000000..f528f73 --- /dev/null +++ b/test/relaxng/demo2.rng @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/relaxng/demo3.rng b/test/relaxng/demo3.rng new file mode 100644 index 0000000..73e1eb6 --- /dev/null +++ b/test/relaxng/demo3.rng @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/test/relaxng/demo4.rng b/test/relaxng/demo4.rng new file mode 100644 index 0000000..7ee0d0b --- /dev/null +++ b/test/relaxng/demo4.rng @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/relaxng/invaliddemo.xml b/test/relaxng/invaliddemo.xml new file mode 100644 index 0000000..b00b8e7 --- /dev/null +++ b/test/relaxng/invaliddemo.xml @@ -0,0 +1 @@ +hello diff --git a/test/relaxng/net.rng b/test/relaxng/net.rng new file mode 100644 index 0000000..32f8571 --- /dev/null +++ b/test/relaxng/net.rng @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/test/relaxng/schema.rng b/test/relaxng/schema.rng new file mode 100644 index 0000000..7c9ee1b --- /dev/null +++ b/test/relaxng/schema.rng @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/schema/badschema.xsd b/test/schema/badschema.xsd new file mode 100644 index 0000000..8e09830 --- /dev/null +++ b/test/schema/badschema.xsd @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/schema/demo.xml b/test/schema/demo.xml new file mode 100644 index 0000000..19d0790 --- /dev/null +++ b/test/schema/demo.xml @@ -0,0 +1,6 @@ + +foo +2 +1.0 +No comment ! + diff --git a/test/schema/invaliddemo.xml b/test/schema/invaliddemo.xml new file mode 100644 index 0000000..b9cb5ac --- /dev/null +++ b/test/schema/invaliddemo.xml @@ -0,0 +1,6 @@ + +foo +2 +1.0 +No comment ! + diff --git a/test/schema/net.xsd b/test/schema/net.xsd new file mode 100644 index 0000000..21bb390 --- /dev/null +++ b/test/schema/net.xsd @@ -0,0 +1,4 @@ + + + + diff --git a/test/schema/schema.xsd b/test/schema/schema.xsd new file mode 100644 index 0000000..315e06e --- /dev/null +++ b/test/schema/schema.xsd @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/textReader/countries.xml b/test/textReader/countries.xml new file mode 100644 index 0000000..3d5eeb2 --- /dev/null +++ b/test/textReader/countries.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/test/xinclude/entity.txt b/test/xinclude/entity.txt new file mode 100644 index 0000000..5e79fa2 --- /dev/null +++ b/test/xinclude/entity.txt @@ -0,0 +1 @@ +IT WORKS! diff --git a/test/xinclude/test.xml b/test/xinclude/test.xml new file mode 100644 index 0000000..4e83bf4 --- /dev/null +++ b/test/xinclude/test.xml @@ -0,0 +1,4 @@ + + + + diff --git a/test/xinclude/xinclude.xml b/test/xinclude/xinclude.xml new file mode 100644 index 0000000..75c701d --- /dev/null +++ b/test/xinclude/xinclude.xml @@ -0,0 +1,7 @@ + + +]> + + &trend; + diff --git a/typemap b/typemap new file mode 100644 index 0000000..b10104b --- /dev/null +++ b/typemap @@ -0,0 +1,108 @@ +TYPEMAP +const char * T_PV +xmlParserCtxtPtr O_PARSER_OBJECT +xmlRelaxNGPtr O_OBJECT +xmlPatternPtr O_PATTERN_OBJECT +xmlRegexpPtr O_REGEXP_OBJECT +xmlSchemaPtr O_OBJECT +xmlNodeSetPtr O_OBJECT +perlxmlParserObjectPtr O_OBJECT +xmlDocPtr O_DOC_OBJECT +xmlNodePtr O_NODE_OBJECT +xmlDtdPtr O_NODE_OBJECT +xmlTextReaderPtr O_OBJECT +xmlErrorPtr O_OBJECT +xmlHashTablePtr O_OBJECT +xmlXPathCompExprPtr O_XPATH_OBJECT + +INPUT +O_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) + $var = INT2PTR($type,SvIV((SV*)SvRV( $arg ))); + else{ + warn( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); + XSRETURN_UNDEF; + } + +O_DOC_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) { + $var = INT2PTR($type,PmmSvNode($arg)); + if ( $var == NULL ) { + croak( \"${Package}::$func_name() -- $var contains no data\" ); + XSRETURN_UNDEF; + } + } + else{ + croak( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); + XSRETURN_UNDEF; + } + +O_NODE_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) { + $var = INT2PTR($type,PmmSvNode($arg)); + if ( $var == NULL ) { + croak( \"${Package}::$func_name() -- $var contains no data\" ); + XSRETURN_UNDEF; + } + } + else { + croak( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); + XSRETURN_UNDEF; + } + +O_PARSER_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) { + $var = PmmSvContext($arg); + if ( $var == NULL ) { + croak( \"${Package}::$func_name() -- $var contains no parse context\" ); + XSRETURN_UNDEF; + } + } + else { + croak( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); + XSRETURN_UNDEF; + } + +O_XPATH_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) && sv_isa($arg,\"XML::LibXML::XPathExpression\")) + $var = INT2PTR($type,SvIV((SV*)SvRV( $arg ))); + else{ + warn( \"${Package}::$func_name() -- $var is not a XML::LibXML::XPathExpression\" ); + XSRETURN_UNDEF; + } + +O_PATTERN_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) && sv_isa($arg,\"XML::LibXML::Pattern\")) + $var = INT2PTR($type,SvIV((SV*)SvRV( $arg ))); + else{ + warn( \"${Package}::$func_name() -- $var is not a XML::LibXML::Pattern\" ); + XSRETURN_UNDEF; + } + +O_REGEXP_OBJECT + if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) && sv_isa($arg,\"XML::LibXML::RegExp\")) + $var = INT2PTR($type,SvIV((SV*)SvRV( $arg ))); + else{ + warn( \"${Package}::$func_name() -- $var is not a XML::LibXML::RegExp\" ); + XSRETURN_UNDEF; + } + + +OUTPUT + +# The Perl object is blessed into 'CLASS', which should be a +# char* having the name of the package for the blessing. +O_OBJECT + sv_setref_pv( $arg, (char *)CLASS, (void*)$var ); + +O_PATTERN_OBJECT + sv_setref_pv( $arg, (char *)\"XML::LibXML::Pattern\", (void*)$var ); + +O_REGEXP_OBJECT + sv_setref_pv( $arg, (char *)\"XML::LibXML::RegExp\", (void*)$var ); + +O_XPATH_OBJECT + sv_setref_pv( $arg, (char *)\"XML::LibXML::XPathExpression\", (void*)$var ); + +O_PARSER_OBJECT + $arg = PmmContextSv( $var ); diff --git a/xpath.c b/xpath.c new file mode 100644 index 0000000..eaaeeae --- /dev/null +++ b/xpath.c @@ -0,0 +1,406 @@ +/* $Id$ + * + * This is free software, you may use it and distribute it under the same terms as + * Perl itself. + * + * Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas +*/ + +#include +#include +#include +#include + +#include "EXTERN.h" + +#include "dom.h" +#include "xpath.h" + +void +perlDocumentFunction(xmlXPathParserContextPtr ctxt, int nargs){ + xmlXPathObjectPtr obj = NULL, obj2 = NULL; + xmlChar *base = NULL, *URI = NULL; + + + if ((nargs < 1) || (nargs > 2)) { + ctxt->error = XPATH_INVALID_ARITY; + return; + } + if (ctxt->value == NULL) { + ctxt->error = XPATH_INVALID_TYPE; + return; + } + + if (nargs == 2) { + if (ctxt->value->type != XPATH_NODESET) { + ctxt->error = XPATH_INVALID_TYPE; + return; + } + + obj2 = valuePop(ctxt); + } + + + /* first assure the XML::LibXML error handler is deactivated + otherwise strange things might happen + */ + + if (ctxt->value->type == XPATH_NODESET) { + int i; + xmlXPathObjectPtr newobj, ret; + + obj = valuePop(ctxt); + ret = xmlXPathNewNodeSet(NULL); + + if (obj->nodesetval) { + for (i = 0; i < obj->nodesetval->nodeNr; i++) { + valuePush(ctxt, + xmlXPathNewNodeSet(obj->nodesetval->nodeTab[i])); + xmlXPathStringFunction(ctxt, 1); + if (nargs == 2) { + valuePush(ctxt, xmlXPathObjectCopy(obj2)); + } else { + valuePush(ctxt, + xmlXPathNewNodeSet(obj->nodesetval->nodeTab[i])); + } + perlDocumentFunction(ctxt, 2); + newobj = valuePop(ctxt); + ret->nodesetval = xmlXPathNodeSetMerge(ret->nodesetval, + newobj->nodesetval); + xmlXPathFreeObject(newobj); + } + } + + xmlXPathFreeObject(obj); + if (obj2 != NULL) + xmlXPathFreeObject(obj2); + valuePush(ctxt, ret); + + /* reset the error old error handler before leaving + */ + return; + } + /* + * Make sure it's converted to a string + */ + xmlXPathStringFunction(ctxt, 1); + if (ctxt->value->type != XPATH_STRING) { + ctxt->error = XPATH_INVALID_TYPE; + if (obj2 != NULL) + xmlXPathFreeObject(obj2); + + /* reset the error old error handler before leaving + */ + + return; + } + obj = valuePop(ctxt); + if (obj->stringval == NULL) { + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + } else { + if ((obj2 != NULL) && (obj2->nodesetval != NULL) && + (obj2->nodesetval->nodeNr > 0)) { + xmlNodePtr target; + + target = obj2->nodesetval->nodeTab[0]; + if (target->type == XML_ATTRIBUTE_NODE) { + target = ((xmlAttrPtr) target)->parent; + } + base = xmlNodeGetBase(target->doc, target); + } else { + base = xmlNodeGetBase(ctxt->context->node->doc, ctxt->context->node); + } + URI = xmlBuildURI(obj->stringval, base); + if (base != NULL) + xmlFree(base); + if (URI == NULL) { + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + } else { + if (xmlStrEqual(ctxt->context->node->doc->URL, URI)) { + valuePush(ctxt, xmlXPathNewNodeSet((xmlNodePtr)ctxt->context->node->doc)); + } + else { + xmlDocPtr doc; + doc = xmlParseFile((const char *)URI); + if (doc == NULL) + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + else { + /* TODO: use XPointer of HTML location for fragment ID */ + /* pbm #xxx can lead to location sets, not nodesets :-) */ + valuePush(ctxt, xmlXPathNewNodeSet((xmlNodePtr) doc)); + } + } + xmlFree(URI); + } + } + xmlXPathFreeObject(obj); + if (obj2 != NULL) + xmlXPathFreeObject(obj2); + + /* reset the error old error handler before leaving + */ +} + + +/** + * Most of the code is stolen from testXPath. + * The almost only thing I added, is the storeing of the data, so + * we can access the data easily - or say more easiely than through + * libxml2. + **/ + +xmlXPathObjectPtr +domXPathFind( xmlNodePtr refNode, xmlChar * path, int to_bool ) { + xmlXPathObjectPtr res = NULL; + xmlXPathCompExprPtr comp; + comp = xmlXPathCompile( path ); + if ( comp == NULL ) { + return NULL; + } + res = domXPathCompFind(refNode,comp,to_bool); + xmlXPathFreeCompExpr(comp); + return res; +} + +xmlXPathObjectPtr +domXPathCompFind( xmlNodePtr refNode, xmlXPathCompExprPtr comp, int to_bool ) { + xmlXPathObjectPtr res = NULL; + + if ( refNode != NULL && comp != NULL ) { + xmlXPathContextPtr ctxt; + + xmlDocPtr tdoc = NULL; + xmlNodePtr froot = refNode; + + if ( comp == NULL ) { + return NULL; + } + + if ( refNode->doc == NULL ) { + /* if one XPaths a node from a fragment, libxml2 will + refuse the lookup. this is not very useful for XML + scripters. thus we need to create a temporary document + to make libxml2 do it's job correctly. + */ + tdoc = xmlNewDoc( NULL ); + + /* find refnode's root node */ + while ( froot != NULL ) { + if ( froot->parent == NULL ) { + break; + } + froot = froot->parent; + } + xmlAddChild((xmlNodePtr)tdoc, froot); + xmlSetTreeDoc(froot, tdoc); /* probably no need to clean psvi */ + froot->doc = tdoc; + /* refNode->doc = tdoc; */ + } + + /* prepare the xpath context */ + ctxt = xmlXPathNewContext( refNode->doc ); + ctxt->node = refNode; + /* get the namespace information */ + if (refNode->type == XML_DOCUMENT_NODE) { + ctxt->namespaces = xmlGetNsList( refNode->doc, + xmlDocGetRootElement( refNode->doc ) ); + } + else { + ctxt->namespaces = xmlGetNsList(refNode->doc, refNode); + } + ctxt->nsNr = 0; + if (ctxt->namespaces != NULL) { + while (ctxt->namespaces[ctxt->nsNr] != NULL) + ctxt->nsNr++; + } + + xmlXPathRegisterFunc(ctxt, + (const xmlChar *) "document", + perlDocumentFunction); + if (to_bool) { +#if LIBXML_VERSION >= 20627 + int val = xmlXPathCompiledEvalToBoolean(comp, ctxt); + res = xmlXPathNewBoolean(val); +#else + res = xmlXPathCompiledEval(comp, ctxt); + if (res!=NULL) { + int val = xmlXPathCastToBoolean(res); + xmlXPathFreeObject(res); + res = xmlXPathNewBoolean(val); + } +#endif + } else { + res = xmlXPathCompiledEval(comp, ctxt); + } + if (ctxt->namespaces != NULL) { + xmlFree( ctxt->namespaces ); + } + + xmlXPathFreeContext(ctxt); + + if ( tdoc != NULL ) { + /* after looking through a fragment, we need to drop the + fake document again */ + xmlSetTreeDoc(froot, NULL); /* probably no need to clean psvi */ + froot->doc = NULL; + froot->parent = NULL; + tdoc->children = NULL; + tdoc->last = NULL; + /* next line is not required anymore */ + /* refNode->doc = NULL; */ + + xmlFreeDoc( tdoc ); + } + } + return res; +} + +/* this function is not actually used: */ +xmlNodeSetPtr +domXPathSelect( xmlNodePtr refNode, xmlChar * path ) { + xmlNodeSetPtr rv = NULL; + xmlXPathObjectPtr res = NULL; + + res = domXPathFind( refNode, path, 0 ); + + if (res != NULL) { + /* here we have to transfer the result from the internal + structure to the return value */ + /* get the result from the query */ + /* we have to unbind the nodelist, so free object can + not kill it */ + rv = res->nodesetval; + res->nodesetval = 0 ; + } + + xmlXPathFreeObject(res); + + return rv; +} + +/* this function is not actually used: */ +xmlNodeSetPtr +domXPathCompSelect( xmlNodePtr refNode, xmlXPathCompExprPtr comp ) { + xmlNodeSetPtr rv = NULL; + xmlXPathObjectPtr res = NULL; + + res = domXPathCompFind( refNode, comp, 0 ); + + if (res != NULL) { + /* here we have to transfer the result from the internal + structure to the return value */ + /* get the result from the query */ + /* we have to unbind the nodelist, so free object can + not kill it */ + rv = res->nodesetval; + res->nodesetval = 0 ; + } + + xmlXPathFreeObject(res); + + return rv; +} + +/** + * Most of the code is stolen from testXPath. + * The almost only thing I added, is the storeing of the data, so + * we can access the data easily - or say more easiely than through + * libxml2. + **/ + +xmlXPathObjectPtr +domXPathFindCtxt( xmlXPathContextPtr ctxt, xmlChar * path, int to_bool ) { + xmlXPathObjectPtr res = NULL; + if ( ctxt->node != NULL && path != NULL ) { + xmlXPathCompExprPtr comp; + comp = xmlXPathCompile( path ); + if ( comp == NULL ) { + return NULL; + } + res = domXPathCompFindCtxt(ctxt,comp,to_bool); + xmlXPathFreeCompExpr(comp); + } + return res; +} + +xmlXPathObjectPtr +domXPathCompFindCtxt( xmlXPathContextPtr ctxt, xmlXPathCompExprPtr comp, int to_bool ) { + xmlXPathObjectPtr res = NULL; + if ( ctxt != NULL && ctxt->node != NULL && comp != NULL ) { + xmlDocPtr tdoc = NULL; + xmlNodePtr froot = ctxt->node; + + if ( ctxt->node->doc == NULL ) { + /* if one XPaths a node from a fragment, libxml2 will + refuse the lookup. this is not very useful for XML + scripters. thus we need to create a temporary document + to make libxml2 do it's job correctly. + */ + + tdoc = xmlNewDoc( NULL ); + + /* find refnode's root node */ + while ( froot != NULL ) { + if ( froot->parent == NULL ) { + break; + } + froot = froot->parent; + } + xmlAddChild((xmlNodePtr)tdoc, froot); + xmlSetTreeDoc(froot,tdoc); /* probably no need to clean psvi */ + froot->doc = tdoc; + /* ctxt->node->doc = tdoc; */ + } + if (to_bool) { +#if LIBXML_VERSION >= 20627 + int val = xmlXPathCompiledEvalToBoolean(comp, ctxt); + res = xmlXPathNewBoolean(val); +#else + res = xmlXPathCompiledEval(comp, ctxt); + if (res!=NULL) { + int val = xmlXPathCastToBoolean(res); + xmlXPathFreeObject(res); + res = xmlXPathNewBoolean(val); + } +#endif + } else { + res = xmlXPathCompiledEval(comp, ctxt); + } + if ( tdoc != NULL ) { + /* after looking through a fragment, we need to drop the + fake document again */ + xmlSetTreeDoc(froot,NULL); /* probably no need to clean psvi */ + froot->doc = NULL; + froot->parent = NULL; + tdoc->children = NULL; + tdoc->last = NULL; + if (ctxt->node) { + ctxt->node->doc = NULL; + } + xmlFreeDoc( tdoc ); + } + } + return res; +} + +xmlNodeSetPtr +domXPathSelectCtxt( xmlXPathContextPtr ctxt, xmlChar * path ) { + xmlNodeSetPtr rv = NULL; + xmlXPathObjectPtr res = NULL; + + res = domXPathFindCtxt( ctxt, path, 0 ); + + if (res != NULL) { + /* here we have to transfer the result from the internal + structure to the return value */ + /* get the result from the query */ + /* we have to unbind the nodelist, so free object can + not kill it */ + rv = res->nodesetval; + res->nodesetval = 0 ; + } + + xmlXPathFreeObject(res); + + return rv; +} diff --git a/xpath.h b/xpath.h new file mode 100644 index 0000000..d3ce1cb --- /dev/null +++ b/xpath.h @@ -0,0 +1,31 @@ +#ifndef __LIBXML_XPATH_H__ +#define __LIBXML_XPATH_H__ + +#include +#include + +void +perlDocumentFunction( xmlXPathParserContextPtr ctxt, int nargs ); + +xmlNodeSetPtr +domXPathSelect( xmlNodePtr refNode, xmlChar * xpathstring ); + +xmlXPathObjectPtr +domXPathFind( xmlNodePtr refNode, xmlChar * xpathstring, int to_bool ); + +xmlNodeSetPtr +domXPathCompSelect( xmlNodePtr refNode, xmlXPathCompExprPtr comp ); + +xmlXPathObjectPtr +domXPathCompFind( xmlNodePtr refNode, xmlXPathCompExprPtr comp, int to_bool ); + +xmlNodeSetPtr +domXPathSelectCtxt( xmlXPathContextPtr ctxt, xmlChar * xpathstring ); + +xmlXPathObjectPtr +domXPathFindCtxt( xmlXPathContextPtr ctxt, xmlChar * xpathstring, int to_bool ); + +xmlXPathObjectPtr +domXPathCompFindCtxt( xmlXPathContextPtr ctxt, xmlXPathCompExprPtr comp, int to_bool ); + +#endif diff --git a/xpathcontext.h b/xpathcontext.h new file mode 100644 index 0000000..725a847 --- /dev/null +++ b/xpathcontext.h @@ -0,0 +1,22 @@ +#ifndef __LIBXML_XPATHCONTEXT_H__ +#define __LIBXML_XPATHCONTEXT_H__ + +/* + * xpathcontext.h + * + * This file is directly included into LibXML.xs. + * + */ + +struct _XPathContextData { + SV* node; + HV* pool; + SV* varLookup; + SV* varData; +}; +typedef struct _XPathContextData XPathContextData; +typedef XPathContextData* XPathContextDataPtr; + +#define XPathContextDATA(ctxt) ((XPathContextDataPtr) ctxt->user) + +#endif