Import Upstream version 2.0207

This commit is contained in:
su-fang 2022-09-27 15:00:17 +08:00
commit 1d68b2273b
213 changed files with 62471 additions and 0 deletions

97
Av_CharPtrPtr.c Normal file
View File

@ -0,0 +1,97 @@
/* Modified from API Cookbook A Example 8 */
#ifdef __cplusplus
extern "C" {
#endif
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "Av_CharPtrPtr.h" /* XS_*_charPtrPtr() */
#ifdef __cplusplus
}
#endif
#if defined(_MSC_VER)
#define _CRT_SECURE_NO_DEPRECATE 1
#define _CRT_NONSTDC_NO_DEPRECATE 1
#endif
/* Used by the INPUT typemap for char**.
 * Converts a Perl array reference (whose elements are strings) into a
 * freshly allocated C char** terminated by a NULL sentinel.
 *
 * rv - expected to be a reference to an AV.
 *
 * Returns NULL when rv is not an array reference, when the array is
 * empty, or when the pointer array cannot be allocated.  Otherwise the
 * caller owns the result and releases it with XS_release_charPtrPtr().
 *
 * Slot x of the result corresponds to element x of the array.  A slot is
 * NULL when the element is missing, is not a string (a warning is
 * issued), or could not be allocated.  Consumers that walk the array up
 * to the first NULL will treat such a slot as the end of the list.
 */
char ** XS_unpack_charPtrPtr(SV* rv )
{
    AV *av;
    SV **ssv;
    char **s;
    int avlen;
    int x;

    if( !SvROK( rv ) || (SvTYPE(SvRV(rv)) != SVt_PVAV) ){
        return( (char**)NULL );
    }
    av = (AV*)SvRV(rv);

    /* is it empty? */
    avlen = av_len(av);
    if( avlen < 0 ){
        return( (char**)NULL );
    }

    /* av_len() is the highest index, so there are avlen + 1 strings;
     * one more slot holds the end-of-array sentinel.
     */
    s = (char **)safemalloc( sizeof(char*) * (avlen + 2) );
    if( s == NULL ){
        warn("XS_unpack_charPtrPtr: unable to malloc char**");
        return( (char**)NULL );
    }

    for( x = 0; x <= avlen; ++x ){
        /* Default the slot to NULL first, so that no path below can leave
         * an indeterminate pointer for XS_release_charPtrPtr() to free.
         */
        s[x] = (char*)NULL;

        ssv = av_fetch( av, x, 0 );
        if( ssv == NULL ){
            continue;
        }
        if( !SvPOK( *ssv ) ){
            warn("XS_unpack_charPtrPtr: array elem %d was not a string.", x );
            continue;
        }

        /* SvCUR bytes of string data plus the terminating NUL. */
        s[x] = (char *)safemalloc( SvCUR(*ssv) + 1 );
        if( s[x] == NULL ){
            warn("XS_unpack_charPtrPtr: unable to malloc char*");
            continue;
        }
        strcpy( s[x], SvPV( *ssv, PL_na ) );
    }
    s[x] = (char*)NULL; /* sentinel */
    return( s );
}
/* Used by the OUTPUT typemap for char**.
 * Converts a C char** into a Perl AV* and makes st a reference to it.
 *
 * st - the SV that is turned into a reference to the new array.
 * s  - array of NUL-terminated strings ending with a NULL sentinel.
 *      A NULL array (which XS_unpack_charPtrPtr() returns for an empty
 *      or invalid input) is treated as an empty list.
 */
void XS_pack_charPtrPtr(SV* st, char **s)
{
    AV *av = newAV();
    SV *sv;
    char **c;

    /* Guard against a NULL array instead of dereferencing it. */
    if( s != NULL ){
        for( c = s; *c != NULL; ++c ){
            /* A length of 0 lets newSVpv() measure the string itself. */
            sv = newSVpv( *c, 0 );
            av_push( av, sv );
        }
    }

    sv = newSVrv( st, NULL ); /* upgrade stack SV to an RV */
    SvREFCNT_dec( sv ); /* discard */
    SvRV( st ) = (SV*)av; /* make stack RV point at our AV */
}
/* Clean up the temporary char** produced by XS_unpack_charPtrPtr().
 *
 * s - the array to release; NULL is accepted and ignored, because
 *     XS_unpack_charPtrPtr() returns NULL for empty or invalid input.
 *
 * Frees every string up to the first NULL entry, then the pointer array
 * itself.
 */
void XS_release_charPtrPtr(char **s)
{
    char **c;

    if( s == NULL ){
        return;
    }
    for( c = s; *c != NULL; ++c ){
        safefree( *c );
    }
    safefree( s );
}

4
Av_CharPtrPtr.h Normal file
View File

@ -0,0 +1,4 @@
/* Convert a Perl array reference into a newly allocated char** ending
 * with a NULL sentinel; yields NULL if rv is not an array reference or
 * the array is empty. */
char ** XS_unpack_charPtrPtr _(( SV *rv ));
/* Build a Perl array from the NULL-terminated char** s and make st a
 * reference to it. */
void XS_pack_charPtrPtr _(( SV *st, char **s ));
/* Free the strings in s (up to the first NULL entry) and then s itself. */
void XS_release_charPtrPtr _(( char **s ));

1299
Changes Normal file

File diff suppressed because it is too large Load Diff

128
Devel.xs Normal file
View File

@ -0,0 +1,128 @@
/* $Id: Devel.xs 20 2011-10-11 02:05:01Z jo $
*
* This is free software, you may use it and distribute it under the same terms as
* Perl itself.
*
* Copyright 2011 Joachim Zobel
*
* This module gives external access to the functions needed to create
* and use XML::LibXML::Nodes from C functions. These functions are made
* accessible from Perl to have cleaner dependencies.
* The idea is to pass xmlNode * pointers (as typemapped void *) to and
* from Perl and call the functions that turns them to and from
* XML::LibXML::Nodes there.
*
* Be aware that using this module gives you the ability to easily create
* segfaults and memory leaks.
*/
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
#include <stdlib.h>
/* XML::LibXML stuff */
#include <libxml/xmlmemory.h>
#include "perl-libxml-mm.h"
#undef NDEBUG
#include <assert.h>
/* One-argument adapter around xmlMallocAtomicLoc(): forwards the
 * requested size and supplies the fixed placeholders "none" and 0 for
 * the remaining two arguments.
 */
static void * xmlMemMallocAtomic(size_t size)
{
    void *block = xmlMallocAtomicLoc(size, "none", 0);

    return block;
}
/* Install the xmlMem* family (free, malloc, atomic malloc, realloc,
 * strdup) as libxml2's memory functions via xmlGcMemSetup().
 *
 * Returns the value reported by xmlGcMemSetup().
 */
static int debug_memory(void)
{
    return xmlGcMemSetup( xmlMemFree,
                          xmlMemMalloc,
                          xmlMemMallocAtomic,
                          xmlMemRealloc,
                          xmlMemStrdup);
}
MODULE = XML::LibXML::Devel PACKAGE = XML::LibXML::Devel
PROTOTYPES: DISABLE
# Boot-time hook: when the DEBUG_MEMORY environment variable is set, call
# debug_memory() (defined earlier in this file), which installs the
# xmlMem* allocator functions through xmlGcMemSetup().
BOOT:
if (getenv("DEBUG_MEMORY")) {
debug_memory();
}
# Wrap a raw xmlNode pointer in a Perl-level SV.
#   n - the node pointer, received as a void *.
#   o - optional owner node pointer (defaults to NULL); when non-NULL its
#       _private field is handed to PmmNodeToSv() as the owner argument,
#       otherwise NULL is passed.
# Returns whatever PmmNodeToSv() produces.
SV*
node_to_perl( n, o = NULL )
void * n
void * o
PREINIT:
xmlNode *node = n;
xmlNode *owner = o;
CODE:
RETVAL = PmmNodeToSv(node , owner?owner->_private:NULL );
OUTPUT:
RETVAL
# Extract the underlying node pointer from a Perl SV by calling
# PmmSvNodeExt(sv, 0), and hand it back as an untyped void *.
void *
node_from_perl( sv )
SV *sv
PREINIT:
xmlNode *n = PmmSvNodeExt(sv, 0);
CODE:
RETVAL = n;
OUTPUT:
RETVAL
# Increment the reference count of the ProxyNode stored in the node's
# _private field, using PmmREFCNT_inc.  Nothing is returned.
# NOTE: n is dereferenced without a NULL check.
void
refcnt_inc( n )
void *n
PREINIT:
xmlNode *node = n;
CODE:
PmmREFCNT_inc(((ProxyNode *)(node->_private)));
# Decrement the reference count of the ProxyNode stored in the node's
# _private field and return the value yielded by PmmREFCNT_dec.
# NOTE: n is dereferenced without a NULL check.
int
refcnt_dec( n )
void *n
PREINIT:
xmlNode *node = n;
CODE:
RETVAL = PmmREFCNT_dec(((ProxyNode *)(node->_private)));
OUTPUT:
RETVAL
# Return the value of PmmREFCNT for the ProxyNode stored in the node's
# _private field, without modifying it here.
# NOTE: n is dereferenced without a NULL check.
int
refcnt( n )
void *n
PREINIT:
xmlNode *node = n;
CODE:
RETVAL = PmmREFCNT(((ProxyNode *)(node->_private)));
OUTPUT:
RETVAL
# Call PmmFixOwner() with the _private fields of node n and of parent p,
# and return its int result.
# NOTE: both n and p are dereferenced without a NULL check.
int
fix_owner( n, p )
void * n
void * p
PREINIT:
xmlNode *node = n;
xmlNode *parent = p;
CODE:
RETVAL = PmmFixOwner(node->_private , parent->_private);
OUTPUT:
RETVAL
# Return the value of libxml2's xmlMemUsed().
# NOTE(review): presumably the amount of memory currently allocated
# through the xmlMem* debug allocator; confirm against the libxml2 docs.
int
mem_used()
CODE:
RETVAL = xmlMemUsed();
OUTPUT:
RETVAL

331
HACKING.txt Normal file
View File

@ -0,0 +1,331 @@
Coding Style and Conventions for Shlomi Fish's Projects
=======================================================
Shlomi Fish <shlomif@cpan.org>
:Date: 2012-05-14
:Revision: $Id$
Perl Style Guidelines
---------------------
Use Test::More for test scripts while using Test::Count annotations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
One should use Test::More for new test scripts, while using Test::Count
( https://metacpan.org/module/Test::Count ) "# TEST" annotations. Some
of the old test scripts under +t/*.t+ had used Test.pm, but they
have all been converted to Test::More, which should be used for new code.
Any bug fixes or feature addition patches should be accompanied with
a test script to test the code.
Avoid trailing statement modifiers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
One should not use trailing "if"s "while"s "until"s, etc.
Bad:
----------------
print "Hello\n" if $cond;
----------------
Good:
----------------
if ($cond)
{
print "Hello\n";
}
----------------
Avoid until and unless
~~~~~~~~~~~~~~~~~~~~~~
"until" and "unless" should be spelled using "if !" or "while !" or
alternatively "if not" or "while not".
Make sure you update the "MANIFEST" file with any new source files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
All the new source files should be placed in the "MANIFEST" file in the core
distribution. Note that I am considering making use of "MANIFEST.SKIP"
instead, which would not necessitate that in general.
Make sure to update the "Changes" (or equivalently named) file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A patch should also patch the "Changes" file (whose name may vary) with the
explanation of the change. A Changes file should not be automatically
generated. Note that due to historical reasons, the exact format of the Changes
varies between different projects of mine and you should try to emulate the
style and format of the one in the CPAN distribution in question.
Test programs should not connect to Internet resources
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As a general rule, test programs should not connect to Internet resources
(such as global web-sites) using LWP or WWW::Mechanize or whatever, and
should rely only on local resources. The reasons for that are that relying
on such Internet resources:
* May fail if the machine does not have a fully open Internet connection.
* Will add load to the hosts in question.
* Such Internet resources can fluctuate in their content and behaviour,
which may break the tests.
Other elements to avoid
~~~~~~~~~~~~~~~~~~~~~~~
See http://perl-begin.org/tutorials/bad-elements/ .
C Style Guidelines
------------------
Here are some style guidelines for new code to be accepted into XML-LibXML:
4 Spaces for Indentation
~~~~~~~~~~~~~~~~~~~~~~~~
The source code should be kept free of horizontal
tabs (\t, HT, \x09) and use spaces alone. Furthermore, there should be
a 4 wide space indentation inside blocks:
----------------
if (COND())
{
int i;
printf("%s\n", "COND() is successful!");
for (i=0 ; i < 10 ; i++)
{
...
}
}
----------------
Curly Braces Alignment
~~~~~~~~~~~~~~~~~~~~~~
The opening curly brace of an if-statement or a for-statement should be
placed below the statement on the same level as the other line, and the
inner block indented by 4 spaces. A good example can be found in the previous
section. Here are some bad examples:
----------------
if ( COND() ) {
/* Bad because the opening brace is on the same line. */
}
----------------
----------------
if ( COND() )
{
/* Bad because the left and right braces are indented along with
the block. */
printf(....)
}
----------------
----------------
/* GNU Style - fear and loathing. */
if ( COND() )
{
printf(....)
}
----------------
Comments should precede the lines performing the action
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Comments should come one line before the line that they explain:
----------------
/* Check if it can be moved to something on the same stack */
for(dc=0;dc<c-1;dc++)
{
.
.
.
}
----------------
+TODO: Fill in+
One line clauses should be avoided
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
One should avoid one-line clauses inside the clauses of +if+, +else+,
+elsif+, +while+, etc. Instead one should wrap the single statements inside
blocks. This is to avoid common errors with extraneous semicolons:
----------------
/* Bad: */
if (COND())
printf ("%s\n", "Success!");
/* Good: */
if (COND())
{
printf ("%s\n", "Success!");
}
/* Bad: */
while (COND())
printf("%s\n", "I'm still running.");
/* Good: */
while (COND())
{
printf("%s\n", "I'm still running.");
}
----------------
Identifier Naming Conventions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Here are some naming conventions for identifiers:
1. Please do not use capital letters (including not +CamelCase+) - use
all lowercase letters with words separated by underscores. Remember, C is
case sensitive.
2. Note, however, that comments should be phrased in proper English, with
proper Capitalization and distinction between uppercase and lowercase
letters. So should the rest of the internal and external documentation.
3. Some commonly used abbreviations:
----------------
max - maximum
num - numbers
dest - destination
src - source
ptr - pointer
val - value
iter - iterator
idx - index
i, j - indexes
----------------
Don't comment-out - use #if 0 to temporarily remove code
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code should not be commented-out using gigantic +/* ... */+ comments. Instead,
it should be out-blocked using +#if 0...#endif+.
In Perl code, one can use the following POD paradigm to remove a block of
code:
----------------
=begin Removed
Removed code here.
=end Removed
=cut
----------------
No declarations after statements
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
One should make sure there are no declarations after statements in the ANSI
C code. If you're using gcc, you can make sure this is the case by adding
the flags "-Wdeclaration-after-statement -Werror" to "CCFLAGS" in the makefile.
Bad:
----------------
int my_func(int arg)
{
int var;
printf("%s\n", "Foo");
/* Declaration after statement. */
int other_var = var;
return;
}
----------------
Better:
----------------
int my_func(int arg)
{
int var;
int other_var;
printf("%s\n", "Foo");
other_var = var;
return;
}
----------------
Comments should have an empty space after the comment leader
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Comments in Perl, C, Python, Ruby, and other languages should have an
empty space after the comment leader.
Bad:
----------------
#Print a value.
print "Hello\n";
/*Print a value.*/
printf("%s\n", "Hello");
----------------
Better:
----------------
# Print a value.
print "Hello\n";
/* Print a value. */
printf("%s\n", "Hello");
----------------
sizeof(var) is preferable to sizeof(mytype_t)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Given the choice between +sizeof(var)+ (or +sizeof(*var)+) on the one hand
and +sizeof(mytype_t)+, where +mytype_t+ is a type, on the other, the former
is preferable. This way, if the type of the variable changes, one does not
need to fix the +sizeof(…)+.
sizeof() must always be enclosed in parentheses
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Do not write +sizeof int+, +sizeof mystruct_t+ etc. Instead write
+sizeof(int)+, +sizeof(mystruct_t)+ .
Types should end in “_t” ; Raw struct definitions in “_struct”
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
New typedefs should call their types in names that end with a “_t”:
----------------
typedef int myint_t;
typedef struct
{
.
.
.
} mystruct_t;
----------------
Prefer doing +typedef struct { ... } mystruct_t+ to declaring a struct
separately. If a struct declaration is still needed (e.g., if it contains
a pointer to itself) it should:
1. Have an identifier that ends with “_struct”.
2. Be typedefed into a type (that ends with “_t”):
+typedef struct my_struct_struct my_struct_t;+.

17
LICENSE Normal file
View File

@ -0,0 +1,17 @@
LICENSE
=======
This is free software, you may use it and distribute it under the same terms as
Perl itself.
Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr
Pajas
DISCLAIMER
==========
THIS PROGRAM IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY
WARRANTY; WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A
PARTICULAR PURPOSE.

2371
LibXML.pm Normal file

File diff suppressed because it is too large Load Diff

527
LibXML.pod Normal file
View File

@ -0,0 +1,527 @@
=head1 NAME
XML::LibXML - Perl Binding for libxml2
=head1 SYNOPSIS
use XML::LibXML;
my $dom = XML::LibXML->load_xml(string => <<'EOT');
<some-xml/>
EOT
$Version_String = XML::LibXML::LIBXML_DOTTED_VERSION;
$Version_ID = XML::LibXML::LIBXML_VERSION;
$DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION;
$libxmlnode = XML::LibXML->import_GDOME( $node, $deep );
$gdomenode = XML::LibXML->export_GDOME( $node, $deep );
=head1 DESCRIPTION
This module is an interface to libxml2, providing XML and HTML parsers with
DOM, SAX and XMLReader interfaces, a large subset of DOM Layer 3 interface and
a XML::XPath-like interface to XPath API of libxml2. The module is split into
several packages which are not described in this section; unless stated
otherwise, you only need to C<<<<<< use XML::LibXML; >>>>>> in your programs.
Check out XML::LibXML by Example (L<<<<<< http://grantm.github.io/perl-libxml-by-example/ >>>>>>) for a tutorial.
For further information, please check the following documentation:
=over 4
=item L<<<<<< XML::LibXML::Parser >>>>>>
Parsing XML files with XML::LibXML
=item L<<<<<< XML::LibXML::DOM >>>>>>
XML::LibXML Document Object Model (DOM) Implementation
=item L<<<<<< XML::LibXML::SAX >>>>>>
XML::LibXML direct SAX parser
=item L<<<<<< XML::LibXML::Reader >>>>>>
Reading XML with a pull-parser
=item L<<<<<< XML::LibXML::Dtd >>>>>>
XML::LibXML frontend for DTD validation
=item L<<<<<< XML::LibXML::RelaxNG >>>>>>
XML::LibXML frontend for RelaxNG schema validation
=item L<<<<<< XML::LibXML::Schema >>>>>>
XML::LibXML frontend for W3C Schema schema validation
=item L<<<<<< XML::LibXML::XPathContext >>>>>>
API for evaluating XPath expressions with enhanced support for the evaluation
context
=item L<<<<<< XML::LibXML::InputCallback >>>>>>
Implementing custom URI Resolver and input callbacks
=item L<<<<<< XML::LibXML::Common >>>>>>
Common functions for XML::LibXML related Classes
=back
The nodes in the Document Object Model (DOM) are represented by the following
classes (most of which "inherit" from L<<<<<< XML::LibXML::Node >>>>>>):
=over 4
=item L<<<<<< XML::LibXML::Document >>>>>>
XML::LibXML class for DOM document nodes
=item L<<<<<< XML::LibXML::Node >>>>>>
Abstract base class for XML::LibXML DOM nodes
=item L<<<<<< XML::LibXML::Element >>>>>>
XML::LibXML class for DOM element nodes
=item L<<<<<< XML::LibXML::Text >>>>>>
XML::LibXML class for DOM text nodes
=item L<<<<<< XML::LibXML::Comment >>>>>>
XML::LibXML class for comment DOM nodes
=item L<<<<<< XML::LibXML::CDATASection >>>>>>
XML::LibXML class for DOM CDATA sections
=item L<<<<<< XML::LibXML::Attr >>>>>>
XML::LibXML DOM attribute class
=item L<<<<<< XML::LibXML::DocumentFragment >>>>>>
XML::LibXML's DOM L2 Document Fragment implementation
=item L<<<<<< XML::LibXML::Namespace >>>>>>
XML::LibXML DOM namespace nodes
=item L<<<<<< XML::LibXML::PI >>>>>>
XML::LibXML DOM processing instruction nodes
=back
=head1 ENCODINGS SUPPORT IN XML::LIBXML
Recall that since version 5.6.1, Perl distinguishes between character strings
(internally encoded in UTF-8) and so called binary data and, accordingly,
applies either character or byte semantics to them. A scalar representing a
character string is distinguished from a byte string by special flag (UTF8).
Please refer to I<<<<<< perlunicode >>>>>> for details.
XML::LibXML's API is designed to deal with many encodings of XML documents
completely transparently, so that the application using XML::LibXML can be
completely ignorant about the encoding of the XML documents it works with. On
the other hand, functions like C<<<<<< XML::LibXML::Document-E<gt>setEncoding >>>>>> give the user control over the document encoding.
To ensure the aforementioned transparency and uniformity, most functions of
XML::LibXML that work with in-memory trees accept and return data as character
strings (i.e. UTF-8 encoded with the UTF8 flag on) regardless of the original
document encoding; however, the functions related to I/O operations (i.e.
parsing and saving) operate with binary data (in the original document
encoding) obeying the encoding declaration of the XML documents.
Below we summarize basic rules and principles regarding encoding:
=over 4
=item 1.
Do NOT apply any encoding-related PerlIO layers (C<<<<<< :utf8 >>>>>> or C<<<<<< :encoding(...) >>>>>>) to file handles that are an input for the parsers or an output for a
serializer of (full) XML documents. This is because the conversion of the data
to/from the internal character representation is provided by libxml2 itself
which must be able to enforce the encoding specified by the C<<<<<< E<lt>?xml version="1.0" encoding="..."?E<gt> >>>>>> declaration. Here is an example to follow:
use XML::LibXML;
# load
open my $fh, '<', 'file.xml';
binmode $fh; # drop all PerlIO layers possibly created by a use open pragma
$doc = XML::LibXML->load_xml(IO => $fh);
# save
open my $out, '>', 'out.xml';
binmode $out; # as above
$doc->toFH($out);
# or
print {$out} $doc->toString();
=item 2.
All functions working with DOM accept and return character strings (UTF-8
encoded with UTF8 flag on). E.g.
my $doc = XML::LibXML::Document->new('1.0',$some_encoding);
my $element = $doc->createElement($name);
$element->appendText($text);
$xml_fragment = $element->toString(); # returns a character string
$xml_document = $doc->toString(); # returns a byte string
where C<<<<<< $some_encoding >>>>>> is the document encoding that will be used when saving the document, and C<<<<<< $name >>>>>> and C<<<<<< $text >>>>>> contain character strings (UTF-8 encoded with UTF8 flag on). Note that the
method C<<<<<< toString >>>>>> returns XML as a character string if applied to other node than the Document
node and a byte string containing the appropriate
<?xml version="1.0" encoding="..."?>
declaration if applied to a L<<<<<< XML::LibXML::Document >>>>>>.
=item 3.
DOM methods also accept binary strings in the original encoding of the document
to which the node belongs (UTF-8 is assumed if the node is not attached to any
document). Exploiting this feature is NOT RECOMMENDED since it is considered
bad practice.
my $doc = XML::LibXML::Document->new('1.0','iso-8859-2');
my $text = $doc->createTextNode($some_latin2_encoded_byte_string);
# WORKS, BUT NOT RECOMMENDED!
=back
I<<<<<< NOTE: >>>>>> libxml2 support for many encodings is based on the iconv library. The actual
list of supported encodings may vary from platform to platform. To test if your
platform works correctly with your language encoding, build a simple document
in the particular encoding and try to parse it with XML::LibXML to see if the
parser produces any errors. Occasional crashes were reported on rare platforms
that ship with a broken version of iconv.
=head1 THREAD SUPPORT
XML::LibXML since 1.67 partially supports Perl threads in Perl >= 5.8.8.
XML::LibXML can be used with threads in two ways:
By default, all XML::LibXML classes use the CLONE_SKIP class method to prevent Perl
from copying XML::LibXML::* objects when a new thread is spawned. In this mode,
all XML::LibXML::* objects are thread specific. This is the safest way to work
with XML::LibXML in threads.
Alternatively, one may use
use threads;
use XML::LibXML qw(:threads_shared);
to indicate that all XML::LibXML node and parser objects should be shared
between the main thread and any thread spawned from there. For example, in
my $doc = XML::LibXML->load_xml(location => $filename);
my $thr = threads->new(sub{
# code working with $doc
1;
});
$thr->join;
the variable C<<<<<< $doc >>>>>> refers to the exact same XML::LibXML::Document in the spawned thread as in the
main thread.
Without using mutex locks, parallel threads may read the same document (i.e.
any node that belongs to the document), parse files, and modify different
documents.
However, if there is a chance that some of the threads will attempt to modify a
document (or even create new nodes based on that document, e.g. with C<<<<<< $doc-E<gt>createElement >>>>>>) that other threads may be reading at the same time, the user is responsible
for creating a mutex lock and using it I<<<<<< both >>>>>> in the thread that modifies and the thread that reads:
my $doc = XML::LibXML->load_xml(location => $filename);
my $mutex : shared;
my $thr = threads->new(sub{
lock $mutex;
my $el = $doc->createElement('foo');
# ...
1;
});
{
lock $mutex;
my $root = $doc->documentElement;
say $root->name;
}
$thr->join;
Note that libxml2 uses dictionaries to store short strings and these
dictionaries are kept on a document node. Without mutex locks, it could happen
in the previous example that the thread modifies the dictionary while other
threads attempt to read from it, which could easily lead to a crash.
=head1 VERSION INFORMATION
Sometimes it is useful to figure out which version of libxml2 XML::LibXML was
compiled for. In most cases this is for debugging or to check if a given
installation meets all functionality for the package. The functions
XML::LibXML::LIBXML_DOTTED_VERSION and XML::LibXML::LIBXML_VERSION provide this
version information. Both functions simply pass through the values of the
similar named macros of libxml2. Similarly, XML::LibXML::LIBXML_RUNTIME_VERSION
returns the version of the (usually dynamically) linked libxml2.
=over 4
=item XML::LibXML::LIBXML_DOTTED_VERSION
$Version_String = XML::LibXML::LIBXML_DOTTED_VERSION;
Returns the version string of the libxml2 version XML::LibXML was compiled for.
This will be "2.6.2" for "libxml2 2.6.2".
=item XML::LibXML::LIBXML_VERSION
$Version_ID = XML::LibXML::LIBXML_VERSION;
Returns the version id of the libxml2 version XML::LibXML was compiled for.
This will be "20602" for "libxml2 2.6.2". Don't mix this version id with
$XML::LibXML::VERSION. The latter contains the version of XML::LibXML itself
while the first contains the version of libxml2 XML::LibXML was compiled for.
=item XML::LibXML::LIBXML_RUNTIME_VERSION
$DLL_Version = XML::LibXML::LIBXML_RUNTIME_VERSION;
Returns a version string of the libxml2 which is (usually dynamically) linked
by XML::LibXML. This will be "20602" for libxml2 released as "2.6.2" and
something like "20602-CVS2032" for a CVS build of libxml2.
XML::LibXML issues a warning if the version of libxml2 dynamically linked to it
is less than the version of libxml2 which it was compiled against.
=back
=head1 EXPORTS
By default the module exports all constants and functions listed in the :all
tag, described below.
=head1 EXPORT TAGS
=over 4
=item C<<<<<< :all >>>>>>
Includes the tags C<<<<<< :libxml >>>>>>, C<<<<<< :encoding >>>>>>, and C<<<<<< :ns >>>>>> described below.
=item C<<<<<< :libxml >>>>>>
Exports integer constants for DOM node types.
XML_ELEMENT_NODE => 1
XML_ATTRIBUTE_NODE => 2
XML_TEXT_NODE => 3
XML_CDATA_SECTION_NODE => 4
XML_ENTITY_REF_NODE => 5
XML_ENTITY_NODE => 6
XML_PI_NODE => 7
XML_COMMENT_NODE => 8
XML_DOCUMENT_NODE => 9
XML_DOCUMENT_TYPE_NODE => 10
XML_DOCUMENT_FRAG_NODE => 11
XML_NOTATION_NODE => 12
XML_HTML_DOCUMENT_NODE => 13
XML_DTD_NODE => 14
XML_ELEMENT_DECL => 15
XML_ATTRIBUTE_DECL => 16
XML_ENTITY_DECL => 17
XML_NAMESPACE_DECL => 18
XML_XINCLUDE_START => 19
XML_XINCLUDE_END => 20
=item C<<<<<< :encoding >>>>>>
Exports two encoding conversion functions from XML::LibXML::Common.
encodeToUTF8()
decodeFromUTF8()
=item C<<<<<< :ns >>>>>>
Exports two convenience constants: the implicit namespace of the reserved C<<<<<< xml: >>>>>> prefix, and the implicit namespace for the reserved C<<<<<< xmlns: >>>>>> prefix.
XML_XML_NS => 'http://www.w3.org/XML/1998/namespace'
XML_XMLNS_NS => 'http://www.w3.org/2000/xmlns/'
=back
=head1 RELATED MODULES
The modules described in this section are not part of the XML::LibXML package
itself. As they support some additional features, they are mentioned here.
=over 4
=item L<<<<<< XML::LibXSLT >>>>>>
XSLT 1.0 Processor using libxslt and XML::LibXML
=item L<<<<<< XML::LibXML::Iterator >>>>>>
XML::LibXML Implementation of the DOM Traversal Specification
=item L<<<<<< XML::CompactTree::XS >>>>>>
Uses XML::LibXML::Reader to very efficiently parse an XML document or element
into native Perl data structures, which are less flexible but significantly
faster to process than DOM.
=back
=head1 XML::LIBXML AND XML::GDOME
Note: I<<<<<< THE FUNCTIONS DESCRIBED HERE ARE STILL EXPERIMENTAL >>>>>>
Although both modules make use of libxml2's XML capabilities, the DOM
implementations of the two modules are not compatible. But still it is possible to
exchange nodes from one DOM to the other. The concept of this exchange is
pretty similar to the function cloneNode(): The particular node is copied on
the low-level to the opposite DOM implementation.
Since the DOM implementations cannot coexist within one document, one is forced
to copy each node that should be used. Because you are always keeping two nodes
this may have quite an impact on a machine's memory usage.
XML::LibXML provides two functions to export or import GDOME nodes:
import_GDOME() and export_GDOME(). Both functions have two parameters: the node
and a flag for recursive import. The flag works as in cloneNode().
The two functions allow one to export and import XML::GDOME nodes explicitly,
however, XML::LibXML also allows the transparent import of XML::GDOME nodes in
functions such as appendChild(), insertAfter() and so on. While native nodes
are automatically adopted in most functions XML::GDOME nodes are always cloned
in advance. Thus if the original node is modified after the operation, the node
in the XML::LibXML document will not have this information.
=over 4
=item import_GDOME
$libxmlnode = XML::LibXML->import_GDOME( $node, $deep );
This clones an XML::GDOME node to an XML::LibXML node explicitly.
=item export_GDOME
$gdomenode = XML::LibXML->export_GDOME( $node, $deep );
Allows one to clone an XML::LibXML node into an XML::GDOME node.
=back
=head1 CONTACTS
For bug reports, please use the CPAN request tracker on
http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-LibXML
For suggestions etc., and other issues related to XML::LibXML you may use the
perl XML mailing list (C<<<<<< perl-xml@listserv.ActiveState.com >>>>>>), where most XML-related Perl modules are discussed. In case of problems you
should check the archives of that list first. Many problems are already
discussed there. You can find the list's archives and subscription options at L<<<<<< http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml >>>>>>.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

9713
LibXML.xs Normal file

File diff suppressed because it is too large Load Diff

223
MANIFEST Normal file
View File

@ -0,0 +1,223 @@
Av_CharPtrPtr.c
Av_CharPtrPtr.h
Changes
Devel.xs
HACKING.txt
LICENSE
LibXML.pm
LibXML.pod
LibXML.xs
MANIFEST
Makefile.PL
README
TODO
debian/changelog
debian/compat
debian/control
debian/copyright
debian/libxml-libxml-perl.docs
debian/libxml-libxml-perl.examples
debian/libxml-libxml-perl.install
debian/libxml-libxml-perl.postinst
debian/libxml-libxml-perl.prerm
debian/rules
docs/libxml.dbk
dom.c
dom.h
example/JBR-ALLENtrees.htm
example/article.xml
example/article_bad.xml
example/article_external_bad.xml
example/article_internal.xml
example/article_internal_bad.xml
example/bad.dtd
example/bad.xml
example/catalog.xml
example/cb_example.pl
example/complex/complex.dtd
example/complex/complex.xml
example/complex/complex2.xml
example/complex/dtd/f.dtd
example/complex/dtd/g.dtd
example/create-sample-html-document.pl
example/dromeds.xml
example/dtd.xml
example/enc2_latin2.html
example/enc_latin2.html
example/ext_ent.dtd
example/ns.xml
example/test.dtd
example/test.html
example/test.xhtml
example/test.xml
example/test2.xml
example/test3.xml
example/test4.xml
example/thedieline.rss
example/utf-16-1.html
example/utf-16-2.html
example/utf-16-2.xml
example/xmllibxmldocs.pl
example/xmlns/badguy.xml
example/xmlns/goodguy.xml
example/xpath.pl
example/yahoo-finance-html-with-errors.html
lib/XML/LibXML/Attr.pod
lib/XML/LibXML/AttributeHash.pm
lib/XML/LibXML/Boolean.pm
lib/XML/LibXML/CDATASection.pod
lib/XML/LibXML/Comment.pod
lib/XML/LibXML/Common.pm
lib/XML/LibXML/Common.pod
lib/XML/LibXML/DOM.pod
lib/XML/LibXML/Devel.pm
lib/XML/LibXML/Document.pod
lib/XML/LibXML/DocumentFragment.pod
lib/XML/LibXML/Dtd.pod
lib/XML/LibXML/Element.pod
lib/XML/LibXML/ErrNo.pm
lib/XML/LibXML/ErrNo.pod
lib/XML/LibXML/Error.pm
lib/XML/LibXML/Error.pod
lib/XML/LibXML/InputCallback.pod
lib/XML/LibXML/Literal.pm
lib/XML/LibXML/Namespace.pod
lib/XML/LibXML/Node.pod
lib/XML/LibXML/NodeList.pm
lib/XML/LibXML/Number.pm
lib/XML/LibXML/PI.pod
lib/XML/LibXML/Parser.pod
lib/XML/LibXML/Pattern.pod
lib/XML/LibXML/Reader.pm
lib/XML/LibXML/Reader.pod
lib/XML/LibXML/RegExp.pod
lib/XML/LibXML/RelaxNG.pod
lib/XML/LibXML/SAX.pm
lib/XML/LibXML/SAX.pod
lib/XML/LibXML/SAX/Builder.pm
lib/XML/LibXML/SAX/Builder.pod
lib/XML/LibXML/SAX/Generator.pm
lib/XML/LibXML/SAX/Parser.pm
lib/XML/LibXML/Schema.pod
lib/XML/LibXML/Text.pod
lib/XML/LibXML/XPathContext.pm
lib/XML/LibXML/XPathContext.pod
lib/XML/LibXML/XPathExpression.pod
perl-libxml-mm.c
perl-libxml-mm.h
perl-libxml-sax.c
perl-libxml-sax.h
ppport.h
scripts/Test.pm-to-Test-More.pl
scripts/bump-version-number.pl
scripts/fast-eumm.pl
scripts/prints-to-comments.pl
scripts/tag-release.pl
scripts/total-build-and-test.bash
scripts/update-HACKING-file.bash
t/00-report-prereqs.t
t/01basic.t
t/02parse.t
t/03doc.t
t/04node.t
t/05text.t
t/06elements.t
t/07dtd.t
t/08findnodes.t
t/09xpath.t
t/10ns.t
t/11memory.t
t/12html.t
t/13dtd.t
t/14sax.t
t/15nodelist.t
t/16docnodes.t
t/17callbacks.t
t/18docfree.t
t/19die_on_invalid_utf8_rt_58848.t
t/19encoding.t
t/20extras.t
t/21catalog.t
t/23rawfunctions.t
t/24c14n.t
t/25relaxng.t
t/26schema.t
t/27new_callbacks_simple.t
t/28new_callbacks_multiple.t
t/29id.t
t/30keep_blanks.t
t/30xpathcontext.t
t/31xpc_functions.t
t/32xpc_variables.t
t/35huge_mode.t
t/40reader.t
t/40reader_mem_error.t
t/41xinclude.t
t/42common.t
t/43options.t
t/44extent.t
t/45regex.t
t/46err_column.t
t/47load_xml_callbacks.t
t/48_RH5_double_free_rt83779.t
t/48_SAX_Builder_rt_91433.t
t/48_gh_pr63_detect_undef_values.t
t/48_memleak_rt_83744.t
t/48_reader_undef_warning_on_empty_str_rt106830.t
t/48_removeChild_crashes_rt_80395.t
t/48_replaceNode_DTD_nodes_rT_80521.t
t/48_rt123379_setNamespace.t
t/48_rt55000.t
t/48_rt93429_recover_2_in_html_parsing.t
t/48importing_nodes_IDs_rt_69520.t
t/49_load_html.t
t/49callbacks_returning_undef.t
t/49global_extent.t
t/50devel.t
t/51_parse_html_string_rt87089.t
t/60error_prev_chain.t
t/60struct_error.t
t/61error.t
t/62overload.t
t/71overloads.t
t/72destruction.t
t/80registryleak.t
t/90shared_clone_failed_rt_91800.t
t/90stack.t
t/90threads.t
t/91unique_key.t
t/cpan-changes.t
t/data/callbacks_returning_undef.xml
t/data/chinese.xml
t/lib/Collector.pm
t/lib/Counter.pm
t/lib/Stacker.pm
t/lib/TestHelpers.pm
t/pod-files-presence.t
t/pod.t
t/release-kwalitee.t
t/style-trailing-space.t
test/relaxng/badschema.rng
test/relaxng/demo.rng
test/relaxng/demo.xml
test/relaxng/demo2.rng
test/relaxng/demo3.rng
test/relaxng/demo4.rng
test/relaxng/invaliddemo.xml
test/relaxng/net.rng
test/relaxng/schema.rng
test/schema/badschema.xsd
test/schema/demo.xml
test/schema/invaliddemo.xml
test/schema/net.xsd
test/schema/schema.xsd
test/textReader/countries.xml
test/xinclude/entity.txt
test/xinclude/test.xml
test/xinclude/xinclude.xml
typemap
xpath.c
xpath.h
xpathcontext.h
META.yml Module YAML meta-data (added by MakeMaker)
META.json Module JSON meta-data (added by MakeMaker)

102
META.json Normal file
View File

@ -0,0 +1,102 @@
{
"abstract" : "Interface to Gnome libxml2 xml parsing and DOM library",
"author" : [
"Petr Pajas <PAJAS@cpan.org>"
],
"dynamic_config" : 0,
"generated_by" : "ExtUtils::MakeMaker version 7.60, CPAN::Meta::Converter version 2.150010",
"keywords" : [
"dom",
"html",
"libxml",
"object oriented",
"oop",
"parse",
"parser",
"parsing",
"pullparser",
"sax",
"sgml",
"xml",
"xpath",
"XPath",
"xs"
],
"license" : [
"perl_5"
],
"meta-spec" : {
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
"version" : 2
},
"name" : "XML-LibXML",
"no_index" : {
"directory" : [
"t",
"inc",
"xt"
]
},
"prereqs" : {
"build" : {
"requires" : {
"ExtUtils::MakeMaker" : "0"
}
},
"configure" : {
"requires" : {
"Alien::Base::Wrapper" : "0",
"Alien::Libxml2" : "0.14",
"Config" : "0",
"ExtUtils::MakeMaker" : "0"
}
},
"runtime" : {
"requires" : {
"Carp" : "0",
"DynaLoader" : "0",
"Encode" : "0",
"Exporter" : "5.57",
"IO::Handle" : "0",
"Scalar::Util" : "0",
"Tie::Hash" : "0",
"XML::NamespaceSupport" : "1.07",
"XML::SAX" : "0.11",
"XML::SAX::Base" : "0",
"XML::SAX::DocumentLocator" : "0",
"XML::SAX::Exception" : "0",
"base" : "0",
"constant" : "0",
"overload" : "0",
"parent" : "0",
"perl" : "5.008001",
"strict" : "0",
"vars" : "0",
"warnings" : "0"
}
},
"test" : {
"requires" : {
"Config" : "0",
"Errno" : "0",
"IO::File" : "0",
"IO::Handle" : "0",
"POSIX" : "0",
"Scalar::Util" : "0",
"Test::More" : "0",
"locale" : "0",
"utf8" : "0"
}
}
},
"release_status" : "stable",
"resources" : {
"repository" : {
"type" : "git",
"url" : "https://github.com/shlomif/perl-XML-LibXML.git",
"web" : "https://github.com/shlomif/perl-XML-LibXML"
}
},
"version" : "2.0207",
"x_serialization_backend" : "JSON::PP version 4.06"
}

73
META.yml Normal file
View File

@ -0,0 +1,73 @@
---
abstract: 'Interface to Gnome libxml2 xml parsing and DOM library'
author:
- 'Petr Pajas <PAJAS@cpan.org>'
build_requires:
Config: '0'
Errno: '0'
ExtUtils::MakeMaker: '0'
IO::File: '0'
IO::Handle: '0'
POSIX: '0'
Scalar::Util: '0'
Test::More: '0'
locale: '0'
utf8: '0'
configure_requires:
Alien::Base::Wrapper: '0'
Alien::Libxml2: '0.14'
Config: '0'
ExtUtils::MakeMaker: '0'
dynamic_config: 0
generated_by: 'ExtUtils::MakeMaker version 7.60, CPAN::Meta::Converter version 2.150010'
keywords:
- dom
- html
- libxml
- 'object oriented'
- oop
- parse
- parser
- parsing
- pullparser
- sax
- sgml
- xml
- xpath
- XPath
- xs
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: XML-LibXML
no_index:
directory:
- t
- inc
- xt
requires:
Carp: '0'
DynaLoader: '0'
Encode: '0'
Exporter: '5.57'
IO::Handle: '0'
Scalar::Util: '0'
Tie::Hash: '0'
XML::NamespaceSupport: '1.07'
XML::SAX: '0.11'
XML::SAX::Base: '0'
XML::SAX::DocumentLocator: '0'
XML::SAX::Exception: '0'
base: '0'
constant: '0'
overload: '0'
parent: '0'
perl: '5.008001'
strict: '0'
vars: '0'
warnings: '0'
resources:
repository: https://github.com/shlomif/perl-XML-LibXML.git
version: '2.0207'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'

811
Makefile.PL Normal file
View File

@ -0,0 +1,811 @@
# -------------------------------------------------------------------------- #
# $Id$
# -------------------------------------------------------------------------- #
# Makefile.PL for XML::LibXML.
# This file is required to generate a localized Makefile
# -------------------------------------------------------------------------- #
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
use strict;
use warnings;
require 5.008;
use vars qw/$DEVNULL $is_Win32 $extralibdir $skipsaxinstall/;
use ExtUtils::MakeMaker;
use Config;
use Symbol;
use File::Spec;
$|=0;
my %config;
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# common information go to the top, so they are easier to find
# -------------------------------------------------------------------------- #
# Arguments handed to WriteMakefile().  The base set is unconditional;
# MIN_PERL_VERSION and META_MERGE are only added when the running
# ExtUtils::MakeMaker reports at least version 6.48 / 6.54 respectively.
my %INFOS = (
    NAME         => 'XML::LibXML',
    VERSION_FROM => 'LibXML.pm', # finds $VERSION
    AUTHOR       => 'Petr Pajas',
    ABSTRACT     => 'Interface to Gnome libxml2 xml parsing and DOM library',
    LICENSE      => 'perl',
    PREREQ_PM    => {
        'base' => 0,
        #'Hash::FieldHash' => '0.09',
        'parent' => 0,
        'strict' => 0,
        'Test::More' => 0,
        'vars' => 0,
        'warnings' => 0,
        'XML::NamespaceSupport' => '1.07',
        'XML::SAX' => '0.11',
        'XML::SAX::Base' => '0',
        'XML::SAX::Exception' => '0',
    },
    OBJECT       => '$(O_FILES)', # add the DOM extensions to libxml2
);

if ($ExtUtils::MakeMaker::VERSION >= 6.48) {
    $INFOS{MIN_PERL_VERSION} = '5.008';
}

if ($ExtUtils::MakeMaker::VERSION >= 6.54) {
    # Extra metadata merged into the generated META files.
    $INFOS{META_MERGE} = {
        resources => {
            repository => 'https://github.com/shlomif/perl-XML-LibXML',
            homepage   => 'https://github.com/shlomif/perl-XML-LibXML',
        },
        keywords => [
            "dom",
            "html",
            "libxml",
            "object oriented",
            "oop",
            "parse",
            "parser",
            "parsing",
            "pullparser",
            "sax",
            "sgml",
            "xml",
            "xpath",
            "XPath",
            "xs",
        ],
    };
}
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
use lib qw(inc);
use Devel::CheckLib;
# Prompt the user here for any paths and other configuration
# -------------------------------------------------------------------------- #
# libxml2 valid versions
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# Pull the KEY=VALUE arguments this script understands out of @ARGV and
# store them in %config; everything else stays in @ARGV untouched.
my %params = map { $_ => undef }
    qw(FORCE DEBUG DEFINE EXTRALIBDIR GDOME INC LIBS SKIP_SAX_INSTALL XMLPREFIX NO_THREADS LDFLAGS);
{
    my @unrecognised;
    foreach my $arg (@ARGV) {
        my ($key, $val) = split(/=/, $arg, 2);
        if (exists $params{$key}) {
            $config{$key} = $val;
        }
        else {
            push @unrecognised, $arg;
        }
    }
    @ARGV = @unrecognised;
}
# EXTRALIBDIR is consumed by MY::test, not by WriteMakefile().
$extralibdir = delete $config{EXTRALIBDIR};
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# FORCE=1: carry on even when the detected libxml2 version is blacklisted.
my $FORCE = delete $config{FORCE};
# DEBUG=1: switch debugging messages on.
my $DEBUG = delete $config{DEBUG};
# The flag has just been removed from %config, so it must be read from
# $DEBUG; testing $config{DEBUG} here could never be true.
if ( $DEBUG and $is_Win32 ) {
    warn "win32 compile\n";
}
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# Add the compile-time defines that depend on the running perl:
#   -DHAVE_UTF8               for perl 5.6 or newer
#   -DNO_XML_LIBXML_THREADS   for perl older than 5.8, or when NO_THREADS
#                             was given on the command line
unless ( $] < 5.006 ) {
    warn "enable native perl UTF8\n";
    $config{DEFINE} .= " -DHAVE_UTF8";
}
my $no_threads_requested = delete $config{NO_THREADS};
if ( $no_threads_requested or $] < 5.008 ) {
    warn "disabling XML::LibXML support for Perl threads\n";
    $config{DEFINE} .= " -DNO_XML_LIBXML_THREADS";
}
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# get the libxml2 configuration
#
# For each release we already know which libxml2 versions work with the given
# module. All we need is to keep track of bad versions.
# If a user wants to build XML::LibXML with a newer version, there will be
# a warning that errors are possible.
#
# The known versions are kept as an ascending list of interval end points:
# the state (working / not working) of an entry X.Y.Z applies to every
# version Ma.Mi.Pt <= X.Y.Z that is not already covered by an earlier entry.
#
# NOTE: All versions for which the tests pass are marked as working.
#
$skipsaxinstall = $ENV{SKIP_SAX_INSTALL} || $config{SKIP_SAX_INSTALL};
delete $config{SKIP_SAX_INSTALL};
unless ( $is_Win32 ) { # cannot get config in W32
    my @blacklist = (
        # format X,Y,Z,is_ok, X,Y,Z is version,
        # is_ok applies also to *preceding* versions
        [2,4,22,0],
        [2,4,25,0], # broken XPath
        [2,4,28,0], # unsupported, may work fine with earlier XML::LibXML versions
        [2,4,29,0], # broken
        [2,4,30,0], # broken
        [2,5,0,0], # unsupported
        [2,5,1,0], # all pre 2.5.4 version have broken attr output
        [2,5,5,0], # tests pass, but known as broken
        [2,5,11,0], # will partially work
        [2,6,0,0], # unsupported
        [2,6,4,0], # schema error
        [2,6,5,0], # broken xincludes
        [2,6,15,0],
        # [2,6,16,1], # first version to pass all tests
        [2,6,18,1], # up to 2.6.18 all ok
        [2,6,19,0], # broken c14n
        [2,6,20,0], # broken schemas
        [2,6,24,1], # all tests pass
        [2,6,25,0], # broken XPath
        [2,6,32,1], # tested, works ok
        [2,7,1,0], # broken release, broken utf-16
        [2,7,6,1], # tested, ok
        [2,7,8,1], # tested, ok
        [2,9,3,1], # schema regression
        [2,9,4,0], # schema regression
        [2,9,9,1],
    );
    my $xml2cfg = "xml2-config";
    my $libprefix = $ENV{XMLPREFIX} || $config{XMLPREFIX};
    delete $config{XMLPREFIX}; # delete if exists, otherwise MakeMaker gets confused
    if ( defined $libprefix ) {
        $xml2cfg = $libprefix . '/bin/' . $xml2cfg;
    }
    # if a user defined INC and LIBS on the command line we must not
    # override them
    if ( not defined $config{LIBS} and not defined $config{INC} ) {
        print "running xml2-config...";
        eval {
            try_libconfig( $xml2cfg, \%config, \@blacklist );
        };
        if ( $@ ) {
            if ( $@ =~ /^VERSION|^FORCED/ ) {
                # try_libconfig dies with "VERSION x.y.z\n" or "FORCED x.y.z\n".
                # Extract the bare version number so the message below does
                # not embed the raw exception text (tag and newline) in the
                # middle of a sentence.
                my ($libxml2_version) = $@ =~ /^(?:VERSION|FORCED) (\S+)/;
                my $reported_version =
                    defined $libxml2_version ? $libxml2_version : 'unknown';
                print STDERR "The installed version of libxml2 ($reported_version) is not compatible with XML::LibXML (and probably buggy)!\n\n".
                    "You may continue at your own risk using 'perl Makefile.PL FORCE=1', but:\n\n".
                    " - don't expect XML::LibXML to build or work correctly!\n".
                    " - don't report errors!\n".
                    " - don't send patches!\n\n".
                    "Check the README file for more information on versions\n".
                    "that are tested with XML::LibXML\n\n";
                # Abort with a non-zero status unless FORCE=1 was given.
                # libxml2 2.9.4 is blacklisted above but is nevertheless
                # allowed to proceed without FORCE.
                exit 1
                    if !$FORCE
                    and ( !defined $libxml2_version or $libxml2_version ne "2.9.4" );
            }
            if ( $@ =~ /^UNTESTED (\S*)/ ) {
                warn "Note: libxml2 $1 was not tested with this XML::LibXML version.\n";
            }
            # try_libconfig may have died before it filled in LIBS/INC
            # (e.g. xml2-config could not be run); fall back to a guess.
            if ( not defined $config{LIBS} and not defined $config{INC} ) {
                warn "didn't manage to get libxml2 config, guessing\n";
                $config{LIBS} = '-L/usr/local/lib -L/usr/lib -lxml2 -lm';
                $config{INC} = '-I/usr/local/include -I/usr/include';
                print <<"OPT";
options:
LIBS='$config{LIBS}'
INC='$config{INC}'
If this is wrong, Re-run as:
\$ $^X Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include'
OPT
            }
        }
    }
}
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# GDOME Support
#
# GDOME support has to be explicitly activated by setting GDOME=1 as a config
# param.
#
unless ( $is_Win32 ) { # cannot get config in W32
    if ( $config{GDOME} ) {
        # Same format as the libxml2 list: X,Y,Z,is_ok
        my @blacklist = (
            [0,7,2,0],
            [0,7,3,1],
        );
        print <<"GDOME";
GDOME Support (experimental):
XML::LibXML can parse into XML::GDOME DOMs if libgdome is installed.
This feature is optional and is not required for using XML::LibXML.
GDOME
        print "running gdome-config...";
        eval {
            # The helper defined in this file is try_libconfig(), and it
            # expects the version list as an array reference.  The previous
            # call to the non-existent test_libconfig() always died inside
            # this eval, so the check silently did nothing.
            try_libconfig( "gdome-config", \%config, \@blacklist );
            print "NOTE: You will need to install XML::GDOME to use this feature\n";
        };
        if ( $@ ) {
            if ( $@ =~ /^VERSION|^FORCED/ ) {
                warn "The installed libgdome version is not supported\n";
            }
            elsif ( $@ =~ /^UNTESTED/ ) {
                warn "The installed libgdome version was not yet tested with XML::LibXML.\n";
                print "NOTE: You will need to install XML::GDOME to use this feature\n";
            }
            else {
                # e.g. gdome-config is not installed or could not be run
                warn "Could not query gdome-config: $@";
            }
        }
    }
}
# -------------------------------------------------------------------------- #
# On Win32 (non-MinGW) several candidate LIBS strings are recorded here and
# tried one after another by _libxml_check_lib().
my $config_LIBS_alternatives;
# -------------------------------------------------------------------------- #
# fix the ld flags
# -------------------------------------------------------------------------- #
if (!defined $config{LIBS} || $config{LIBS} !~ /\-l(?:lib)?xml2\b/) {
    # in this case we are not able to run xml2-config. therefore we need to
    # expand the libz as well.
    if ($is_Win32) {
        if( $ENV{ACTIVEPERL_MINGW} ) {
            $config{LIBS} .= ' -llibxml2.lib -lzlib.lib';
        }
        else {
            my $l = $config{LIBS};
            if (!defined($l)) {
                $l = '';
            }
            # Put several options.
            $config_LIBS_alternatives = [
                map { "$l $_" }
                q/ -llibxml2/,
                q/ -lxml2 -lzlib/,
                q/ -llibxml2 -lzlib -llibgettextlib.dll/
            ];
            # Default to the last alternative until the link check picks one.
            $config{LIBS} = $config_LIBS_alternatives->[-1];
            $config{INC} .= " -I$Config{incpath}";
        }
    }
    else {
        $config{LIBS} .= ' -lxml2 -lz -lm';
    }
}
# NOTE(review): on Win32 this branch is only entered when LIBS lacks "-lz" but
# already contains "-lzlib" (the inner test is effectively negated twice), and
# the body then appends zlib again.  This looks inverted -- confirm the intent
# before changing it.
elsif ($config{LIBS} !~ /\-lz\b/ and !($is_Win32 && $config{LIBS} !~ /\-lzlib\b/)) {
    # note if libxml2 has not -lz within its cflags, we should not use
    # it! We should trust libxml2 and assume libz is not available on the
    # current system (this is of course not true with win32 systems).
    # $config{LIBS} .= $is_Win32 ? ' -lzlib' :' -lz';
    #
    # DEBUG was removed from %config when $DEBUG was initialised, so the
    # flag has to be read from $DEBUG for these messages to ever appear.
    if ( $DEBUG ) {
        warn "zlib was not configured\n";
        warn "set zlib\n" if $is_Win32;
    }
    if ($is_Win32) {
        if( $ENV{ACTIVEPERL_MINGW} ) {
            $config{LIBS} .= ' -lzlib.lib';
        } else {
            $config{LIBS} .= ' -lzlib';
        }
    } else {
        $config{LIBS} .= ' -lz';
    }
}
# -------------------------------------------------------------------------- #
# MacOS X Compiler switches have to go here
#
# if we run on MacOSX, we should check if 10.2 is running and if so,
# if the Build Target is set correctly. Otherwise we have to set it by
# hand
# LDFLAGS=... from the command line is forwarded to MakeMaker as
# dynamic_lib => { OTHERLDFLAGS => ... }.
my $ldflags = delete $config{LDFLAGS};
$config{dynamic_lib} = { OTHERLDFLAGS => " $ldflags " } if $ldflags;

# Whitespace-separated include directories: $config{INC} with the "-I"
# switches stripped.  Used by the libxml2 link check below.
(my $incpath = $config{INC} || "") =~ s#(\A|\s)\s*-I#$1#g;
# Run Devel::CheckLib's check_lib() for library $lib_name using the linker
# flags in $conf_LIBS and the include directories from the file-level
# $incpath, requiring every libxml2 header this distribution includes.
# Returns whatever check_lib() returns.
sub _libxml_check_lib_with_config_LIBs
{
    my ($lib_name, $conf_LIBS) = @_;

    my @required_headers = qw(
        libxml/c14n.h
        libxml/catalog.h
        libxml/entities.h
        libxml/globals.h
        libxml/HTMLparser.h
        libxml/HTMLtree.h
        libxml/parser.h
        libxml/parserInternals.h
        libxml/pattern.h
        libxml/relaxng.h
        libxml/tree.h
        libxml/uri.h
        libxml/valid.h
        libxml/xinclude.h
        libxml/xmlerror.h
        libxml/xmlIO.h
        libxml/xmlmemory.h
        libxml/xmlreader.h
        libxml/xmlregexp.h
        libxml/xmlschemas.h
        libxml/xmlversion.h
        libxml/xpath.h
        libxml/xpathInternals.h
    );
    my @include_dirs = split(/\s/, $incpath);

    return check_lib(
        debug   => $DEBUG,
        LIBS    => $conf_LIBS,
        # fill in what you prompted the user for here
        lib     => [$lib_name],
        incpath => \@include_dirs,
        header  => \@required_headers,
    );
}
# Check whether we can link against library $libname.
#
# When a list of alternative LIBS strings was prepared (Win32 without MinGW),
# each alternative is tried in order; the first one that works is stored in
# $config{LIBS} and 1 is returned.  Otherwise the current $config{LIBS} is
# checked directly and the result of that check is returned.
sub _libxml_check_lib {
    my ($libname) = @_;

    unless (defined($config_LIBS_alternatives)) {
        return _libxml_check_lib_with_config_LIBs($libname, $config{LIBS});
    }

    foreach my $conf_LIBS (@$config_LIBS_alternatives) {
        if (_libxml_check_lib_with_config_LIBs($libname, $conf_LIBS)) {
            $config{LIBS} = $conf_LIBS;
            return 1;
        }
    }

    # None of the alternatives linked.  Return an explicit false value rather
    # than relying on whatever the loop happens to leave as the sub's result.
    return 0;
}
# Verify that libxml2 can actually be linked (first as "xml2", then as
# "libxml2") before generating the Makefile.
print "Checking for ability to link against xml2...";
if ( _libxml_check_lib('xml2') ) {
    print "yes\n";
}
else {
    print "no\n";
    print "Checking for ability to link against libxml2...";
    unless ( _libxml_check_lib('libxml2') ) {
        print STDERR <<"DEATH";
libxml2, zlib, and/or the Math library (-lm) have not been found.
Try setting LIBS and INC values on the command line
Or get libxml2 from
http://xmlsoft.org/
If you install via RPMs, make sure you also install the -devel
RPMs, as this is where the headers (.h files) are.
Also, you may try to run perl Makefile.PL with the DEBUG=1 parameter
to see the exact reason why the detection of libxml2 installation
failed or why Makefile.PL was not able to compile a test program.
DEATH
        exit 0; # 0 recommended by http://cpantest.grango.org (Notes for CPAN Authors)
    }
    print "yes\n";
}
# -------------------------------------------------------------------------- #
# _NOW_ write the Makefile
WriteMakefile( %INFOS, %config );
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# helper functions to build the Makefile
# MakeMaker override: append the "docs-fast" and "docs" targets to the
# standard manifypods Makefile section.
sub MY::manifypods {
    package MY;
    my $self = shift;
    my $section = $self->SUPER::manifypods(@_);
    # warn $section;
    # $section =~ s/^manifypods : pure_all (.*)$/manifypods : pure_all docs $1/m;
    $section .= <<"EOF";
docs-fast :
\t\@$^X -pi~ -e 's{<edition>[0-9.]*</edition>}{<edition>'"\$(VERSION)"'</edition>}' docs/libxml.dbk
\t\@$^X -Iblib/arch -Iblib/lib example/xmllibxmldocs.pl docs/libxml.dbk lib/XML/LibXML/
docs : pure_all
\t\@$^X -pi~ -e 's{<edition>[0-9.]*</edition>}{<edition>'"\$(VERSION)"'</edition>}' docs/libxml.dbk
\t\@$^X -Iblib/arch -Iblib/lib example/xmllibxmldocs.pl docs/libxml.dbk lib/XML/LibXML/
\t\@$^X -pi.old -e 's/a/a/' Makefile.PL
\t\@echo "==> YOU MUST NOW RE-RUN $^X Makefile.PL <=="
\t\@false
EOF
    return $section;
}
# MakeMaker override: unless SAX installation was disabled, hook an
# "install_sax_driver" target into "install" that registers the two
# XML::LibXML SAX parsers with XML::SAX.
sub MY::install {
    package MY;
    my $self = shift;
    my $script = $self->SUPER::install(@_);

    if ( $::skipsaxinstall ) {
        warn "Note: 'make install' will skip XML::LibXML::SAX registration with XML::SAX!\n";
        return $script;
    }

    $script =~ s/install :: (.*)$/install :: $1 install_sax_driver/m;
    $script .= <<"INSTALL";
install_sax_driver :
\t-\@\$(PERL) -I\$(INSTALLSITELIB) -I\$(INSTALLSITEARCH) -MXML::SAX -e "XML::SAX->add_parser(q(XML::LibXML::SAX::Parser))->save_parsers()"
\t-\@\$(PERL) -I\$(INSTALLSITELIB) -I\$(INSTALLSITEARCH) -MXML::SAX -e "XML::SAX->add_parser(q(XML::LibXML::SAX))->save_parsers()"
INSTALL
    return $script;
}
# MakeMaker override: when EXTRALIBDIR was given, insert that directory
# right after "$(TEST_VERBOSE)," in the generated test section.
sub MY::test {
    package MY;
    my $self = shift;
    my $script = $self->SUPER::test(@_);
    return $script unless $::extralibdir;

    $script =~ s/(\$\(TEST_VERBOSE\),)/$1 \'$::extralibdir\',/m;
    return $script;
}
# echo perl -pi~ -e '$$_=q($(version))."\n" if /#\ VERSION TEMPLATE/ ' $(TO_INST_PM)
# MakeMaker override: extra Makefile targets for maintainers (check-syntax,
# version, runtest, distruntest).  An optional "CC = ..." line is taken from
# the environment, and the remaining Makefile.PL arguments are substituted
# for the {#mpl_args#} placeholder in the distruntest recipe.
sub MY::postamble {
    my $quoted_args = join(" ", map { qq["$_"] } @ARGV);
    my $cc_line = exists($ENV{CC}) ? "CC = $ENV{CC}" : '';
    my $fragment = "$cc_line\n" . <<'MAKE_FRAG';
# emacs flymake-mode
check-syntax :
test -n "$(CHK_SOURCES)" && \
$(CCCMD) $(CCCDLFLAGS) "-I$(PERL_INC)" $(PASTHRU_DEFINE) $(DEFINE) -o /dev/null -S $(CHK_SOURCES)
# used to update version numbers in all modules
version:
@version=`grep '\# VERSION TEMPLATE' $(VERSION_FROM)`; \
echo "New version line: $$version"; \
perl -pi~ -e '$$_=q('"$$version"')."\n" if /#\ VERSION TEMPLATE/ ' $(TO_INST_PM);
runtest: pure_all
$(ABSPERLRUN) -MFile::Spec -MTest::Run::CmdLine::Iface -e \
"local @INC = @INC; unshift @INC, map { File::Spec->rel2abs(\$$_) } ('$(INST_LIB)', '$(INST_ARCHLIB)'); Test::Run::CmdLine::Iface->new({test_files => [glob(q{t/*.t})]})->run();"
distruntest: distdir
cd $(DISTVNAME) && $(ABSPERLRUN) Makefile.PL {#mpl_args#}
cd $(DISTVNAME) && $(MAKE) $(PASTHRU)
cd $(DISTVNAME) && $(MAKE) runtest $(PASTHRU)
MAKE_FRAG
    $fragment =~ s/{#mpl_args#}/$quoted_args/;
    return $fragment;
}
# -------------------------------------------------------------------------- #
# -------------------------------------------------------------------------- #
# Functions
# - these should really be in MakeMaker... But &shrug;
# -------------------------------------------------------------------------- #
use Config;
use Cwd;
use Symbol;
use File::Spec;
# Runs at compile time, so $is_Win32 and $DEVNULL are already set when the
# top-level code earlier in this file executes.
BEGIN {
    $is_Win32 = ($^O =~ /Win32/);
    $DEVNULL = 'DEVNULL';
    unless ($is_Win32) {
        # Ask File::Spec for the null device; fall back to /dev/null if
        # that call throws.
        $DEVNULL = eval { File::Spec->devnull };
        $DEVNULL = '/dev/null' if $@;
    }
}
# Remove the files matched by the given glob patterns, making them
# writable first.  Does nothing when no pattern matches.
sub rm_f {
    my @targets = map { glob($_) } @_;
    if (@targets) {
        chmod(0777, @targets);
        unlink(@targets);
    }
}
# Recursively remove everything matched by the given glob patterns.
# For a directory, its "*" entries and its ".exists" marker are removed
# first, then the directory itself; dies if the rmdir fails.  Plain files
# are made writable and unlinked.
sub rm_fr {
    foreach my $path (map { glob($_) } @_) {
        unless (-d $path) {
            # warn("removing $path\n");
            chmod(0777, $path);
            unlink($path);
            next;
        }
        # warn("$path is a directory\n");
        rm_fr("$path/*");
        rm_fr("$path/.exists");
        rmdir($path) || die "Couldn't remove $path: $!";
    }
}
# Run the command given as an array reference (program followed by its
# arguments) without going through a shell.  In DEBUG mode the command is
# echoed and its output is shown; otherwise STDOUT and STDERR are pointed at
# $DEVNULL for the duration of the call.  Dies on a non-zero exit status,
# returns 1 on success.
sub xsystem {
    my ($cmd) = @_;
    my $status;

    if ($DEBUG) {
        print "@$cmd\n";
        $status = (system { $cmd->[0] } @$cmd);
    }
    else {
        # Save the real handles, silence them, run, then restore.
        open(OLDOUT, ">&STDOUT");
        open(OLDERR, ">&STDERR");
        open(STDOUT, ">$DEVNULL");
        open(STDERR, ">$DEVNULL");
        $status = (system { $cmd->[0] } @$cmd);
        open(STDOUT, ">&OLDOUT");
        open(STDERR, ">&OLDERR");
    }

    die "system call to '@$cmd' failed" if $status != 0;
    return 1;
}
# Run $command through backticks and return its output with the trailing
# newline removed.  Dies when the command exits with a non-zero status.
# In DEBUG mode the command line is echoed first; otherwise STDOUT and
# STDERR are pointed at $DEVNULL while the command runs.
sub backtick {
    my ($command) = @_;

    if ($DEBUG) {
        print $command, "\n";
        my $output = `$command`;
        chomp $output;
        die "backticks call to '$command' failed" if $? != 0;
        return $output;
    }

    # Save the real handles, silence them, run, then restore.
    open(OLDOUT, ">&STDOUT");
    open(OLDERR, ">&STDERR");
    open(STDOUT, ">$DEVNULL");
    open(STDERR, ">$DEVNULL");
    my $output = `$command`;
    my $status = $?;   # capture before the re-opens below
    open(STDOUT, ">&OLDOUT");
    open(STDERR, ">&OLDERR");

    die "backticks call to '$command' failed" if $status != 0;
    chomp $output;
    return $output;
}
# Build and test a throw-away XS module ("Conftest") whose C part is $src,
# to find out whether code using the configured libraries compiles and
# links.  $opt, if given, is passed to make as OTHERLDFLAGS.
#
# Works inside a scratch directory ".testlink" and changes the current
# directory to it; the caller (try_link) is responsible for changing back
# and cleaning up.  Dies on any failure, returns 1 on success.
sub try_link0 {
    my ($src, $opt) = @_;

    unless (mkdir(".testlink", 0777)) {
        # Probably left over from a previous run: clear it and retry.
        rm_fr(".testlink");
        mkdir(".testlink", 0777) || die "Cannot create .testlink dir: $!";
    }
    # Everything below writes files (including one named Makefile.PL) into
    # the current directory, so we must not continue if the chdir failed.
    chdir(".testlink") or die "Cannot chdir to .testlink: $!";

    {
        open(my $cfile, '>', 'Conftest.xs')
            or die "Cannot write to file Conftest.xs: $!";
        print {$cfile} <<"EOT";
#ifdef __cplusplus
extern "C" {
#endif
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
#ifdef __cplusplus
}
#endif
EOT
        print {$cfile} $src;
        print {$cfile} <<"EOT";
MODULE = Conftest PACKAGE = Conftest
PROTOTYPES: DISABLE
EOT
        close($cfile);
    }
    {
        open(my $cfile, '>', 'Conftest.pm')
            or die "Cannot write to file Conftest.pm: $!";
        print {$cfile} <<'EOT';
package Conftest;
$VERSION = 1.0;
require DynaLoader;
@ISA = ('DynaLoader');
bootstrap Conftest $VERSION;
1;
EOT
        close($cfile);
    }
    {
        open(my $cfile, '>', 'Makefile.PL')
            or die "Cannot write to file Makefile.PL: $!";
        print {$cfile} <<'EOT';
use ExtUtils::MakeMaker;
my %config;
while($_ = shift @ARGV) {
my ($k, $v) = split /=/, $_, 2;
warn("$k = $v\n");
$config{$k} = $v;
}
WriteMakefile(NAME => "Conftest", VERSION_FROM => "Conftest.pm", %config);
EOT
        close($cfile);
    }
    {
        open(my $cfile, '>', 'test.pl')
            or die "Cannot write to file test.pl: $!";
        print {$cfile} <<"EOT";
use Test; BEGIN { plan tests => 1; } END { ok(\$loaded) }
use Conftest; \$loaded++;
EOT
        close($cfile);
    }

    # Configure the scratch module with the same settings as the real build.
    xsystem([$^X, 'Makefile.PL',
             (map { "$_=$config{$_}" } keys %config),
            ]
    );

    my $def_opt = defined($opt) ? $opt : '';
    # I am not sure if OTHERLDFLAGS is really required - at least the
    # libraries to include do not belong here!
    # I would assume a user to set OTHERLDFLAGS in the %config if they are
    # really required. if done so, we don't have to pass them here ...
    return xsystem([$Config{make}, 'test', "OTHERLDFLAGS=${def_opt}"]);
} # end try_link0
# Safe wrapper around try_link0(): runs it inside an eval, reports any
# error as a warning, then returns to the starting directory and removes
# the ".testlink" scratch directory.  Returns try_link0's result (undef
# when it died).
sub try_link {
    my $origin = cwd();
    my $outcome = eval { try_link0(@_) };
    if ($@) {
        warn $@;
    }
    chdir($origin);
    rm_fr(".testlink");
    return $outcome;
}
# -------------------------------------------------------------------------- #
# try_libconfig calls a generic *-config script and requests --version,
# --libs and --cflags.
#
#   $cfgscript  command to run (e.g. "xml2-config")
#   $config     hash reference; LIBS and INC are filled in from the script
#   $bl         array reference of [major, minor, point, is_ok] entries in
#               ascending version order; is_ok of an entry applies to every
#               version up to and including it that no earlier entry covers
#
# Returns normally (after printing "ok") when the version is known to work.
# Otherwise dies with one of
#   "VERSION <ver>\n"   version is known to be broken
#   "FORCED <ver>\n"    same, but FORCE=1 was given
#   "UNTESTED <ver>\n"  version is newer than every entry in the list
#   "FAILED\n"          no version could be obtained
sub try_libconfig {
    my ($cfgscript, $config, $bl) = @_;

    my $ver = backtick("$cfgscript --version");
    unless ( defined $ver ) {
        print "failed\n";
        die "FAILED\n"; # strange error
    }

    # Components that cannot be parsed count as 0, which sorts before every
    # list entry (and avoids comparing undef numerically).
    my @parts = $ver =~ /([0-9]+)\.([0-9]+)\.([0-9]+)/;
    my ($major, $minor, $point) =
        map { defined $parts[$_] ? $parts[$_] : 0 } 0 .. 2;

    # There are three possible states:
    #   1     : works
    #   0     : works not
    #   undef : not yet tested
    # The state is that of the first list entry that is >= the installed
    # version.  A version beyond the end of the list stays "untested"
    # regardless of which component is larger.
    my $state = undef;
    foreach my $entry ( @$bl ) {
        my $order = $major <=> $entry->[0]
                 || $minor <=> $entry->[1]
                 || $point <=> $entry->[2];
        next if $order > 0;
        $state = $entry->[3];
        last;
    }

    # Record the flags before reporting the verdict so that callers can
    # still use them after catching a VERSION/FORCED/UNTESTED exception.
    $config->{LIBS} = backtick("$cfgscript --libs");
    $config->{INC} = backtick("$cfgscript --cflags");

    if ( defined $state and $state == 0 ) {
        print "failed\n";
        if ($FORCE) {
            die "FORCED $ver\n";
        }
        else {
            die "VERSION $ver\n";
        }
    }
    unless ( defined $state ) {
        print "untested\n";
        die "UNTESTED $ver\n";
    }
    print "ok ($ver)\n";
}
# -------------------------------------------------------------------------- #

285
README Normal file
View File

@ -0,0 +1,285 @@
INTRODUCTION
============
This module implements a Perl interface to the Gnome libxml2 library which
provides interfaces for parsing and manipulating XML files. This module allows
Perl programmers to make use of its highly capable validating XML parser and
its high performance DOM implementation.
IMPORTANT NOTES
===============
XML::LibXML was almost entirely reimplemented between version 1.40 and version
1.49. This may cause problems on some production machines. With version 1.50 a
lot of compatibility fixes were applied, so programs written for XML::LibXML
1.40 or less should run with version 1.50 again.
In 1.59, a new callback API was introduced. This new API is not compatible with
the previous one. See XML::LibXML::InputCallback manual page for details.
In 1.61 the XML::LibXML::XPathContext module, previously distributed
separately, was merged in.
The experimental support for Perl threads introduced in 1.66 was replaced in
1.67.
DEPENDENCIES
============
Prior to installation you MUST have installed the libxml2 library. You can get
the latest libxml2 version from
http://xmlsoft.org/
Without libxml2 installed this module will neither build nor run.
Also XML::LibXML requires the following packages:
o XML::SAX - base class for SAX parsers
o XML::NamespaceSupport - namespace support for SAX parsers
These packages are required. If one is missing some tests will fail.
Again, libxml2 is required to make XML::LibXML work. The library is not just
required to build XML::LibXML, it has to be accessible during run-time as well.
Because of this you need to make sure libxml2 is installed properly. To test
this, run the xmllint program on your system. xmllint is shipped with libxml2
and therefore should be available. For building the module you will also need
the header files for libxml2, which in binary distributions (.rpm, .deb, etc.)
usually reside in a package named libxml2-devel or similar.
INSTALLATION
============
(These instructions are for UNIX and GNU/Linux systems. For MSWin32, See Notes
for Microsoft Windows below.)
To install XML::LibXML just follow the standard installation routine for Perl
modules:
1 perl Makefile.PL
2 make
3 make test
4 make install # as superuser
Note that XML::LibXML is an XS based Perl extension and you need a C compiler
to build it.
Note also that you should rebuild XML::LibXML if you upgrade libxml2 in order
to avoid problems with possible binary incompatibilities between releases of
the library.
Notes on libxml2 versions
=========================
XML::LibXML requires at least libxml2 2.6.16 to compile and pass all tests and
at least 2.6.21 is required for XML::LibXML::Reader. For some older OS versions
this means that an update of the pre-built packages is required.
Although libxml2 claims binary compatibility between its patch levels, it is a
good idea to recompile XML::LibXML and run its tests after an upgrade of
libxml2.
If your libxml2 installation is not within your $PATH, you can pass the
XMLPREFIX=$YOURLIBXMLPREFIX parameter to Makefile.PL so that it can determine
the correct libxml2 version in use, e.g.
> perl Makefile.PL XMLPREFIX=/usr/brand-new
will ask '/usr/brand-new/bin/xml2-config' about your real libxml2
configuration.
Try to avoid setting INC and LIBS directly on the command-line, for if used,
Makefile.PL does not check the libxml2 version for compatibility with
XML::LibXML.
Which version of libxml2 should be used?
========================================
XML::LibXML is tested against a couple versions of libxml2 before it is
released. Thus there are versions of libxml2 that are known not to work
properly with XML::LibXML. The Makefile.PL keeps a blacklist of the
incompatible libxml2 versions using Alien::Libxml2. The blacklist itself is
kept inside its "alienfile" file.
If Makefile.PL detects one of the incompatible versions, it notifies the user.
It may still happen that XML::LibXML builds and passes its tests with such a
version, but that does not mean everything is OK. There will be no support at
all for blacklisted versions!
As of XML::LibXML 1.61, only versions 2.6.16 and higher are supported.
XML::LibXML will probably not compile with earlier libxml2 versions than 2.5.6.
Versions prior to 2.6.8 are known to be broken for various reasons, versions
prior to 2.6.16 exhibit problems with namespaced attributes and do not
therefore pass XML::LibXML regression tests.
It may happen that an unsupported version of libxml2 passes all tests under
certain conditions. This is no reason to assume that it shall work without
problems. If Makefile.PL marks a version of libxml2 as incompatible or broken
it is done for a good reason.
Full linking information for libxml2 can be obtained by invoking "xml2-config
--libs".
Notes for Microsoft Windows
===========================
Thanks to Randy Kobes there is a pre-compiled PPM package available on
http://theoryx5.uwinnipeg.ca/ppmpackages/
Usually it takes a little time to build the package for the latest release.
If you want to build XML::LibXML on Windows from source, you can use the
following instructions contributed by Christopher J. Madsen:
These instructions assume that you already have your system set up to compile
modules that use C components.
First, get the libxml2 binaries from http://xmlsoft.org/sources/win32/
(currently also available at http://www.zlatkovic.com/pub/libxml/).
You need:
> iconv-VERSION.win32.zip
> libxml2-VERSION.win32.zip
> zlib-VERSION.win32.zip
Download the latest version of each. (Each package will probably have a
different version.) When you extract them, you'll get directories named
iconv-VERSION.win32, libxml2-VERSION.win32, and zlib-VERSION.win32, each
containing bin, lib, and include directories.
Combine all the bin, include, and lib directories under c:\Prog\LibXML. (You
can use any directory you prefer; just adjust the instructions accordingly.)
Get the latest version of XML-LibXML from CPAN. Extract it.
Issue these commands in the XML-LibXML-VERSION directory:
> perl Makefile.PL INC=-Ic:\Prog\LibXML\include LIBS=-Lc:\Prog\LibXML\lib
> nmake
> copy c:\Prog\LibXML\bin\*.dll blib\arch\auto\XML\LibXML
> nmake test
> nmake install
(Note: Some systems use dmake instead of nmake.)
By copying the libxml2 DLLs to the arch directory, you help avoid conflicts
with other programs you may have installed that use other (possibly
incompatible) versions of those DLLs.
Notes for Mac OS X
==================
Due to a refactoring of the module, XML::LibXML will not run with some earlier
versions of Mac OS X. It appears that this is related to special linker options
for that OS prior to version 10.2.2. Since the developers do not have full
access to this OS, help/ patches from OS X gurus are highly appreciated.
It is confirmed that XML::LibXML builds and runs without problems since Mac OS
X 10.2.6.
Notes for HPUX
==============
XML::LibXML requires libxml2 2.6.16 or later. There may not exist a usable
binary libxml2 package for HPUX and XML::LibXML. If HPUX cc does not compile
libxml2 correctly, you will be forced to recompile perl with gcc (unless you
have already done that).
Additionally I received the following Note from Rozi Kovesdi:
> Here is my report if someone else runs into the same problem:
>
> Finally I am done with installing all the libraries and XML Perl
> modules
>
> The combination that worked best for me was:
> gcc
> GNU make
>
> Most importantly - before trying to install Perl modules that depend on
> libxml2:
>
> must set SHLIB_PATH to include the path to libxml2 shared library
>
> assuming that you used the default:
>
> export SHLIB_PATH=/usr/local/lib
>
> also, make sure that the config files have execute permission:
>
> /usr/local/bin/xml2-config
> /usr/local/bin/xslt-config
>
> they did not have +x after they were installed by 'make install'
> and it took me a while to realize that this was my problem
>
> or one can use:
>
> perl Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include'
CONTACT
=======
For bug reports, please use the issue tracker at
https://github.com/shlomif/perl-XML-LibXML/issues .
For suggestions etc. you may contact the maintainer directly at
https://www.shlomifish.org/me/contact-me/ , but in general, it is recommended
to use the mailing list given below.
For suggestions etc., and other issues related to XML::LibXML you may use the
perl XML mailing list (perl-xml@listserv.ActiveState.com), where most
XML-related Perl modules are discussed. In case of problems you should check
the archives of that list first. Many problems are already discussed there. You
can find the list's archives and subscription options at
http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl-xml
PACKAGE HISTORY
===============
Versions < 0.98 were maintained by Matt Sergeant
0.98 <= Versions < 1.49 were maintained by Matt Sergeant and Christian Glahn
Versions >= 1.49 are maintained by Christian Glahn
Versions > 1.56 are co-maintained by Petr Pajas
Versions >= 1.59 are provisionally maintained by Petr Pajas
PATCHES AND DEVELOPER VERSION
=============================
As XML::LibXML is open source software, help and patches are appreciated. If
you find a bug in the current release, make sure this bug still exists in the
developer version of XML::LibXML. This version can be cloned from its Git
repository. For more information about that, see:
https://github.com/shlomif/perl-XML-LibXML
Please consider all regression tests as correct. If any test fails it is most
certainly related to a bug.
If you find documentation bugs, please fix them in the libxml.dbk file, stored
in the docs directory.
KNOWN ISSUES
============
The push-parser implementation causes memory leaks.

10
TODO Normal file
View File

@ -0,0 +1,10 @@
* Fix 'line_nubers' in LibXML.pm (with a test).
* Fix the 'suppress_warnings' similarly to https://rt.cpan.org/Ticket/Display.html?id=53270 .
- add a flag to disable touching the I/O callbacks (as requested by
thread users on xml@gnome.org)
- apply user-data patch (changes the proxy node data structure)

7668
docs/libxml.dbk Normal file

File diff suppressed because it is too large Load Diff

1325
dom.c Normal file

File diff suppressed because it is too large Load Diff

281
dom.h Normal file
View File

@ -0,0 +1,281 @@
/* dom.h
* $Id$
* Author: Christian Glahn (2001)
*
* This header file provides some definitions for wrapper functions.
* These functions hide most of the libxml2 code and should make the
* code in the XS file more readable.
*
* The Functions are sorted in four parts:
* part 0 ..... general wrapper functions which do not belong
* to any of the other parts and not specified in DOM.
* part A ..... wrapper functions for general node access
* part B ..... document wrapper
* part C ..... element wrapper
*
* I did not implement any Text, CDATASection or comment wrapper functions,
* since it is pretty straightforward to access these nodes.
*/
#ifndef __LIBXML_DOM_H__
#define __LIBXML_DOM_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/encoding.h>
#include <libxml/xmlerror.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/xmlIO.h>
#include <libxml/xpathInternals.h>
#include <libxml/globals.h>
#include <stdio.h>
#ifdef __cplusplus
}
#endif
/**
* part 0:
*
* unsorted.
**/
void
domReconcileNs(xmlNodePtr tree);
/**
* NAME domParseChar
* TYPE function
* SYNOPSIS
* int utf8char = domParseChar( curchar, &len );
*
* The current char value; if using UTF-8, this may actually span
* multiple bytes in the given string. This function parses a UTF-8
* character from a string into a character value (an integer). It uses
* a slightly modified version of libxml2's character parser. libxml2
* itself does not provide any function to parse characters directly
* from a string and test if they are valid UTF-8 characters.
*
* XML::LibXML uses this function rather than Perl's native UTF-8
* support for two reasons:
* 1) Perl's UTF-8 handling functions often lead to encoding errors,
* partly because they are badly documented.
* 2) not all Perl versions XML::LibXML intends to run with have native
* UTF-8 support.
*
* domParseChar() makes it possible to use the very same code with all
* versions of Perl :)
*
* Returns the current char value and its length
*
* NOTE: If the character passed to this function is not a valid UTF-8
* character, the return value will be 0 and the length of the
* character is -1!
*/
int
domParseChar( xmlChar *characters, int *len );
xmlNodePtr
domReadWellBalancedString( xmlDocPtr doc, xmlChar* string, int repair );
/**
* NAME domIsParent
* TYPE function
*
* tests if a node is an ancestor of another node
*
* SYNOPSIS
* if ( domIsParent(cur, ref) ) ...
*
* this function is very useful to resolve if an operation would cause
* circular references.
*
* the function returns 1 if the ref node is a parent of the cur node.
*/
int
domIsParent( xmlNodePtr cur, xmlNodePtr ref );
/**
* NAME domTestHierarchy
* TYPE function
*
* tests the general hierarchy error
*
* SYNOPSIS
* if ( domTestHierarchy(cur, ref) ) ...
*
* this function tests the general hierarchy error.
* it tests if the ref node would cause any hierarchical error for
* cur node. the function evaluates domIsParent() internally.
*
* the function will return 1 if there is no hierarchical error found.
* otherwise it returns 0.
*/
int
domTestHierarchy( xmlNodePtr cur, xmlNodePtr ref );
/**
* NAME domTestDocument
* TYPE function
* SYNOPSIS
* if ( domTestDocument(cur, ref) )...
*
* this function extends the domTestHierarchy() function. It tests if the
* cur node is a document and if so, it will check if the ref node can be
* inserted. (e.g. Attribute or Element nodes must not be appended to a
* document node)
*/
int
domTestDocument( xmlNodePtr cur, xmlNodePtr ref );
/**
* NAME domAddNodeToList
* TYPE function
* SYNOPSIS
* domAddNodeToList( cur, prevNode, nextNode )
*
* This function inserts a node between the two nodes prevNode
* and nextNode. prevNode and nextNode MUST be adjacent nodes,
* otherwise the function leads into undefined states.
* Either prevNode or nextNode can be NULL to indicate that the
* node has to be inserted at the beginning or the end of the
* node list. In that case the given reference node has to be the
* first or the last node in the list.
*
* if prevNode is the same node as cur node (or in case of a
* Fragment its first child) only the parent information will
* get updated.
*
* The function behaves differently from libxml2's list functions.
* The function is aware of document fragments.
* The function does not perform any text node normalization!
*
* NOTE: this function does not perform any high-level
* error handling. Use this function with caution, since it can
* lead to undefined states.
*
* the function will return 1 if the cur node is appended to
* the list. otherwise the function returns 0.
*/
int
domAddNodeToList( xmlNodePtr cur, xmlNodePtr prev, xmlNodePtr next );
/**
* part A:
*
* class Node
**/
/* A.1 DOM specified section */
xmlChar *
domName( xmlNodePtr node );
void
domSetName( xmlNodePtr node, xmlChar* name );
xmlNodePtr
domAppendChild( xmlNodePtr self,
xmlNodePtr newChild );
/**
* NAME domReplaceChild
* TYPE function
* SYNOPSIS
* node = domReplaceChild( self, newChild, oldChild );
*
* Declaration only; the implementation is not part of this header.
*
* NOTE(review): this prototype sits in the "A.1 DOM specified section",
* so it presumably replaces oldChild with newChild among the children
* of self -- confirm the return value and the handling of NULL
* arguments against the implementation.
*/
xmlNodePtr
domReplaceChild( xmlNodePtr self,
                 xmlNodePtr newChild,
                 xmlNodePtr oldChild );
xmlNodePtr
domRemoveChild( xmlNodePtr self,
xmlNodePtr Child );
xmlNodePtr
domInsertBefore( xmlNodePtr self,
xmlNodePtr newChild,
xmlNodePtr refChild );
xmlNodePtr
domInsertAfter( xmlNodePtr self,
xmlNodePtr newChild,
xmlNodePtr refChild );
/* A.3 extra functionality not specified in DOM L1/2*/
xmlChar*
domGetNodeValue( xmlNodePtr self );
void
domSetNodeValue( xmlNodePtr self, xmlChar* value );
/**
* NAME domReplaceNode
* TYPE function
* SYNOPSIS
* node = domReplaceNode( oldNode, newNode );
*
* Declaration only; the implementation is not part of this header.
*
* The parameters are deliberately not called "old" and "new": "new" is
* a reserved word in C++, and this header carries __cplusplus guards,
* so a parameter with that name would break any C++ translation unit
* that includes it. Parameter names in a prototype are not part of the
* C interface, so callers and the definition are unaffected.
*
* NOTE(review): presumably replaces oldNode with newNode in the tree;
* confirm the return value against the implementation.
*/
xmlNodePtr
domReplaceNode( xmlNodePtr oldNode, xmlNodePtr newNode );
/**
* part B:
*
* class Document
**/
/**
* NAME domImportNode
* TYPE function
* SYNOPSIS
* node = domImportNode( document, node, move, reconcileNS);
*
* the function will import a node to the given document. it works safely
* with namespaces and subtrees.
*
* if move is set to 1, then the node will be entirely removed from its
* original document. if move is set to 0, the node will be copied with the
* deep option.
*
* if reconcileNS is 1, namespaces are reconciled.
*
* the function will return the imported node on success. otherwise NULL
* is returned
*/
xmlNodePtr
domImportNode( xmlDocPtr document, xmlNodePtr node, int move, int reconcileNS );
/**
* part C:
*
* class Element
**/
xmlNodeSetPtr
domGetElementsByTagName( xmlNodePtr self, xmlChar* name );
xmlNodeSetPtr
domGetElementsByTagNameNS( xmlNodePtr self, xmlChar* nsURI, xmlChar* name );
xmlNsPtr
domNewNs ( xmlNodePtr elem , xmlChar *prefix, xmlChar *href );
xmlAttrPtr
domGetAttrNode(xmlNodePtr node, const xmlChar *qname);
xmlAttrPtr
domSetAttributeNode( xmlNodePtr node , xmlAttrPtr attr );
int
domNodeNormalize( xmlNodePtr node );
int
domNodeNormalizeList( xmlNodePtr nodelist );
int
domRemoveNsRefs(xmlNodePtr tree, xmlNsPtr ns);
void
domAttrSerializeContent(xmlBufferPtr buffer, xmlAttrPtr attr);
void
domClearPSVI(xmlNodePtr tree);
#endif

601
example/JBR-ALLENtrees.htm Normal file
View File

@ -0,0 +1,601 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<!-- W3C Validator filename: E:/Web/WS-JBR/JBR-ALLENtrees.htm -->
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>ALLEN Descendancies</title>
<link rel="stylesheet" href="JBRindex.css" type="text/css" />
<style type="text/css" media="screen,print"> </style>
</head>
<body>
<div id="PageContainer"> <!-- this container, used for every page, allows reference font size fine tuning etc.-->
<div class="pageHead">
<h1>The ALLEN Patrilineage Descendancies</h1>
<div class="navTop">
<ul>
<li><a href="mailto:john@johnbrobb.com">Email&nbsp;JBR</a></li>
<li><a href="index.html">JBRobb Home</a></li>
<li><a href="JBRdna.htm">About DNA Testing</a></li>
<li><a href="JBR-ALLEN.htm">The ALLEN Patrilineage Page</a></li>
</ul>
</div>
</div>
<!-- start pageRight -->
<div class="pageRightW66">
<h3 class="marginTop1" id="PostedDescendancies">Posted Patrilineage Pedigrees &amp; Descendancies</h3>
<div class="surPedFont"> <!-- start surPed section -->
<p class="indented">
The following ancestral ALLEN descendancies have been contributed by researchers of this ALLEN patrilineage, and are generally substantiated with
evidence. Each descendancy begins with the earliest known male ancestor of a particular sublineage and continues down to the tested male descendant.
Since this DNA patrilineage project is focused on tested or testable male ALLENs, these descendancy trees have been pruned not only of daughters, but
also of male lines which are known to have gone extinct or &ldquo;daughtered out&rdquo;. However, in some instances complete reconstructed families of
the first generation or two will be included because of their broad-based genealogical interest; in such cases males known or presumed to have died
without children will be flagged &ldquo;no known issue&rdquo;, or &ldquo;(NKI)&rdquo;.</p>
<p class="indented">
The information provided for each male ALLEN should be sufficient in most cases to uniquely identify him in the USCensus and other readily available
sources. These data comprise (insofar as is known): date and place of birth, date and place of death, the name(s) of his wife (or wives) and the
date and place of marriage. Indefinite dates are always qualified as either approximate (&ldquo;abt&rdquo;, &ldquo;bef&rdquo;, &ldquo;aft&rdquo;, or
&ldquo;by&rdquo;) or merely estimated (&ldquo;say&rdquo;). Approximated dates imply supporting evidence which merely fails of complete accuracy, while
&ldquo;say&rdquo; dates are guesstimates based on typical patterns of the time, place, and social group. In some places, I have adjusted dates provided
by the sources to conform to these conventions, taking into consideration the accompanying evidence.</p>
<p class="indented">
The yDNA-tested male descendants are flagged below with their Project #s and the &ldquo;handle&rdquo; of the Principal Researcher (e.g.&nbsp;
<span class="clr-Descendant">Donald-05, Camilla-06</span>).</p>
<p class="indented">
Inferences about the placement of the distinctive yDNA mutations of project members have been interwoven with their descendancies, below, <span
class="clr-Mutation_sentence">in red</span>; please note, however, the careful qualifications where they appear. Most inferences drawn from DNA evidence
are probabilistic in nature and one needs to keep an open mind about alternative interpretations, just as one does with the genealogy itself.</p>
<!-- Some id= Descendant targets are placed above the actual D-xx text to better fill the screen window with relevant text -->
<p class="shim">invisible writing</p>
<h2>Early ALLENs&mdash;Possible Project Ancestors</h2>
<p class="shim">invisible writing</p>
<p class="surPedHeader">1-William of York County, Virginia, died about 1677</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#sourceHT">HT ALLEN</a>)</p>
<p>1--William Allen (- bef 12Nov1677 YorkCoVA)&nbsp;&nbsp;m. Judith</p>
<p>|--2-Hudson Allen [no known descendants]</p>
<p>|--2-William Allen [supposed to be father of William of New Kent &#133; Albemarle Cos, below]</p>
<p class="shim">invisible writing</p>
<h2 id="Allen_Descendancies">Known ALLEN Project Sublineages</h2>
<p id="Robert1680" class="shim">invisible writing</p>
<p class="surPedHeader">1-Robert Allen of New Kent, Hanover, Goochland, and Henrico Counties, VA, born say 1680</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;This Robert descendancy is the main focus of <a href="JBR-ALLEN.htm#sourceMILLER">MILLER</a>,
and except for the final generations connecting to the present, has been derived from MILLER by <a href="mailto:john@johnbrobb.com">John B. Robb</a>.
&nbsp;It should be noted, however, that Melba Allen of Mississippi has an entirely different reconstruction of the top of this Allen tree. &nbsp; <a
href="JBR-ALLEN.htm#A-08">Donna Bailey</a>, who is a descendant of this line through Robert->Robert->Josiah of South Carolina, has furnished additional
material for her more recent ancestors. </p>
<p>1-Robert Allen (say 1680 - abt 1756 HenricoCoVA)</p>
<p><span class="clr-Mutation_sentence">The mutation DYS447- probably appears early downstream of the patriarch Robert.</span></p>
<p>|--m1. say 1708 Elizabeth [WALKER?]</p>
<p>|--2-Robert Allen (say 1709 NewKentCoVA - abt 1783 CaswellCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m1. Elizabeth [YOUNG??]</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Young Allen (by 1732 - 1774 WakeCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Martha [COLEMAN?]</p>
<p class="clr-Comment">|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;MILLER adds sons Robert &amp; Drury citing Young&rsquo;s will
but that document lists only the sons below;</p>
<p class="clr-Comment">|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;the bogus Drury was supposedly the progenitor of one of the IredellCoNC Allen families.</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-William Allen (say 1757 - after 1791)&mdash;no known issue</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Coleman Allen (say 1760 - [GA?])&mdash;Colemans &amp; other descendants turn up in GA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Robin Allen (say 1763 -)&mdash;no known issue</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Young Allen (say 1766 -)&mdash;no known issue</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Josiah Allen (say 1766 - 1823 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1794 Nancy WHITE, GranvilleCoNC&mdash;Sons William, Joshua, Young, &amp; Joseph</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-David Allen (abt 1771 -)&mdash;removed to AnsonCoNC; no known issue</p>
<p id="A-08">
|&nbsp;&nbsp;&nbsp;|--3-Josiah Allen (abt 1733 - 1781 EdgefieldCoSC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. say 1757 _?_</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-James Allen (abt 1758 - 1826 MontgomeryCoAL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. _?_</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Lemuel Young Allen (abt 1797 EdgefieldCoSC - abt 1851 RuskCoTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. Rebecca HUDSON</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5- ...who had sons James Jefferson, Alsey, Madison, and Josiah (all minors in 1826)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Robert Allen (abt 1760 - 1829 MontgomeryCoAL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Nancy HAHM, in SC&mdash;Sons Dean, Wade Hampton, James, Robert, George</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Josiah Allen (abt 1762 - 1796 EdgefieldCoSC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Jennet&mdash;Sons William Winn, and Benjamin</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Young Allen (abt 1764 - abt 1834 EdgefieldCoSC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Susanna RICHARDSON&mdash; Son Aaron</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Drury Allen (1777 EdgefieldCoSC - btw 26Apr1856 and 4Feb1857 ClarkeCoAL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. abt 1799 [Margaret WAITE], EdgefieldCoSC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Josiah Allen (1800 EdgefieldCoSC - 1891 JasperCoMS)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1833 Elizabeth HOWELL, RankinCoMS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Henry Allen (abt 1801 EdgefieldCoSC - 1891 JasperCoMS)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. Sarah LATHAM</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Drury Allen (1806 EdgefieldCoSC - 1908 ErathCoTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. 1826 Elizabeth BUCKELEW, ClarkeCoAL</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. 1841 Elizabeth (_?_) WOOD, ClarkeCoAL</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-George Washington Allen (1851 ClarkeCoAL - 1943 ErathCoTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1869 Sarah Elizabeth JORDAN, ClarkeCoAL</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--7-Henry Hampton Allen (1874 ClarkeCoAL - 1964 HoodCoTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1892 Ida Leona Batts</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--8-Boyd Earl Allen (1910 ErathCoTX - 1990 ErathCoTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--9-Danny Ray Allen &nbsp;&nbsp;&nbsp;
<span class="clr-Descendant">*** Donna-08 *** &nbsp;&nbsp;&nbsp;</span> <span class="clr-Mutation_sentence">(has the DYS447- mutation)</span></p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. 1821 Nanch/Fannie WEBB, ClarkeCoAL</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Robert Allen (abt 1735 - 1801 RichmondCoGA)</p>
<p class="clr-Comment">|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Robert settled initially with brother Josiah in old ColletonCoSC (now extinct)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1760 Elizabeth WEST&mdash;Sons West, Jesse, Robert, Drury, Young, William</p>
<p class="clr-Comment">|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;The wide gap between 3-Robert &amp; 3-Drury suggests that there may have been 2 fathers Robert;</p>
<p class="clr-Comment">|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;there is solid evidence at least that 3-Drury was the son of 2-Robert who died in CaswellCoNC..</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Drury Allen (1749 LunenburgCoVA - 1826 PikeCoGA)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Elizabeth YARBOROUGH</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Josiah Allen (abt 1769 [ButeCoNC?] - abt 1816 GreeneCoGA)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1791 Elizabeth BROWNING&mdash;Sons Robert A., Young Drury, and Pleasant Josiah</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Clement Allen (say 1773 - bef 1823 PikeCoGA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Nancy McKISSICK&mdash;Sons Josiah, Young Stokes, John Cunningham, Drury, and James.</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Stokes Allen (say 1775 - by 1831 PikeCoGA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Susanna (GRAVES) FOUSHEE&mdash;Sons Clement Young, Thomas G., Stephen W., Young Drewry</p>
<p>|&nbsp;&nbsp;&nbsp;|--m2. by 1765 Hannah (EDWARDS) HUDSON</p>
<p>|--2-Benjamin Allen (twin) (1711 NewKentCoVA - aft 1766 [HenricoCoVA?])&mdash;no known issue</p>
<p>|--2-Joseph Allen (twin) (1711 NewKentCoVA - 1771 CharlotteCoVA)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. Sarah HUNT</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Memucan Hunt Allen (1753 - abt 1833 AndersonCoKY)&mdash;sons Joseph and Charles</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Memucan had brothers Benjamin, Robert, and Charles; the latter came with him to KY.</p>
<p>|--2-Drury Allen (1714 NewKentCoVA - abt 1803 PersonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. Sarah TINSLEY</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-David Allen (1746 LunenburgCoVA - 1828 PersonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Catherine LANIER&mdash;sons Thomas, Drury, William</p>
<p>|--2-Richard Allen (say 1718 NewKentCoVA - aft 1772 [Henrico/HanoverCoVA])</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. _?_ (no record of marriage); probably had son Drury who sold his HanoverCo land</p>
<p>|--2-William Allen (say 1721 NewKentCoVA - abt 1785 GranvilleCoNC)&mdash;married twice but left no sons</p>
<p>|--m2. aft 1734 Sarah [WINSTON??]</p>
<p class="shim">invisible writing</p>
<p id="William1725" class="shim">invisible writing</p>
<p id="A-09" class="surPedHeader">1-William Allen of Mecklenburg County, VA, born say 1725</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-04">Dr. Bob Allen</a>;
additions by <a href="mailto:john@johnbrobb.com">John B. Robb</a> based on <a href="JBR-ALLEN.htm#sourceMILLER">MILLER</a>,
<a href="JBR-ALLEN.htm#sourceCARPENTER-McKEE">CARPENTER/McKEE</a>,<br /> and the USCensus)</p>
<p>1-William Allen (say 1725 VA - 1789 MecklenburgCoVA)</p>
<p>|--m1. say 1750, _?_ WRAY</p>
<p>|--2-Turner Allen (abt 1751 - [LunenburgCoVA?])&nbsp;&nbsp;m. Sally NESBITT</p>
<p>|--2-Drury Allen (abt 1753 - 1823 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. abt 1772, Eleanor (&ldquo;Nelly&rdquo;) JARROTT</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Julius Allen (say 1773 - 1845 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. _?_ SMITH</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Henderson Allen (1814 AnsonsCoNC - 1897 GA)---m. 1842 Sarah C. HAMMOND</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Thomas Allen (abt 1775 - 1858 FayetteCoTN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. _?_ SMITH</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Jeremiah Allen (say 1808 AnsonCoNC - aft 1858)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Miles Allen (abt 1811 AnsonCoNC - abt 1843 [FayetteCoTN?])</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1832 Eliza Ann MORRIS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-William Thomas Allen (1834 TN - abt 1859)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1857 Rosanna HYATT</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-William Addison Allen (1858 AnsonCoNC - 1928 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1878 Louisa Charlotte TOMLINSON</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--7-William Addison Allen (1891 AnsonCoNC - 1955 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--8-Clinton Thomas Allen &nbsp;&nbsp;&nbsp;
<span class="clr-Descendant">*** Lynda-09 ***</span> &nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(no mutations)</span></p>
<p id="A-04">
|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Edmond Allen (abt 1816 NC -)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Rebecca SMITH b.AL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. Nancy</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-David Allen (abt 1780 VA - abt 1850 MarshallCoMS)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. Mary PARKER; m2. Elizabeth TURNER</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|-?4-Claiborne Allen (abt 1815 NC - aft 1850 [MarshallCoMS?])</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Benjamin Allen (abt 1781 NC - 1878 SumterCoAL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1812 Huldah PARKER</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Richmond Allen (1813 NC - 1889 SumterCoAL)---m. 1834 Sarah R. ALLEN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Wiley Allen (1815 NC - 1891 SumterCoAL)---m. Elvira Ann Tiras PONDS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(removed to AL about 1840)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-William Allen (abt 1817 NC - aft 1850 [ChoctawCoAL?])---m. Frances L.</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|- 4-Enoch Allen (abt 1824 NC - 1896 Sumter/ChoctawCoAL?])---m. Jane S.</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|- 4-Hampton Allen (1829 NC - 1903)---m. Susan Elizabeth BOWERS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|- 4-Robert Allen (abt 1834 NC - 1903)---m. Susan Elizabeth BOWERS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|- 4-Alexander Allen, a twin (abt 1837 NC - 1903)---m. Susan Elizabeth BOWERS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|- 4-Jeremiah Allen, a twin (abt 1837 NC -)---m. Molly SIMPSON (NKI)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-other children: Jules, Dock (NKI), Norriss (NKI), Mary Ann, Harriet, Thomas (NKI)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-William Allen ( -)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-John Allen (abt 1792- 1857 AnsonCoNC)</p>
<p><span class="clr-Mutation_sentence">The mutation CDYb- first appears with John or one of his descendants.</span></p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1812 Mary (&ldquo;Polly&rdquo;) ALLEN (a 1st cousin)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Drury Allen (abt 1825 NC - bef 1880 [AnsonCoNC?])</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Catharine Rowena BAUCOM</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-William Henry Allen (abt 1851 AnsonCoNC - 1924 StanlyCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1873 Ellen CURLEE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-Robert Sidney Allen (1876 AnsonCoNC - 1953 LeeCoSC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1903 Lilla Emile CRUMP</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--7-Flake Shellum Allen (1918 AnsonCoNC - 1963 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--8-Robert Shepherd Allen &nbsp;&nbsp;&nbsp;
<span class="clr-Descendant">*** Dr.Bob-04 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(has the CDYb- mutation)</span></p>
<p>|--2-William Allen (abt 1757 -)</p>
<p>|--2-John Allen (abt 1759 -)&nbsp;&nbsp;m. 1783, Nancy MORGAN</p>
<p>|--2-Darling Allen (abt 1760 - 1802 AnsonCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. 1793, Judith NANCE (Sons: Robert Nance b.1792, Darling)</p>
<p>|--2-Young Allen (abt 1764 - [LunenburgCoVA?])&nbsp;&nbsp;m. Sarah POOLE</p>
<p>|--2-Pleasant Allen (abt 1766 -)&nbsp;&nbsp;m. 1787, Rebecca WATSON</p>
<p>|--2-Meredith Allen (say 1768 - 1829 HenryCoVA)&nbsp;&nbsp;m. 1788, Nancy COOPER</p>
<p>|--2-Gray Allen (abt 1770 -)&nbsp;&nbsp;m. 1791, Molly NANCE (Son: Gray)</p>
<p>|--m2. say 1775, Ann</p>
<p>|--2-Robert Allen (abt 1776 - [HenryCoVA?])&nbsp;&nbsp;m. Celia MULLINS</p>
<p>|--2-Joseph Smith Allen (1779 MecklenburgCoVA - StCharlesCoMO)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m1. 1807, Sarah WADE (Son William)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m2. abt 1811, Rachel MAY (Sons: William M., Robert L., John Pines, Joseph J.)</p>
<p>|--2-Pines Allen (abt 1782 MecklenburgCoVA - MO)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m1. 1807 HenryCoVA, Charlotte BAILEY (Sons: Robert B., Joseph J., John Parks, Charles C.</p>
<p>|&nbsp;&nbsp;&nbsp;|--m2. 1821 Nancy HUGHES (Sons: Pines Henderson b.1825 NC, William M. b.1832 MO</p>
<p class="shim">-</p>
<p id="Reynold1728" class="shim">invisible writing</p>
<p id="A-xx" class="surPedHeader">1-Reynold Allen of Granville, Johnston, Wake, and Iredell Cos NC, born say 1723</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp; <a href="JBR-ALLEN.htm#sourceMILLER">MILLER</a>
(mostly based on contributions by Sara C. Allen))</p>
<p>1-Reynold Allen (say 1723 - abt 1808 [IredellCoNC?])</p>
<p>|--m. say 1751, Mary</p>
<p>|--2-Young Allen (say 1756 NC - 1834 WakeCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. say 1779 Phebe PULLEN</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-James Akin Allen (abt 1782 WakeCoNC - 1862 WakeCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. Mary WYNNE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-William Gaston Allen (1810 NC - 1858)&nbsp;&nbsp; m. 1839 Martha B. SHIPP, WakeCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Henry Young Allen (1824 NC -)&nbsp;&nbsp;1850 Susan Ann OVERBY, GranvilleCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Solomon J. Allen (abt 1831 - abt 1901)&mdash;no known issue</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. 1849 Priscilla JACKSON</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-John Allen (abt 1784 NC - [aft 1860 CarrollCoTN??])&nbsp;&nbsp;m. 1808 Sarah HARRISON WakeCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Moses Harrison Allen (1808-1886 NC)&nbsp;&nbsp; m. 1827 Lucy Williams Rhodes, WakeCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Henry Anderson Allen (1814 - aft 1880 [WakeCoNC?]).</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1837, Sarah Elizabeth ROGERS, WakeCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Wyatt Marion Allen (1824 WakeCoNC - 1863 WakeCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1855 Martha Ann BAILEY, WakeCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Henry Allen (- abt 1841 WakeCoNC)&nbsp;&nbsp;m. 1831 Aley Sharpe Allen</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-James H. Allen (1815 NC -)&nbsp;&nbsp;m. 1838, Elizabeth C. SPAIN</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Miles Allen (- bef 1839 GibsonCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1810 Jinsey ("Jane") BLEDSOE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Reynold Allen (1791 NC - abt 1879 WakeCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m1. 1824 Betsy Ann HARRISON</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-William Anderson Allen (1825 NC - 1884)&nbsp;&nbsp;m. 1849 Maria G. HICKS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m2. 1836 Jane H. CANNON</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Charles Nickolas Allen (abt 1837 -)&nbsp;&nbsp;m. 1862 Caroline V. JOHNS WakeCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-James Bascumb Allen (abt 1851 -)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Young W. Allen (abt 1796 NC - aft 1880 [CarrollCoTN?]&nbsp;&nbsp; m. Ann ROGERS</p>
<p>|--2-Reynold Allen (say 1758 - abt 1812 IredellCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-William Moore Allen (abt 1784 NC - abt 1851 IredellCoNC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Hinchea Allen&nbsp;&nbsp;m. 1835 Catherine LITTLE, LincolnCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-David Allen</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Burrell Allen&nbsp;&nbsp;m. 1832 Rhoda L. HOKE, LincolnCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-John Allen</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-William Lee Allen</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Augustus Allen</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-David Allen (say 1786 -)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Darling Allen (1788 IredellCoNC - 1867 WilkesCoNC)&nbsp;&nbsp;m. 1809 Susan WALLIS, WilkesCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-(1841 - 1843 IredellCoNC)&mdash;no known issue</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-William Wallace Allen (1827 IredellCoNC - abt 1863)&nbsp;&nbsp;m. Agnes MOORE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-John Allen</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Benton Carlton Allen (abt 1828 IredellCoNC -)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-John Haden Allen ()</p>
<p class="shim">-</p>
<p id="William1691" class="shim">invisible writing</p>
<p class="surPedHeader">1-William Allen of New Kent, Goochland, and Albemarle Counties, Virginia, born say 1691</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-03">Bill A. Allen</a>, <a href="JBR-ALLEN.htm#A-05">Donald
Allen</a>, <a href="JBR-ALLEN.htm#sourceWICKER">WICKER</a>. Project members Bill A., and Donald have researched their lines extensively and published
books on them which credibly link their TN ancestors to NC and VA respectively, and both books are full of anecdotal and background information which
brings their genealogy to life: see <a href="JBR-ALLEN.htm#sourceBill-A_ALLEN">Bill-A_ALLEN</a>, and <a href="JBR-ALLEN.htm#sourceDonald_ALLEN">Donald_ALLEN</a>)</p>
<p>1-William Allen (say 1691 - 1752 AlbemarleCoVA) </p>
<p>|--m1. abt 1712 Hannah [WATSON?]</p>
<p>|--2-Samuel Allen (1713 -)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. Martha ARCHER</p>
<p>|--m2. 1720 Mary (HUNT) MINGE</p>
<p>|--2-William Hunt Allen (1724 -1806 BuckinghamCoVA)&nbsp;&nbsp;m. Elizabeth</p>
<p id="A-03">
|--2-John Allen (1726 - 1754 AlbemarleCoVA)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. abt 1748 Betheniah Thomas NEVIL</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Samuel Allen (1747 - 1800 AmherstCoVA)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1771 Hannah JOPLING</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-George Allen (1773 [AmherstCoVA?] - 1835 WhiteCo, later DekalbCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1798 Phebe WALKER, VA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-John W[alker?] Allen (abt 1800 [AmherstCoVA?] - aft 1870 [DeKalbCoTN?])</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1823 Lucy W. FLOWERS</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-George Allen (abt 1832 TN -)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-William Allen (abt 1834 TN -)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-John M. Allen (1843 SmithCoTN - 1902 WiseCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1876 Sarah Ann PELTON TarrantCoTX</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--7-O'Guster ("Gus") Allen (1877 TarrantCoTN - 1959 OklahomaCityOK)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1895 Frances Rosine JACKSON JackCoTX</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--8-John Elmer Allen (1907 OklahomaTerr - 1989 OklahomaCityOK)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--9-Donald Lee Allen &nbsp;&nbsp;&nbsp;
<span class="clr-Descendant">*** Donald-05 ***</span> </p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-John W. had possible brothers Jesse, Samuel, William D, and George H</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Jesse Allen (1777 [AmherstCoVA?] - 1857 DekalbCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Nancy WALKER va</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Samuel Hunt Allen (abt 1785 AmherstCoVA? - abt 1831)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. by 1810 Polly WALKER VA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-John Allen (abt 1786 AmherstCoVA? - 1854)</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Jesse Allen (abt 1748 - abt 1781 BuckinghamCoVA)</p>
<p>|--2-Valentine Allen (1730 - 1797 RockinghamCoNC)&nbsp;&nbsp;m. Nancy Ann ARNOLD</p>
<p><span class="clr-Mutation_sentence">The mutation DYS449+ first appears with Valentine or one of his descendants.</span></p>
<p>|&nbsp;&nbsp;&nbsp;|--m. 1753 Nancy Ann ARNOLD CumberlandCoVA</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-William Hunt Allen (by 1755 - abt 1822 BedfordCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1777 Agatha SCALES RockinghamCoNC</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-George Hunt Allen (1780 RockinghamCoNC - 1874 MarshallCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1804 Mary OGILVIE DavidsonCoTN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Grant Iverson Allen (1805 WilliamsonCoTN - 1891 MarshallCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1830 Nancy Elizabeth ALLEN WilliamsonCoTN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|
--6-Thomas Alexander Allen (1837 MarshallCoTN - 1917 MarshallCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;
---m. 1867 Mary Fredonia Jane Ewing WilliamsonCoTN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|
--7-William Harris Allen (1876 MarshallCoTN - 1942 MarshallCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;
---m. 1909 Ruth Hunter WilliamsonCoTN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|
--8-Thomas Hunter Allen (1912 MarshallCoTN - 1972 MarshallCoTN)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|
--9-William Alfred Allen &nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Bill-A-03 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(has the DYS449+ mutation)</span></p>
<p>|&nbsp;&nbsp;&nbsp;|--3-George Allen (abt 1774 - 1854 MarshallCoTN)&nbsp;&nbsp;m. Annie Eliza PATRICK</p>
<p>|--2-George Hunt Allen (1734 - 1778)&nbsp;&nbsp;m. Mary BALLARD</p>
<p>|--2-Phillip Allen (1740 - 1763)</p>
<p class="shim">-</p>
<p id="A-01" class="shim">invisible writing</p>
<p class="surPedHeader">1-James T Allen of Bastrop, and Wise County, TX, born 1832</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-01">Bill Bernard Allen</a>,
<a href="JBR-ALLEN.htm#A-12">Fletcher Thomason</a>)</p>
<p><span class="clr-Mutation_sentence">The mutation DYS449+ occurs either downstream or within a few generations upstream of James T. Allen.</span></p>
<p>1-James T. Allen (1832 TN - 1900 WiseCoTX)</p>
<p>|--m. say 1755, Sarah ("Sallie") C. MILLER</p>
<p>|--2-Henry Clay Allen (1857 EllisCoTX - 1946 PhoenixAZ)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m1. abt 1878 Edna Eliza TIMMONS</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-William Louis Allen (1879 WiseCoTX - 1966 MercerCoIL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. abt 1910 Maude SWARTOUT</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-James Elwood Allen (1927 MercerCoIL - 2008 RockIslandCoIL)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Violet Irene GEORGE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-William Bernard Allen (1949 -)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|
--6-William Bernard Allen (1971) &nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Bill-B-01 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(has the DYS449+ mutation)</span></p>
<p>|&nbsp;&nbsp;&nbsp;|--m2. 1881 Molly Eugenia Brazziel (Children: Tom, James, Etta Anna)</p>
<p>|--2-Frank Marion Allen (1866 - )</p>
<p>|--m1. abt 1895 Loucille LYNCH</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Tincy boy Kenneth Allen (1912 CottleCoTX - 1992 WiseCoTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Dovie Elizabeth BARTON</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Lee Roy Allen (1942 TarrantCoTX -) <span class="clr-Descendant">*** Fletcher-12 ***</span></p>
<p id="A-14" class="shim">invisible writing</p>
<p class="surPedHeader">1-Charles Henry Francis Marion Allen of Georgia, and Cherokee County, Alabama, born abt 1826</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;Winston Allen, and <a href="mailto:john@johnbrobb.com">John B. Robb</a>)</p>
<p><span class="clr-Mutation_sentence">The mutation DYS447- occurs either downstream or within a few generations upstream of Charles Henry Allen.</span></p>
<p>1-Charles Henry Francis Marion Allen (abt 1825 GA - bef 1900 [AL?]) </p>
<p>|--m. abt 1846 Francis [TOWERS?]</p>
<p>|--2-James T. Allen (abt 1849 AL -)</p>
<p>|--2-A[sa] Benjamin R. Allen (1861 AL - bef 1910 [LamarCoTX?])</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. abt 1884 Martha Lou CHISENHALL</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-James Lawrence Allen (1891 LamarCoTX -)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Essie Velma RATLIFF</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Winston Allen &nbsp;&nbsp;&nbsp;
<span class="clr-Descendant">*** Winston-14 ***</span>
<span class="clr-Mutation_sentence">(has the DYS447- mutation)</span>
</p>
<p id="A-07" class="shim">invisible writing</p>
<p class="surPedHeader">1-Joseph Allen of Elbert County, Georgia, born abt 1760</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-07">LouDean Mayes</a>)</p>
<p><span class="clr-Mutation_sentence">The mutation DYS447- occurs upstream of the patriarch, Joseph, of this sublineage.</span></p>
<p><span class="clr-Mutation_sentence">The mutations DYS570- &amp; 464- occur probably downstream of Joseph.</span></p>
<p>1-Joseph Allen (say 1760 GA - 1833 ElbertCoGA) </p>
<p>|--m. bef 1790 Agnes PATTERSON</p>
<p>|--2-James Allen (1808 ElbertCoGA - 1880 HartCoGA)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. 1837 Mary A. HAYNES, ElbertCoGA</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-James Monroe Allen (1846 ElbertCoGA - 1921 AndersonCoSC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1867 Mary Elizabeth Frances SANDERS, HartCoGA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-James Henry Edward Allen (1869 HartCoGA - 1943 HartCoGA)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1910 Mary Lula OWENS, HartCoGA </p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Ira Wilson Allen (1914 HartCoGA - 1976 HartCoGA)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-Ira Wilson Allen, Jr &nbsp;&nbsp;&nbsp;
<span class="clr-Descendant">*** LouDean-07 ***</span> &nbsp;&nbsp;&nbsp;
<span class="clr-Mutation_sentence">(has the DYS447- mutation, also 570- and 464-)</span></p>
<p id="A-10" class="shim">invisible writing</p>
<p class="surPedHeader">1-Russell Allen of Franklin County, AL, born abt 1828 in GA</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-10">Owen Allen</a>)</p>
<p><span class="clr-Mutation_sentence">The mutation CDYb- occurs either downstream or within a few generations upstream of Russell. </span></p>
<p>1-Russell Allen (abt 1828 GA - bef 1880 [FranklinCoAL?])</p>
<p>|--m. abt 1849 Millie</p>
<p>|--2-Peter Allen (abt 1857 - bef 1920)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. abt 1880, Savannah</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Morrison Allen (1881 AL - 1976)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Lula Elizabeth WHITE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Owen Tillman Allen (1909 ColbertCoAL - 1958)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Owen Allen &nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Owen-10 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(has the CDYb- mutation)</span></p>
<p class="shim">-</p>
<p id="A-06" class="shim">invisible writing</p>
<p class="surPedHeader">1-James Allen of Gonzales County, TX, born 1814 in NC</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-06">Camilla Mitchell</a>)</p>
<p>1-James Allen (1814 NC - 1868 GonzalesCoTX)</p>
<p>|--m1. 1835 Camilla Catherine Tores LILLY [MontgomeryCoNC?])</p>
<p>|--2-James Robberson Allen (1841 - 1842)</p>
<p>|--2-Robbert Alexander Allen (1843 - 1863 MadisonParishLA)&mdash;no known issue</p>
<p>|--m2. 1852 Mary Ann Key, MaconCoAL</p>
<p>|--2-Henry Clarence Allen (1852 AL -)</p>
<p>|--2-Augustus Key Allen (1858 -)</p>
<p>|--2-Marion Jackson Allen (1861 -)</p>
<p>|--2-Thomas Jefferson Allen (1865 GonzalesCoTX - 1926 SanAntonioTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. 1898 Lillie Louisa BEACH, ElPasoTX</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Marion Thomas Allen (1905 DimmitCoTX - 1986)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-William Douglas Allen &nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Camilla-06 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(no mutations&mdash;is the project RPH)</span></p>
<p class="shim">-</p>
<p id="A-11" class="shim">invisible writing</p>
<p class="surPedHeader">1-Robert N. Nesbitt of Hunt County, TX, born 1831 in TN</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-11">Geoff Nesbitt</a>)</p>
<p>1-Robert N. Nesbitt (1831 TN - aft 1910)</p>
<p>|--m. abt 1854 Martha E. SHEPARD</p>
<p>|--2-William Allen Nesbitt (1860 TX - 1937)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. 1862 Caddie VANNERSON, HopkinsCoTX</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Robert Allen Nesbitt (1889 TX - 1934)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. Robbie RIKE</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Robert Allen Nesbitt, Jr. (1914 AdaOK - 1988 GalvestonTX)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-Geoffrey Robert Nesbitt
&nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Geoff-11 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(no mutations)</span></p>
<p class="shim">-</p>
<p id="A-13" class="shim">invisible writing</p>
<p class="surPedHeader">1-Frank Harris of Marion County, Alabama, born say 1850</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-13">Sandra Hunt</a>)</p>
<p><span class="clr-Mutation_sentence">The mutation CDYB+ occurs either downstream or within a few generations upstream of Frank Harris. </span></p>
<p>1-Frank Harris (say 1850 AL - 1900 MarionCoAL)</p>
<p>|--m. abt 1875 Lucy M. [PATE?]</p>
<p>|--2-William Earl Harris (1883 AL - 1972)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m1. abt 1903 Neety PALMER</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-Willie Dalton Harris (abt 1904 MarionCoAL - 1992)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Hugh E. Harris
&nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Sandra-13 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(has the CDYb+ mutation)</span></p>
<p>|&nbsp;&nbsp;&nbsp;|--m2. abt 1909 Artie TICE</p>
<p id="A-02" class="shim">invisible writing</p>
<p class="surPedHeader">1-Gideon A. Allen of Twiggs Co GA, and Bossier Parish Louisiana, born 1804 in NC</p>
<p class="sourcesparagraph">(<strong>Sources</strong>:&nbsp;<a href="JBR-ALLEN.htm#A-02">Cary Allen</a>, whose <a
href="http://www.allensofmarietta.com/">web page</a> has more on his Allen line)</p>
<p><span class="clr-Mutation_sentence">The mutation 464- occurs either downstream or within a few generations upstream of Gideon. </span></p>
<p>1-Gideon A. Allen (1804 NC - 1875 BossierParLA)</p>
<p>|--m. 1828 Mary Ann HORN, TwiggsCoGA</p>
<p>|--2-John G. Allen (1835 TwiggsCoGA - 1906 BossierParLA)</p>
<p>|&nbsp;&nbsp;&nbsp;|--m. 1854 Emily L. SPURLIN, BossierParLA</p>
<p>|&nbsp;&nbsp;&nbsp;|--3-John Claud Allen (1869 BossierParLA - 1927 BossierParLA)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1896 Mary Louwellen ALLEN</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--4-Claud Tarkington Allen (1900 BossierParLA - 1990 GreenvilleCoSC)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--m. 1921 Iona Olive ARNOLD, CaddoParLA</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--5-John Clyde Allen (1931 BossierParLA - 2003)</p>
<p>|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;|--6-Cary Neal Allen
&nbsp;&nbsp;&nbsp;<span class="clr-Descendant">*** Cary-02 ***</span>
&nbsp;&nbsp;&nbsp;<span class="clr-Mutation_sentence">(has a 464- mutation, to 11-13-14-15)</span></p>
<p class="marginBotLots"> </p>
</div> <!-- end surPed section -->
</div> <!--end pageRight -->
<!-- start pageLeft -->
<div class="pageLeftW30">
<p class="shim">-</p> <!-- invisible writing to trick Firefox & Netscape into not drawing navLeft box too high -->
<div class="navLeft">
<div class="navLeftLinksLoose">
<h3>ALLEN Patriarchs</h3>
<p class="multiline-entry"> <a href="#A-14">Charles Henry F. M.(abt 1825 - bef 1910)</a>
<br />&nbsp;&nbsp;&nbsp; GA, Cherokee Co AL
<br />&nbsp;&nbsp;&nbsp; m. Francis [TOWERS?]
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-14">Winston</a>&mdash;<a href="#A-14">A-14</a></p>
<p class="multiline-entry"> <a href="#A-02">Gideon (1804 - 1875)</a>
<br />&nbsp;&nbsp;&nbsp; NC, Twiggs Co GA, &amp; Bossier Parish LA
<br />&nbsp;&nbsp;&nbsp; m. Mary Ann HORN
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-02">Cary</a>&mdash;<a href="#A-02">A-02</a></p>
<p class="multiline-entry"> <a href="#A-06">James (1814 - 1868)</a>
<br />&nbsp;&nbsp;&nbsp; Montgomery Co NC, Gonzales Co TX
<br />&nbsp;&nbsp;&nbsp; m1. Camilla Catherine Tores Lilly
<br />&nbsp;&nbsp;&nbsp; m2. Mary Ann KEY
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-06">Camilla</a>&mdash;<a href="#A-06">A-06</a></p>
<p class="multiline-entry"> <a href="#A-01">James T (abt 1832 - 1900)</a>
<br />&nbsp;&nbsp;&nbsp; Wise Co TX
<br />&nbsp;&nbsp;&nbsp; m. Sarah ("Sallie") C. Miller
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:
<a href="JBR-ALLEN.htm#A-01">Bill-B</a>&mdash;<a href="#A-01">A-01</a>,
<a href="JBR-ALLEN.htm#A-12">Fletcher</a>&mdash;<a href="#A-01">A-12</a></p>
<p class="multiline-entry"> <a href="#A-07">Joseph (say 1760 - 1833)</a>
<br />&nbsp;&nbsp;&nbsp;ElbertCoGA
<br />&nbsp;&nbsp;&nbsp; m. Agnes PATTERSON
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-07">LouDean</a>&mdash;<a href="#A-07">A-07</a></p>
<p class="multiline-entry"> <a href="#Reynold1728">Reynold (say 1728 - abt 1808)</a>
<br />&nbsp;&nbsp;&nbsp;Granville, Johnston, Wake, and Iredell Cos NC
<br />&nbsp;&nbsp;&nbsp; m. Mary</p>
<p class="multiline-entry"> <a href="#Robert1680">Robert (say 1680 - abt 1756)</a>
<br />&nbsp;&nbsp;&nbsp;NewKent, Goochland, &amp; Henrico Cos VA
<br />&nbsp;&nbsp;&nbsp; m1. Elizabeth [WALKER?]
<br />&nbsp;&nbsp;&nbsp; m2. Sarah [Winston??]
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-08">Donna</a>&mdash;<a href="#A-08">A-08</a></p>
<p class="multiline-entry"> <a href="#A-10">Russell (abt 1828 - bef 1880)</a>
<br />&nbsp;&nbsp;&nbsp; Forsyth Co GA, &amp; Franklin Co AL
<br />&nbsp;&nbsp;&nbsp; m. Millie
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-10">Owen</a>&mdash;<a href="#A-10">A-10</a></p>
<p class="multiline-entry"> <a href="#William1691">William (say 1691 - 1752)</a>
<br />&nbsp;&nbsp;&nbsp;NewKent, Goochland, &amp; Albemarle Cos VA
<br />&nbsp;&nbsp;&nbsp; m1. Hannah [Watson?]
<br />&nbsp;&nbsp;&nbsp; m2. Mary (Hunt) Minge
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:
<a href="JBR-ALLEN.htm#A-03">Bill-A</a>&mdash;<a href="#A-03">A-03</a>,
<a href="JBR-ALLEN.htm#A-05">Donald</a>&mdash;<a href="#William1691">A-05</a></p>
<p class="marginBot1">
<a href="#William1725">William (say 1725 - 1789)</a>
<br />&nbsp;&nbsp;&nbsp; Mecklenburg Co VA
<br />&nbsp;&nbsp;&nbsp; m. _?_ Wray
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:
<a href="JBR-ALLEN.htm#A-04">Dr.Bob</a>&mdash;<a href="#A-04">A-04</a>,
<a href="JBR-ALLEN.htm#A-09">Lynda</a>&mdash;<a href="#William1725">A-09</a></p>
</div>
<div class="navLeftLinksLoose">
<h3>Other Surname Patriarchs</h3>
<p class="multiline-entry"> <a href="#A-13">Frank HARRIS (say 1850 - 1900)</a>
<br />&nbsp;&nbsp;&nbsp; Marion Co AL
<br />&nbsp;&nbsp;&nbsp; m1. Lucy M. [PATE?]
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-13">Sandra</a>&mdash;<a href="#A-13">A-13</a></p>
<p class="multiline-entry"> <a href="#A-11">Robert N. NESBITT (1831 - aft 1910)</a>
<br />&nbsp;&nbsp;&nbsp; TN, then Upshur, Hopkins, &amp; Hunt Cos TX
<br />&nbsp;&nbsp;&nbsp; m. Martha E. Shepard
<br />&nbsp;&nbsp;&nbsp;<strong>Descendants</strong>:<a href="JBR-ALLEN.htm#A-11">Geoff</a>&mdash;<a href="#A-11">A-11</a></p>
</div>
</div> <!--end navLeft -->
<div class="pageLeftRest">
<h5 class="bold italic">Navigating from here</h5>
<p class="notes">
The menu buttons at top right take you to other pages on this site, while the nav panel above targets other points on
this page, or brings up other resources. If you find yourself lost, the browser BACK button will take you back to where you
were (some people also have a convenient BACK button on their mouse, right under their thumb). Or hitting the Home key of
your keyboard will take you back to the top of this page where you are now. </p>
</div> <!--end pageLeftRest -->
</div> <!--end pageLeft -->
<div class="siteInfo">
<hr class="clr-dark-tan" />
<a>Last updated 8Apr2010<br />&copy; John Barrett Robb</a>
<img src="Pics/valid-xhtml10.gif" alt="Valid XHTML 1.0 Strict" height="31px" width="88px" />
<img src="Pics/valid-css2.gif" alt="Valid CSS 2.1" height="31px" width="88px" />
</div>
</div> <!-- end of PageContainer block -->
<!-- Start of StatCounter Code -->
<script type="text/javascript">
var sc_project=4699798;
var sc_invisible=1;
var sc_partition=56;
var sc_click_stat=1;
var sc_security="2d713ec5";
</script>
<script type="text/javascript"
src="http://www.statcounter.com/counter/counter_xhtml.js"></script><noscript><div
class="statcounter"><a title="wordpress com stats plugin"
class="statcounter"
href="http://www.statcounter.com/wordpress.com/"><img
class="statcounter"
src="http://c.statcounter.com/4699798/0/2d713ec5/1/"
alt="wordpress com stats plugin" /></a></div></noscript>
<!-- End of StatCounter Code -->
</body>
</html>

9
example/article.xml Normal file
View File

@ -0,0 +1,9 @@
<article>
<pubData>Something here</pubData>
<pubArticleID>12345</pubArticleID>
<pubDate>2001-04-01</pubDate>
<pubName>XML.com</pubName>
<section>Foo</section>
<lead>Here's some leading text</lead>
<rest>And here is the rest...</rest>
</article>

8
example/article_bad.xml Normal file
View File

@ -0,0 +1,8 @@
<article>
<pubData>Something here</pubData>
<pubArticleID>12345</pubArticleID>
<pubName>XML.com</pubName>
<section>Foo</section>
<lead>Here's some leading text</lead>
<rest>And here is the rest...</rest>
</article>

View File

@ -0,0 +1,10 @@
<!DOCTYPE article SYSTEM "test.dtd">
<article>
<pubData>Something here</pubData>
<pubArticleID>12345</pubArticleID>
<pub_date>2001-04-01</pub_date>
<pubName>XML.com</pubName>
<section>Foo</section>
<lead>Here's some leading text</lead>
<rest>And here is the rest...</rest>
</article>

View File

@ -0,0 +1,26 @@
<!DOCTYPE article [
<!ELEMENT article (pubData*, pubArticleID*, pubDate+, pubName+, section+, memo*, headline1*, headline2*, byline*, lead*, rest*, graphics*, keywords*, caption*)>
<!ELEMENT pubData (#PCDATA)>
<!ELEMENT pubArticleID (#PCDATA)>
<!ELEMENT pubDate (#PCDATA)>
<!ELEMENT pubName (#PCDATA)>
<!ELEMENT section (#PCDATA)>
<!ELEMENT memo (#PCDATA)>
<!ELEMENT headline1 (#PCDATA)>
<!ELEMENT headline2 (#PCDATA)>
<!ELEMENT byline (#PCDATA)>
<!ELEMENT lead (#PCDATA)>
<!ELEMENT rest (#PCDATA)>
<!ELEMENT graphics (#PCDATA)>
<!ELEMENT keywords (#PCDATA)>
<!ELEMENT caption (#PCDATA)>
]>
<article>
<pubData>Something here</pubData>
<pubArticleID>12345</pubArticleID>
<pubDate>2001-04-01</pubDate>
<pubName>XML.com</pubName>
<section>Foo</section>
<lead>Here's some leading text</lead>
<rest>And here is the rest...</rest>
</article>

View File

@ -0,0 +1,26 @@
<!DOCTYPE article [
<!ELEMENT article (pubData*, pubArticleID*, pubDate+, pubName+, section+, memo*, headline1*, headline2*, byline*, lead*, rest*, graphics*, keywords*, caption*)>
<!ELEMENT pubData (#PCDATA)>
<!ELEMENT pubArticleID (#PCDATA)>
<!ELEMENT pubDate (#PCDATA)>
<!ELEMENT pubName (#PCDATA)>
<!ELEMENT section (#PCDATA)>
<!ELEMENT memo (#PCDATA)>
<!ELEMENT headline1 (#PCDATA)>
<!ELEMENT headline2 (#PCDATA)>
<!ELEMENT byline (#PCDATA)>
<!ELEMENT lead (#PCDATA)>
<!ELEMENT rest (#PCDATA)>
<!ELEMENT graphics (#PCDATA)>
<!ELEMENT keywords (#PCDATA)>
<!ELEMENT caption (#PCDATA)>
]>
<article>
<pubData>Something here</pubData>
<pubArticleID>12345</pubArticleID>
<pub_date>2001-04-01</pub_date>
<pubName>XML.com</pubName>
<section>Foo</section>
<lead>Here's some leading text</lead>
<rest>And here is the rest...</rest>
</article>

1
example/bad.dtd Normal file
View File

@ -0,0 +1 @@
<!ELEMENT test (%undeclared.entity;)>

3
example/bad.xml Normal file
View File

@ -0,0 +1,3 @@
<foo>
</foo>
<bar>

5
example/catalog.xml Normal file
View File

@ -0,0 +1,5 @@
<?xml version="1.0"?>
<!DOCTYPE catalog PUBLIC "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN" "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd">
<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<rewriteURI uriStartString="http://axkit.org/xml-libxml" rewritePrefix="./"/>
</catalog>

59
example/cb_example.pl Normal file
View File

@ -0,0 +1,59 @@
use strict;
use warnings;
use XML::LibXML;
use IO::File;
# first instantiate the parser
my $parser = XML::LibXML->new();
# initialize the callbacks (all four subs are defined further down in this file)
$parser->match_callback( \&match_uri );
$parser->read_callback( \&read_uri );
$parser->open_callback( \&open_uri );
$parser->close_callback( \&close_uri );
# include XIncludes on the fly
$parser->expand_xinclude( 1 );
# parse the file "test.xml" in the current directory
my $dom = $parser->parse_file("test.xml");
# dump the parsed document, followed by a newline
print $dom->toString() , "\n";
# the callbacks follow
# these callbacks are used for both the original parse AND the XInclude
# Decide whether these callbacks handle $uri.
# Returns 1 when $uri contains no "://" (i.e. it is treated as a plain
# file name) and 0 otherwise.
sub match_uri {
    my ($uri) = @_;
    # we handle only files
    return 0 if $uri =~ m{://};
    return 1;
}
# Open $uri for reading.
# Returns an IO::File handle on success, or 0 when the file cannot be opened.
sub open_uri {
    my $uri = shift;
    my $handler = IO::File->new;
    # Pass the mode as a separate argument: when it is embedded in the string
    # ("<$uri") the name goes through Perl's two-argument open, which parses
    # mode characters and strips surrounding whitespace from the file name.
    if ( not $handler->open( $uri, '<' ) ) {
        $handler = 0;
    }
    return $handler;
}
# Read up to $length bytes from $handler.
# Returns the data read, or undef when $handler is false (e.g. the 0 that
# open_uri returns on failure).
sub read_uri {
    my ( $handler, $length ) = @_;
    my $buffer;
    $handler->read( $buffer, $length ) if $handler;
    return $buffer;
}
# Close $handler when it is a true value; a false handle is ignored.
# Always returns 1.
sub close_uri {
    my ($handler) = @_;
    $handler->close() if $handler;
    return 1;
}

View File

@ -0,0 +1,2 @@
<!ENTITY % f SYSTEM "dtd/f.dtd">
%f;

View File

@ -0,0 +1,6 @@
<!DOCTYPE doc [
<!ENTITY % e SYSTEM "complex.dtd">
%e;
]>
<doc></doc>

View File

@ -0,0 +1,3 @@
<!DOCTYPE doc SYSTEM "complex.dtd">
<doc>&foo;</doc>

View File

@ -0,0 +1,3 @@
<!ENTITY % g SYSTEM "g.dtd">
<!ELEMENT doc ANY>
%g;

View File

@ -0,0 +1,2 @@
<!ENTITY foo "foo">

View File

@ -0,0 +1,167 @@
#!/usr/bin/perl
=head1 ABOUT
This is a sample program to generate an HTML document using XML::LibXML's
DOM routines. It was written to resolve
L<https://rt.cpan.org/Ticket/Display.html?id=117923> . Thanks to Dan Jacobson.
=cut
use strict;
use warnings;
use XML::LibXML;
{
my $doc = XML::LibXML->createDocument;
# A small Domain-Specific-Language for generating DOM:
# Create a text node from the given string via $doc.
my $_text = sub {
    return $doc->createTextNode( $_[0] );
};
# Short for element.
# Usage: $_el->( $name, [ \%attrs, ] \@children )
# Creates an element called $name via $doc, sets the given attributes on it
# and appends the children in the order given. The attribute hash reference
# is optional; when it is omitted the second argument is the children list.
my $_el = sub {
    my $name  = shift;
    my $param = shift;
    my $attrs = {};
    if ( ref($param) eq 'HASH' ) {
        $attrs = $param;
        $param = shift;
    }
    my $childs = $param;
    my $elem   = $doc->createElementNS( "", $name );
    # Set attributes in sorted key order: Perl's hash iteration order is not
    # stable, which would make the generated markup differ from run to run.
    foreach my $k ( sort keys %$attrs ) {
        $elem->setAttribute( $k, $attrs->{$k} );
    }
    foreach my $child (@$childs) {
        $elem->appendChild($child);
    }
    return $elem;
};
# Build the whole document tree in one nested expression using the $_el and
# $_text helpers: an <html> element holding a <head> and a <body>.
my $html = $_el->(
'html',
[
# <head>: a <title> and a Content-Type <meta> with no children.
$_el->(
'head',
[
$_el->(
'title',
[
$_text->("Sample HTML document as generated by XML::LibXML"),
],
),
$_el->(
'meta',
{ 'http-equiv' => 'Content-Type',
'content' => 'text/html; charset=utf-8'
},
[],
),
],
),
# <body>: two paragraphs followed by an ordered list.
$_el->(
'body',
[
# First paragraph: text with an <a> link embedded between two text nodes.
$_el->(
'p',
[
$_text->("Introducing a link - "),
$_el->(
'a',
{ 'href' => 'http://www.wikipedia.org/', },
[
$_text->("Link to Wikipedia"),
],
),
$_text->(". We hope you enjoyed it."),
],
),
# Second paragraph: a single <img> element with no children.
$_el->(
'p',
[
$_el->(
'img',
{ 'src' => 'http://example.com/non-exist.png',
'alt' => 'non-existing image',
},
[],
),
],
),
# Ordered list with two items, each wrapping its text in a <p>.
$_el->(
'ol',
[
$_el->(
'li',
[
$_el->(
'p',
[
$_text->("First item."),
],
),
],
),
$_el->(
'li',
[
$_el->(
'p',
[
$_text->("Second item."),
],
),
],
),
],
),
],
),
],
);
# Install the tree as the document element and print the document
# serialized through toStringHTML.
$doc->setDocumentElement( $html );
print $doc->toStringHTML();
}
=head1 COPYRIGHT & LICENSE
Copyright 2016 by Shlomi Fish
This program is distributed under the MIT (X11) License:
L<http://www.opensource.org/licenses/mit-license.php>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
=cut

15
example/dromeds.xml Normal file
View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<dromedaries>
<species name="Camel">
<humps>1 or 2</humps>
<disposition>Cranky</disposition>
</species>
<species name="Llama">
<humps>1 (sort of)</humps>
<disposition>Aloof</disposition>
</species>
<species name="Alpaca">
<humps>(see Llama)</humps>
<disposition>Friendly</disposition>
</species>
</dromedaries>

5
example/dtd.xml Normal file
View File

@ -0,0 +1,5 @@
<!DOCTYPE doc [
<!ELEMENT doc (#PCDATA)>
<!ENTITY foo " test ">
]>
<doc>This is a valid document &foo; !</doc>

5
example/enc2_latin2.html Normal file
View File

@ -0,0 +1,5 @@
<html>
<body>
<p><EFBFBD></p>
</body>
</html>

8
example/enc_latin2.html Normal file
View File

@ -0,0 +1,8 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-2">
</head>
<body>
<p>ì¹èø</p>
</body>
</html>

2
example/ext_ent.dtd Normal file
View File

@ -0,0 +1,2 @@
<!ELEMENT doc (#PCDATA)>
<!ENTITY foo " test ">

4
example/ns.xml Normal file
View File

@ -0,0 +1,4 @@
<dromedaries xmlns="urn:camels" xmlns:mam="urn:mammals">
<species>Camelid</species>
<mam:legs xml:lang="en" yyy="zzz" a:xxx="foo" xmlns:a="urn:a">4</mam:legs>
</dromedaries>

15
example/test.dtd Normal file
View File

@ -0,0 +1,15 @@
<!ELEMENT article (pubData* , pubArticleID* , pubDate+ , pubName+ , section+ , memo* , headline1* , headline2* , byline* , lead* , rest* , graphics* , keywords* , caption*)>
<!ELEMENT pubData (#PCDATA)>
<!ELEMENT pubArticleID (#PCDATA)>
<!ELEMENT pubDate (#PCDATA)>
<!ELEMENT pubName (#PCDATA)>
<!ELEMENT section (#PCDATA)>
<!ELEMENT memo (#PCDATA)>
<!ELEMENT headline1 (#PCDATA)>
<!ELEMENT headline2 (#PCDATA)>
<!ELEMENT byline (#PCDATA)>
<!ELEMENT lead (#PCDATA)>
<!ELEMENT rest (#PCDATA)>
<!ELEMENT graphics (#PCDATA)>
<!ELEMENT keywords (#PCDATA)>
<!ELEMENT caption (#PCDATA)>

182
example/test.html Normal file
View File

@ -0,0 +1,182 @@
<HTML>
<HEAD>
<TITLE>XML::LibXML::Document - DOM Document Class</TITLE>
<LINK REV="made" HREF="mailto:root@updates.mandrakesoft.com">
</HEAD>
<BODY>
<!-- INDEX BEGIN -->
<UL>
<LI><A HREF="#NAME">NAME</A>
<LI><A HREF="#SYNOPSIS">SYNOPSIS</A>
<LI><A HREF="#DESCRIPTION">DESCRIPTION</A>
<UL>
<LI><A HREF="#Methods">Methods</A>
</UL>
<LI><A HREF="#SEE_ALSO">SEE ALSO</A>
<LI><A HREF="#VERSION">VERSION</A>
</UL>
<!-- INDEX END -->
<HR>
<P>
<H1><A NAME="NAME">NAME</A></H1>
<P>
XML::LibXML::Document - DOM Document Class
<P>
<HR>
<H1><A NAME="SYNOPSIS">SYNOPSIS</A></H1>
<P>
<PRE> use XML::LibXML::Document;
</PRE>
<P>
<PRE> $dom = XML::LibXML::Document-&gt;new( $version, $encoding );
$dom = XML::LibXML::Document-&gt;createDocument( $version, $encoding );
$strEncoding = $doc-&gt;getEncoding();
$strVersion = $doc-&gt;getVersion();
$docstring = $dom-&gt;toString([$format]);
$bool = $dom-&gt;is_valid();
$root = $dom-&gt;getDocumentElement($name, $namespace );
$dom-&gt;setDocumentElement( $root );
$element = $dom-&gt;createElement( $nodename );
$element = $dom-&gt;createElementNS( $namespaceURI, $qname );
$text = $dom-&gt;createTextNode( $content_text );
$comment = $dom-&gt;createComment( $comment_text );
$attrnode = $doc-&gt;createAttribute($name [,$value]);
$attrnode = $doc-&gt;createAttributeNS( namespaceURI, $name [,$value] );
$cdata = $dom-&gt;create( $cdata_content );
$document-&gt;importNode( $node [, $move] );
</PRE>
<P>
<HR>
<H1><A NAME="DESCRIPTION">DESCRIPTION</A></H1>
<P>
The Document Class is the result of a parsing process. But sometimes it is
necessary to create a Document from scratch. The DOM Document Class
provides functions that are conform to the DOM Core naming style. It
inherits all functions from <EM>XML::LibXML::Node</EM> as specified in DOM Level2. This enables to access the nodes beside the
root element on document level - a <EM>DTD</EM> for example. The support for these nodes is limited at the moment, so I
would recommend, not to use <EM>node</EM> functions on <EM>documents</EM>. It is suggested that one should always create a node not bound to any
document. There is no need of really including the node to the document,
but once the node is bound to a document, it is quite safe that all strings
have the correct encoding. If an unbound textnode with an iso encoded
string is created (e.g. with $CLASS-&gt;new()), the <EM>toString</EM> function may not return the expected result. This seems like a limitation
as long UTF8 encoding is assured. If iso encoded strings come into play it
is much safer to use the node creation functions of <STRONG>XML::LibXML::Document</STRONG>.
<P>
<HR>
<H2><A NAME="Methods">Methods</A></H2>
<DL>
<DT><STRONG><A NAME="item_new">new</A></STRONG><DD>
<P>
alias for <CODE>createDocument()</CODE>
<DT><STRONG><A NAME="item_createDocument">createDocument</A></STRONG><DD>
<P>
The constructor for the document class. As Parameter it takes the version
string and (optionally) the ecoding string. Simply calling <STRONG>createDocument</STRONG> will create the document:
<P>
<PRE>
&lt;?xml version=&quot;your version&quot; encoding=&quot;your encoding&quot;?&gt;
</PRE>
<P>
Both parameter are optional. The default value for <STRONG>$version</STRONG> is <EM>1.0</EM> , of course. If the <STRONG>$encoding</STRONG> parameter is not set, the encoding will be left unset, which means UTF8 is
implied (and set). The call of <STRONG>createDocument</STRONG> without any parameter will result the following code:
<P>
<PRE>
&lt;?xml version=&quot;1.0&quot;?&gt;
</PRE>
<DT><STRONG><A NAME="item_getEncoding">getEncoding</A></STRONG><DD>
<P>
returns the encoding string of the document
<DT><STRONG><A NAME="item_getVersion">getVersion</A></STRONG><DD>
<P>
returns the version string of the document
<DT><STRONG><A NAME="item_toString">toString</A></STRONG><DD>
<P>
<STRONG>toString</STRONG> is a deparsing function, so the DOM Tree can be translated into a string,
ready for output. The optional <STRONG>$format</STRONG> parameter sets the indenting of the output. This parameter is expected to
be an <EM>integer</EM> value, that specifies the number of linebreaks for each node. For more
information about the formatted output check the documentation of <EM>xmlDocDumpFormatMemory</EM> in <EM>libxml2/tree.h</EM> .
<DT><STRONG><A NAME="item_is_valid">is_valid</A></STRONG><DD>
<P>
Returns either TRUE or FALSE depending on the DOM Tree is a valid Document
or not.
<DT><STRONG><A NAME="item_getDocumentElement">getDocumentElement</A></STRONG><DD>
<P>
Returns the root element of the Document. A document can have just one root
element to contain the documents data.
<DT><STRONG><A NAME="item_setDocumentElement">setDocumentElement</A></STRONG><DD>
<P>
This function enables you to set the root element for a document. The
function supports the import of a node from a different document tree.
<DT><STRONG><A NAME="item_createElement">createElement</A></STRONG><DD>
<P>
This function creates a new Element Node bound to the DOM with the name <EM>$nodename</EM> .
<DT><STRONG><A NAME="item_createElementNS">createElementNS</A></STRONG><DD>
<P>
This function creates a new Element Node bound to the DOM with the name <EM>$nodename</EM> and placed in the given namespace.
<DT><STRONG><A NAME="item_createTextNode">createTextNode</A></STRONG><DD>
<P>
As an equivalent of <STRONG>createElement</STRONG> , but it creates a <STRONG>Text Node</STRONG> bound to the DOM.
<DT><STRONG><A NAME="item_createComment">createComment</A></STRONG><DD>
<P>
As an equivalent of <STRONG>createElement</STRONG> , but it creates a <STRONG>Comment Node</STRONG> bound to the DOM.
<DT><STRONG><A NAME="item_createAttribute">createAttribute</A></STRONG><DD>
<P>
Creates a new Attribute node. This function is rather useless at the
moment, since there is no setAttributeNode function defined in <EM>XML::LibXML::Element</EM> , yet.
<DT><STRONG><A NAME="item_createAttributeNS">createAttributeNS</A></STRONG><DD>
<P>
Creates an Attribute bound to a namespace.
<DT><STRONG><A NAME="item_createCDATASection">createCDATASection</A></STRONG><DD>
<P>
Similar to createTextNode and createComment, this function creates a
CDataSection bound to the current DOM.
<DT><STRONG><A NAME="item_importNode">importNode</A></STRONG><DD>
<P>
If a node is not part of a document, it can be imported to another
document. As specified in DOM Level 2 Specification the Node will not be
altered or removed from its original document by default. ( <EM>$node-</EM><CODE>cloneNode(1)&gt;</CODE> will get called implicitly). Sometimes it is
necessary to <EM>move</EM> a node between documents. In such a case the node will not be copied, but
removed from the original document.
</DL>
<P>
<HR>
<H1><A NAME="SEE_ALSO">SEE ALSO</A></H1>
<P>
XML::LibXML, XML::LibXML::Element, XML::LibXML::Text, XML::LibXML::Attr,
XML::LibXML::Comment
<P>
<HR>
<H1><A NAME="VERSION">VERSION</A></H1>
<P>
0.90_a
</BODY>
</HTML>

109
example/test.xhtml Normal file
View File

@ -0,0 +1,109 @@
<?xml version="1.0" standalone="yes"?>
<html><head><title>XML::LibXML::Document - DOM Document Class</title><link rev="made" href="mailto:root@updates.mandrakesoft.com"/></head><body><!-- INDEX BEGIN --><ul><li><a href="#NAME">NAME</a></li><li><a href="#SYNOPSIS">SYNOPSIS</a></li><li><a href="#DESCRIPTION">DESCRIPTION</a><ul><li><a href="#Methods">Methods</a></li></ul></li><li><a href="#SEE_ALSO">SEE ALSO</a></li><li><a href="#VERSION">VERSION</a></li></ul><!-- INDEX END --><hr/><p/><h1><a name="NAME">NAME</a></h1><p>
XML::LibXML::Document - DOM Document Class
</p><p/><hr/><h1><a name="SYNOPSIS">SYNOPSIS</a></h1><p/><pre> use XML::LibXML::Document;
</pre><p/><pre> $dom = XML::LibXML::Document-&gt;new( $version, $encoding );
$dom = XML::LibXML::Document-&gt;createDocument( $version, $encoding );
$strEncoding = $doc-&gt;getEncoding();
$strVersion = $doc-&gt;getVersion();
$docstring = $dom-&gt;toString([$format]);
$bool = $dom-&gt;is_valid();
$root = $dom-&gt;getDocumentElement($name, $namespace );
$dom-&gt;setDocumentElement( $root );
$element = $dom-&gt;createElement( $nodename );
$element = $dom-&gt;createElementNS( $namespaceURI, $qname );
$text = $dom-&gt;createTextNode( $content_text );
$comment = $dom-&gt;createComment( $comment_text );
$attrnode = $doc-&gt;createAttribute($name [,$value]);
$attrnode = $doc-&gt;createAttributeNS( namespaceURI, $name [,$value] );
$cdata = $dom-&gt;create( $cdata_content );
$document-&gt;importNode( $node [, $move] );
</pre><p/><hr/><h1><a name="DESCRIPTION">DESCRIPTION</a></h1><p>
The Document Class is the result of a parsing process. But sometimes it is
necessary to create a Document from scratch. The DOM Document Class
provides functions that are conform to the DOM Core naming style. It
inherits all functions from <em>XML::LibXML::Node</em> as specified in DOM Level2. This enables to access the nodes beside the
root element on document level - a <em>DTD</em> for example. The support for these nodes is limited at the moment, so I
would recommend, not to use <em>node</em> functions on <em>documents</em>. It is suggested that one should always create a node not bound to any
document. There is no need of really including the node to the document,
but once the node is bound to a document, it is quite safe that all strings
have the correct encoding. If an unbound textnode with an iso encoded
string is created (e.g. with $CLASS-&gt;new()), the <em>toString</em> function may not return the expected result. This seems like a limitation
as long UTF8 encoding is assured. If iso encoded strings come into play it
is much safer to use the node creation functions of <strong>XML::LibXML::Document</strong>.
</p><p/><hr/><h2><a name="Methods">Methods</a></h2><dl><dt><strong><a name="item_new">new</a></strong></dt><dd><p>
alias for <code>createDocument()</code></p></dd><dt><strong><a name="item_createDocument">createDocument</a></strong></dt><dd><p>
The constructor for the document class. As Parameter it takes the version
string and (optionally) the ecoding string. Simply calling <strong>createDocument</strong> will create the document:
</p><p/><pre>
&lt;?xml version=&quot;your version&quot; encoding=&quot;your encoding&quot;?&gt;
</pre><p>
Both parameter are optional. The default value for <strong>$version</strong> is <em>1.0</em> , of course. If the <strong>$encoding</strong> parameter is not set, the encoding will be left unset, which means UTF8 is
implied (and set). The call of <strong>createDocument</strong> without any parameter will result the following code:
</p><p/><pre>
&lt;?xml version=&quot;1.0&quot;?&gt;
</pre></dd><dt><strong><a name="item_getEncoding">getEncoding</a></strong></dt><dd><p>
returns the encoding string of the document
</p></dd><dt><strong><a name="item_getVersion">getVersion</a></strong></dt><dd><p>
returns the version string of the document
</p></dd><dt><strong><a name="item_toString">toString</a></strong></dt><dd><p><strong>toString</strong> is a deparsing function, so the DOM Tree can be translated into a string,
ready for output. The optional <strong>$format</strong> parameter sets the indenting of the output. This parameter is expected to
be an <em>integer</em> value, that specifies the number of linebreaks for each node. For more
information about the formatted output check the documentation of <em>xmlDocDumpFormatMemory</em> in <em>libxml2/tree.h</em> .
</p></dd><dt><strong><a name="item_is_valid">is_valid</a></strong></dt><dd><p>
Returns either TRUE or FALSE depending on the DOM Tree is a valid Document
or not.
</p></dd><dt><strong><a name="item_getDocumentElement">getDocumentElement</a></strong></dt><dd><p>
Returns the root element of the Document. A document can have just one root
element to contain the documents data.
</p></dd><dt><strong><a name="item_setDocumentElement">setDocumentElement</a></strong></dt><dd><p>
This function enables you to set the root element for a document. The
function supports the import of a node from a different document tree.
</p></dd><dt><strong><a name="item_createElement">createElement</a></strong></dt><dd><p>
This function creates a new Element Node bound to the DOM with the name <em>$nodename</em> .
</p></dd><dt><strong><a name="item_createElementNS">createElementNS</a></strong></dt><dd><p>
This function creates a new Element Node bound to the DOM with the name <em>$nodename</em> and placed in the given namespace.
</p></dd><dt><strong><a name="item_createTextNode">createTextNode</a></strong></dt><dd><p>
As an equivalent of <strong>createElement</strong> , but it creates a <strong>Text Node</strong> bound to the DOM.
</p></dd><dt><strong><a name="item_createComment">createComment</a></strong></dt><dd><p>
As an equivalent of <strong>createElement</strong> , but it creates a <strong>Comment Node</strong> bound to the DOM.
</p></dd><dt><strong><a name="item_createAttribute">createAttribute</a></strong></dt><dd><p>
Creates a new Attribute node. This function is rather useless at the
moment, since there is no setAttributeNode function defined in <em>XML::LibXML::Element</em> , yet.
</p></dd><dt><strong><a name="item_createAttributeNS">createAttributeNS</a></strong></dt><dd><p>
Creates an Attribute bound to a namespace.
</p></dd><dt><strong><a name="item_createCDATASection">createCDATASection</a></strong></dt><dd><p>
Similar to createTextNode and createComment, this function creates a
CDataSection bound to the current DOM.
</p></dd><dt><strong><a name="item_importNode">importNode</a></strong></dt><dd><p>
If a node is not part of a document, it can be imported to another
document. As specified in DOM Level 2 Specification the Node will not be
altered or removed from its original document by default. ( <em>$node-</em><code>cloneNode(1)&gt;</code> will get called implicitly). Sometimes it is
necessary to <em>move</em> a node between documents. In such a case the node will not be copied, but
removed from the original document.
</p></dd></dl><p/><hr/><h1><a name="SEE_ALSO">SEE ALSO</a></h1><p>
XML::LibXML, XML::LibXML::Element, XML::LibXML::Text, XML::LibXML::Attr,
XML::LibXML::Comment
</p><p/><hr/><h1><a name="VERSION">VERSION</a></h1><p>
0.90_a
</p></body></html>

6
example/test.xml Normal file
View File

@ -0,0 +1,6 @@
<x xmlns:xinclude="http://www.w3.org/2001/XInclude">
<xml>
test
<xinclude:include href="test2.xml"/>
</xml>
</x>

1
example/test2.xml Normal file
View File

@ -0,0 +1 @@
<xsl>..</xsl>

3
example/test3.xml Normal file
View File

@ -0,0 +1,3 @@
<a>
<b/>
</a>

6
example/test4.xml Normal file
View File

@ -0,0 +1,6 @@
<x xmlns:xinclude="http://www.w3.org/2001/XInclude">
<xml>
test 4
<xinclude:include href="test2.xml"/>
</xml>
</x>

29
example/thedieline.rss Normal file
View File

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:thr="http://purl.org/syndication/thread/1.0">
<title>TheDieline.com: Package Design</title>
<link rel="self" type="application/atom+xml" href="http://www.thedieline.com/blog/atom.xml" />
<link rel="hub" href="http://hubbub.api.typepad.com/" />
<link rel="alternate" type="text/html" href="http://www.thedieline.com/blog/" />
<id>tag:typepad.com,2003:weblog-611821</id>
<updated>2011-06-15T11:03:00-07:00</updated>
<subtitle>The World's #1 Package Design Website</subtitle>
<generator uri="http://www.typepad.com/">TypePad</generator>
<entry>
<title>Ginja d&#39; Óbidos</title>
<link rel="alternate" type="text/html" href="http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline~Ginja-d-%c3%93bidos.html" />
<link rel="replies" type="text/html" href="http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline~Ginja-d-%c3%93bidos.html" thr:count="2" thr:updated="2010-06-25T13:08:23-07:00" />
<id>tag:typepad.com,2003:post-6a00d8345250f069e20133f1a9824b970b</id>
<published>2010-06-25T10:00:00-07:00</published>
<updated>2010-06-24T22:16:07-07:00</updated>
<author>
<name>The Dieline</name>
</author>
<category scheme="http://www.sixapart.com/ns/types#category" term="Industry: Wine &amp; Spirits" />
<category scheme="http://www.sixapart.com/ns/types#category" term="Substrate: Glass Bottle" />
<content type="html" xml:lang="en-US" xml:base="http://www.thedieline.com/blog/">&lt;font color=&quot;#000000&quot; &gt;&lt;p&gt;&lt;Img align=&quot;left&quot; border=&quot;0&quot; height=&quot;1&quot; width=&quot;1&quot; style=&quot;border:0;float:left;margin:0;&quot; vspace=&quot;0&quot; hspace=&quot;0&quot; src=&quot;http://feeds.feedblitz.com/~/i/15124587/1ir2jk/thedieline&quot;&gt;&lt;div xmlns=&quot;http://www.w3.org/1999/xhtml&quot;&gt;&lt;p&gt;&lt;a href=&quot;http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://www.thedieline.com/.a/6a00d8345250f069e20133f1a97caa970b-popup&quot; onclick=&quot;window.open( this.href, &amp;#39;_blank&amp;#39;, &amp;#39;width=640,height=480,scrollbars=no,resizable=no,toolbar=no,directories=no,location=no,menubar=no,status=no,left=0,top=0&amp;#39; ); return false&quot; style=&quot;display: inline;&quot;&gt;&lt;img alt=&quot;1&quot; class=&quot;asset asset-image at-xid-6a00d8345250f069e20133f1a97caa970b &quot; src=&quot;http://www.thedieline.com/.a/6a00d8345250f069e20133f1a97caa970b-550wi&quot; style=&quot;width: 540px; &quot; &gt;&lt;/a&gt; &lt;br&gt;Lisbon based &lt;a href=&quot;http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://&quot;&gt;&lt;/a&gt;&lt;a href=&quot;http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://www.ntgj.org/&quot; target=&quot;_blank&quot;&gt;NT.GJ&lt;/a&gt; designed this cherry liqueur concept which features actual cherries within...&lt;/div&gt;&lt;p&gt;&lt;a href=&quot;http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline&quot;&gt;CLICK HERE to read the rest of the post...&lt;/a&gt; &lt;!-- _!fbztxtlnk!_ http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline --&gt;&amp;raquo;&lt;/p&gt;&lt;/font&gt;&lt;p&gt;&lt;div style=&quot;clear:both;&quot;&gt;&lt;em&gt;(Want to see more packaging? Visit &lt;a href=&quot;http://www.TheDieline.com&quot;&gt;TheDieline.com&lt;/a&gt;!)&lt;/em&gt;&lt;p&gt;&lt;/div&gt;&lt;/p&gt;
&lt;div style=&quot;clear:both;&quot;&gt;&lt;a title=&quot;Tweet with Bit.ly&quot; href=&quot;http://bit.ly/?v=3&amp;ref=feedblitz&amp;u=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;t=Ginja+d%26%2339%3b+Ã&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/bitly.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Add to Delicious&quot; href=&quot;http://delicious.com/post?url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+Ã&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/delicious.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Digg This&quot; href=&quot;http://digg.com/submit?phase=2&amp;url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+Ã&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/digg.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Add to FaceBook&quot; href=&quot;http://facebook.com/share.php?u=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;t=Ginja+d%26%2339%3b+Ã&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/facebook.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Add to Google Bookmarks&quot; href=&quot;http://google.com/bookmarks/mark?op=edit&amp;bkmk=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+Ã&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/google.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Stumble This&quot; 
href=&quot;http://stumbleupon.com/submit?url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+Ã&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/stumble.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Tweet This&quot; href=&quot;http://twitter.com/home?status=Ginja+d%26%2339%3b+Ã&ldquo;bidos+http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/twitter.jpg&quot;&gt;&lt;/a&gt; &lt;a title=&quot;Subscribe by RSS&quot; href=&quot;http://feeds.feedblitz.com/thedieline&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/rss.gif&quot;&gt;&lt;/a&gt; &lt;a title=&quot;View Comments&quot; href=&quot;http://www.thedieline.com/blog/2010/06/ginja-d-%C3%B3bidos.html&quot;&gt;&lt;img height=16 border=0 src=&quot;http://assets.feedblitz.com/images/comment.png&quot;&gt;&lt;/a&gt; &lt;/div&gt;</content></entry></feed>

BIN
example/utf-16-1.html Normal file

Binary file not shown.

BIN
example/utf-16-2.html Normal file

Binary file not shown.

BIN
example/utf-16-2.xml Normal file

Binary file not shown.

551
example/xmllibxmldocs.pl Normal file
View File

@ -0,0 +1,551 @@
#!/usr/bin/perl -w
use strict;
use XML::LibXML;
use IO::File;
# ------------------------------------------------------------------------- #
# (c) 2003 christian p. glahn
# ------------------------------------------------------------------------- #
# ------------------------------------------------------------------------- #
# This is an example of how to use the DOM interface of XML::LibXML. The
# script reads an XML file with a module specification. If the module
# contains several classes, the script fetches them and stores the
# data into different POD files.
#
# Note this is just an example, to demonstrate how XML::LibXML works.
# The code works for the XML::LibXML documentation, but may not work
# for any other docbook file.
#
# If you are interested what the results are, check the README and the POD
# files shipped with XML::LibXML.
# ------------------------------------------------------------------------- #
# ------------------------------------------------------------------------- #
# SYNOPSIS:
# xmllibxmldocs.pl $docbook_file $targetdir
#
# ------------------------------------------------------------------------- #
# command line handling: <docbook source file> <target directory>
# ------------------------------------------------------------------------- #
my $srcfile   = shift @ARGV;
my $targetdir = shift @ARGV;

# Both arguments are required. Without this guard an omitted target directory
# is undefined, gets "/" appended below, and the generated files would be
# aimed at the filesystem root.
unless ( defined($srcfile) && defined($targetdir) ) {
    die "Usage: $0 docbook_file targetdir\n";
}

# the output file names are appended directly, so make sure the directory
# ends with a slash.
unless ( $targetdir =~ /\/$/ ) {
    $targetdir .= "/";
}

# ------------------------------------------------------------------------- #
# init the parser: do not load external DTDs and drop ignorable whitespace
# ------------------------------------------------------------------------- #
my $parser = XML::LibXML->new();
$parser->load_ext_dtd(0);
$parser->keep_blanks(0);

# ------------------------------------------------------------------------- #
# load the document into memory.
# ------------------------------------------------------------------------- #
my $doc = $parser->parse_file( $srcfile );

# ------------------------------------------------------------------------- #
# good implementations would use XSLT to convert a docbook to any other
# text format. Since the module does not presume libxslt installed, we
# have to do the dirty job.
# ------------------------------------------------------------------------- #
my $ch = ChapterHandler->new($targetdir);

# ------------------------------------------------------------------------- #
# init the common parts in all pods
# ------------------------------------------------------------------------- #
my ( $bookinfo ) = $doc->findnodes( "//bookinfo" );
$ch->set_general_info( $bookinfo );

# ------------------------------------------------------------------------- #
# then process each chapter of the XML::LibXML book
# ------------------------------------------------------------------------- #
my @chapters = $doc->findnodes( "//chapter" );
foreach my $chap ( @chapters ) {
    $ch->handle( $chap );
}
# ------------------------------------------------------------------------- #
# ------------------------------------------------------------------------- #
# ------------------------------------------------------------------------- #
# the class to process our docbook file
# ------------------------------------------------------------------------- #
package ChapterHandler;
use XML::LibXML;
# ------------------------------------------------------------------------- #
# the constructor
# ------------------------------------------------------------------------- #
sub new {
    # Constructor: takes the class name and the output directory and returns
    # a blessed hash that remembers the directory under the key "directory".
    my ( $class, $dir ) = @_;
    return bless { directory => $dir }, $class;
}
# ------------------------------------------------------------------------- #
# ------------------------------------------------------------------------- #
# set_general_info
# ------------------------------------------------------------------------- #
# processes the bookinfo tag of XML::LibXML to extract common information such
# as version or copyright information
sub set_general_info {
    # Builds the POD block (AUTHORS, VERSION, COPYRIGHT, LICENSE) from the
    # bookinfo node and stores it in $self->{infoblock}.
    #
    # Parameters:
    #   $infonode - the bookinfo element; when undefined nothing is stored.
    # Returns the stored string (the value of the final assignment).
    my $self     = shift;
    my $infonode = shift;

    return unless defined $infonode;

    my $infostr = "=head1 AUTHORS\n\n";

    my @authors = $infonode->findnodes( "authorgroup/author" );
    foreach my $author ( @authors ) {
        my ( $node_fn ) = $author->getChildrenByTagName( "firstname" );
        my ( $node_sn ) = $author->getChildrenByTagName( "surname" );

        if ( defined $node_fn ) {
            $infostr .= $node_fn->string_value();
        }
        if ( defined $node_sn ) {
            $infostr .= " ". $node_sn->string_value();
        }

        # authors are separated by commas; the last one closes the paragraph
        if ( defined $author->nextSibling() ) {
            $infostr .= ", \n";
        }
        else {
            $infostr .= "\n\n";
        }
    }

    my ( $version ) = $infonode->findnodes( "edition" );
    if ( defined $version ) {
        $infostr .= "\n=head1 VERSION\n\n" . $version->string_value() . "\n\n";
    }

    my @copyright = $infonode->findnodes( "copyright" );
    if ( @copyright ) {
        $infostr .= "=head1 COPYRIGHT\n\n";

        foreach my $copyright (@copyright) {
            # Fetch in list context, exactly like the authors loop above, so
            # that a missing <year> or <holder> leaves the variable undefined
            # and the defined() checks below really skip the absent element.
            my ( $node_y ) = $copyright->getChildrenByTagName( "year" );
            my ( $node_h ) = $copyright->getChildrenByTagName( "holder" );

            if ( defined $node_y ) {
                $infostr .= $node_y->string_value() . ", ";
            }
            if ( defined $node_h ) {
                $infostr .= $node_h->string_value();
            }
            $infostr .= ".\n\n";
        }

        # the license text is only emitted together with a copyright section
        $infostr .= "=cut\n";
        $infostr .= "\n\n".<<'EOF';
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
EOF
    }

    $self->{infoblock} = $infostr;
}
# ------------------------------------------------------------------------- #
# handle
# ------------------------------------------------------------------------- #
# This function opens the output file and decides how the chapter is
# processed
sub handle {
    # Writes one chapter to its own output file.
    #
    # The file name is derived from the chapter's <titleabbrev>; chapters
    # without one are skipped. README and LICENSE are written as plain text
    # into the current directory, everything else becomes a .pod file.
    # Dies when the output file cannot be opened.
    my $self    = shift;
    my $chapter = shift;

    my ( $abbr ) = $chapter->findnodes( "titleabbrev" );
    if ( defined $abbr ) {
        # create a new file.
        my $filename = $abbr->string_value();
        $filename =~ s/^\s*|\s*$//g;
        my $dir = $self->{directory};

        $filename =~ s/XML\:\:LibXML//g;
        $filename =~ s/^-|^\:\://g;   # remove the first colon or minus.
        $filename =~ s/\:\:/\//g;     # transform remaining colons to paths.
        # the previous statement should work for existing modules. This could be
        # dangerous for nested modules, which do not exist at the time of writing
        # this code.

        unless ( length $filename ) {
            # nothing left after stripping: this is the main module itself
            $dir      = "";
            $filename = "LibXML";
        }

        if ( $filename ne "README" and $filename ne "LICENSE" ) {
            $filename .= ".pod";
        }
        else {
            $dir = "";
        }

        # Fail right away when the target cannot be created; previously the
        # failure only surfaced later as an unrelated read error.
        $self->{OFILE} = IO::File->new();
        $self->{OFILE}->open(">".$dir.$filename)
            or die "Cannot open $dir$filename for writing - $!";

        if (   $abbr->string_value() eq "README"
            or $abbr->string_value() eq "LICENSE" ) {
            # Text only chapters in the documentation
            $self->dump_text( $chapter );
        }
        else {
            # print header
            # print synopsis
            # process the information itself
            # dump the info block
            $self->dump_pod( $chapter );
            $self->{OFILE}->print( $self->{infoblock} );
        }

        # close the file
        $self->{OFILE}->close();

        # Strip trailing space.
        my $text = _slurp($dir.$filename);
        $text =~ s/[ \t]+$//gms;

        open my $out, '>', $dir.$filename
            or die "Cannot open $dir$filename for writing - $!";
        print {$out} $text;
        close ($out);
    }
}
sub _slurp {
    # Reads the named file in one go and returns its contents as a single
    # string. Dies when the file cannot be opened for reading.
    my ($filename) = @_;

    open( my $fh, '<', $filename )
        || die "Cannot open '$filename' for slurping - $!";

    # with the record separator undefined a single read returns everything
    my $data = do { local $/ = undef; <$fh> };
    close $fh;

    return $data;
}
# ------------------------------------------------------------------------- #
# dump_text
# ------------------------------------------------------------------------- #
# convert the chapter into a textfile, such as README.
sub dump_text {
    # Writes a plain-text rendering of $chap (as used for README and LICENSE)
    # to $self->{OFILE}.
    #
    # A chapter node gets an upper-cased, underlined title first. Then the
    # direct children are handled by element name: para (wrapped below 80
    # columns), sect1/sect2 (underlined heading, then recursion),
    # itemizedlist, orderedlist and programlisting. Other children are
    # ignored.
    my $self = shift;
    my $chap = shift;

    if ( $chap->nodeName() eq "chapter" ) {
        my ( $title ) = $chap->getChildrenByTagName( "title" );
        my $str = $title->string_value();
        $self->{OFILE}->print( uc($str) . "\n" );
        $self->{OFILE}->print( "=" x length($str) );
        $self->{OFILE}->print( "\n\n" );
    }

    foreach my $node ( $chap->childNodes() ) {
        my $name = $node->nodeName();

        if ( $name eq "para" ) {
            # we split at the last whitespace before 80 chars
            my $string = $node->string_value();
            my $os     = "";
            my @words  = split /\s+/, $string;
            foreach my $word ( @words ) {
                if ( ( length( $os ) + length( $word ) + 1 ) < 80 ) {
                    if ( length $os ) { $os .= " "; }
                    $os .= $word;
                }
                else {
                    # Flush only a non-empty line: an over-long first word
                    # used to push out an empty buffer, which put a stray
                    # blank line in front of the paragraph.
                    $self->{OFILE}->print( $os . "\n" ) if length $os;
                    $os = $word;
                }
            }
            $self->{OFILE}->print( $os );
            $self->{OFILE}->print( "\n\n" );
        }
        elsif ( $name eq "sect1" or $name eq "sect2" ) {
            my ( $title ) = $node->getChildrenByTagName( "title" );
            my $str = $title->string_value();
            # only first-level section headings are upper-cased
            my $heading = $name eq "sect1" ? uc($str) : $str;
            $self->{OFILE}->print( "\n" . $heading . "\n" );
            $self->{OFILE}->print( "=" x length($str) );
            $self->{OFILE}->print( "\n\n" );
            $self->dump_text( $node );
        }
        elsif ( $name eq "itemizedlist" ) {
            my @items = $node->findnodes( "listitem" );
            my $sp= " ";
            foreach my $item ( @items ) {
                $self->{OFILE}->print( "$sp o " );
                my $str = $item->string_value();
                $str =~ s/^\s*|\s*$//g;
                $self->{OFILE}->print( $str );
                $self->{OFILE}->print( "\n" );
            }
            $self->{OFILE}->print( "\n" );
        }
        elsif ( $name eq "orderedlist" ) {
            my @items = $node->findnodes( "listitem" );
            my $i = 0;
            my $sp= " ";
            foreach my $item ( @items ) {
                $i++;
                $self->{OFILE}->print( "$sp $i " );
                my $str = $item->string_value();
                $str =~ s/^\s*|\s*$//g;
                $self->{OFILE}->print( $str );
                $self->{OFILE}->print( "\n" );
            }
            $self->{OFILE}->print( "\n" );
        }
        elsif ( $name eq "programlisting" ) {
            # quote every line of the listing with "> "
            my $str = $node->string_value();
            $str =~ s/\n/\n> /g;
            $self->{OFILE}->print( "> ". $str );
            $self->{OFILE}->print( "\n\n" );
        }
    }
}
# ------------------------------------------------------------------------- #
# dump_pod
# ------------------------------------------------------------------------- #
# This method is used to create the real POD files for XML::LibXML. It is not
# too sophisticated, but it already does quite a good job.
sub dump_pod {
# Recursively render the DocBook node $chap as POD on $self->{OFILE}.
#
# Parameters:
#   $self - generator object; only $self->{OFILE} (an object with a print()
#           method) is used here.
#   $chap - the node whose children are rendered. When it is a "chapter"
#           element, the NAME and SYNOPSIS headers are emitted first.
#
# Every child node is dispatched on its node type / element name; container
# elements are handled by calling dump_pod() again on the child.
my $self = shift;
my $chap = shift;
if ( $chap->nodeName() eq "chapter" ) {
# NAME section: "<titleabbrev> - <title>".
# NOTE(review): assumes every chapter has both a title and a titleabbrev
# child; a missing one would make the string_value() call below fail.
my ( $title ) = $chap->getChildrenByTagName( "title" );
my ( $ttlabbr ) = $chap->getChildrenByTagName( "titleabbrev" );
my $str = $ttlabbr->string_value() . " - ".$title->string_value();
$str=~s/^\s+|\s+$//g;
$self->{OFILE}->print( "=head1 NAME\n\n$str\n" );
# SYNOPSIS section: built from the sect1 titled 'Synopsis' (if any) plus
# every funcsynopsis found anywhere below the chapter.
my ($synopsis) = $chap->findnodes( "sect1[title='Synopsis']" );
my @funcs = $chap->findnodes( ".//funcsynopsis" );
if ($synopsis or scalar @funcs) {
$self->{OFILE}->print( "\n=head1 SYNOPSIS\n\n" )
}
if ($synopsis) {
$self->dump_pod( $synopsis );
}
if ( scalar @funcs ) {
foreach my $s ( @funcs ) {
$self->dump_pod( $s );
}
# $self->{OFILE}->print( "\n\n=head1 DESCRIPTION\n\n" );
}
}
foreach my $node ( $chap->childNodes() ) {
if ( $node->nodeType == XML_TEXT_NODE ||
$node->nodeType == XML_CDATA_SECTION_NODE ) {
# we split at the last whitespace before 80 chars
# $prev_inline is true when there is a previous sibling whose name does
# not start with one of the block-level element names listed below.
my $prev_inline =
($node->previousSibling and
$node->previousSibling->nodeName !~
/^(?:itemizedlist|orderedlist|variablelist|programlisting|funcsynopsis)/)
? 1 : 0;
my $str = $node->data();
# Empty out lines that contain only spaces/tabs.
$str=~s/(^|\n)[ \t]+($|\n)/$1$2/g;
if ($str=~/\S/) {
my $string = $str;
# Leading whitespace becomes a single space, but only after inline content;
# trailing whitespace always becomes a single space.
my $space_before = ($string =~ s/^\s+//g) ? $prev_inline : 0;
my $space_after = ($string =~ s/\s+$//g) ? 1 : 0;
$self->{OFILE}->print( " " ) if $space_before;
# Greedy word wrap: $os accumulates the current output line and is flushed
# when adding the next word would reach 80 characters.
my $os = "";
my @words = split /\s+/, $string;
foreach my $word ( @words ) {
if ( (length( $os ) + length( $word ) + 1) < 80 ) {
if ( length $os ) { $os .= " "; }
$os .= $word;
}
else {
$self->{OFILE}->print( $os . "\n" );
$os = $word;
}
}
$os.=" " if $space_after;
$self->{OFILE}->print( $os );
}
} elsif ( $node->nodeName() eq "para" ) {
$self->dump_pod( $node );
$self->{OFILE}->print( "\n\n" );
} elsif ( $node->nodeName() eq "sect1" ) {
# sect1 becomes an upper-cased =head1; the chapter's 'Synopsis' section is
# skipped here because it was already emitted in the chapter header above.
my ( $title ) = $node->getChildrenByTagName( "title" );
my $str = $title->string_value();
unless ($chap->nodeName eq "chapter" and $str eq 'Synopsis') {
$self->{OFILE}->print( "\n=head1 " . uc($str) );
$self->{OFILE}->print( "\n\n" );
$self->dump_pod( $node );
}
}
elsif ( $node->nodeName() eq "sect2" ) {
my ( $title ) = $node->getChildrenByTagName( "title" );
my $str = $title->string_value();
# NOTE(review): $len is never used in this branch.
my $len = length $str;
$self->{OFILE}->print( "\n=head2 " . $str . "\n\n" );
$self->dump_pod( $node );
}
elsif ( $node->nodeName() eq "sect3" ) {
my ( $title ) = $node->getChildrenByTagName( "title" );
my $str = $title->string_value();
# NOTE(review): $len is never used in this branch.
my $len = length $str;
$self->{OFILE}->print( "\n=head3 " . $str . "\n\n" );
$self->dump_pod( $node );
}
elsif ( $node->nodeName() eq "itemizedlist" ) {
# Bulleted list: one "=item *" per listitem.
my @items = $node->findnodes( "listitem" );
$self->{OFILE}->print( "\n=over 4\n\n" );
foreach my $item ( @items ) {
$self->{OFILE}->print( "=item *\n\n" );
$self->dump_pod( $item );
$self->{OFILE}->print( "\n\n" );
}
$self->{OFILE}->print( "=back\n\n" );
}
elsif ( $node->nodeName() eq "orderedlist" ) {
# Numbered list: items are numbered from 1 in document order.
my @items = $node->findnodes( "listitem" );
my $i = 0;
$self->{OFILE}->print( "\n=over 4\n\n" );
foreach my $item ( @items ) {
$i++;
$self->{OFILE}->print( "=item $i.\n\n" );
$self->dump_pod($item);
$self->{OFILE}->print( "\n\n" );
}
$self->{OFILE}->print( "=back\n\n" );
}
elsif ( $node->nodeName() eq "variablelist" ) {
$self->{OFILE}->print( "=over 4\n\n" );
# NOTE(review): @nodes is never used; the entries are rendered by the
# recursive call below via the "varlistentry" branch.
my @nodes = $node->findnodes( "varlistentry" );
$self->dump_pod( $node );
$self->{OFILE}->print( "\n=back\n\n" );
}
elsif ( $node->nodeName() eq "varlistentry" ) {
# "=item <term>" followed by the rendered listitem children.
my ( $term ) = $node->findnodes( "term" );
$self->{OFILE}->print( "=item " );
if ( defined $term ) {
$self->dump_pod( $term );
}
$self->{OFILE}->print( "\n\n" );
my @nodes =$node->findnodes( "listitem" );
foreach my $it ( @nodes ) {
$self->dump_pod( $it );
}
$self->{OFILE}->print( "\n" );
}
elsif ( $node->nodeName() eq "programlisting" ) {
# Verbatim block: trim, prefix each line with a space, then empty out lines
# that ended up containing only whitespace.
my $str = $node->string_value();
$str =~ s/^\s+|\s+$//g;
$str =~ s/\n/\n /g;
$str=~s/(^|\n)[ \t]+($|\n)/$1$2/g;
$self->{OFILE}->print( "\n\n" );
$self->{OFILE}->print( " ". $str );
$self->{OFILE}->print( "\n\n" );
}
elsif ( $node->nodeName() eq "funcsynopsis") {
# A funcsynopsis with role="synopsis" is not rendered at this position.
if (($node->getAttribute('role')||'') ne 'synopsis') {
$self->dump_pod($node);
$self->{OFILE}->print( "\n" );
}
}
elsif( $node->nodeName() eq "funcsynopsisinfo" ) {
# Emitted as an indented (verbatim) line.
my $str = $node->string_value() ;
$str =~ s/\n/\n /g;
$self->{OFILE}->print( " $str\n" );
} elsif( $node->nodeName() eq "title" or
$node->nodeName() eq "titleabbrev"
) {
# IGNORE
} elsif( $node->nodeName() eq "emphasis" ) {
my $str = $node->string_value() ;
$str =~ s/\n/ /g;
$str = pod_escape($str);
$self->{OFILE}->print( "I<<<<<< $str >>>>>>" );
} elsif( $node->nodeName() eq "function" or
$node->nodeName() eq "email" or
$node->nodeName() eq "literal"
) {
my $str = $node->string_value() ;
$str =~ s/\n/ /g;
$str = pod_escape($str);
$self->{OFILE}->print( "C<<<<<< $str >>>>>>" );
} elsif( $node->nodeName() eq "ulink" ) {
# External link: print the bare L<> when the link text equals the URL,
# otherwise "text (L<url>)".
# NOTE(review): $url is compared with `eq` without a defined-check, and
# $str is not passed through pod_escape() here, unlike the branches above.
my $str = $node->string_value() ;
my $url = $node->getAttribute('url');
$str =~ s/\n/ /g;
if ($str eq $url) {
$self->{OFILE}->print( "L<<<<<< $url >>>>>>" );
} else {
$self->{OFILE}->print( "$str (L<<<<<< $url >>>>>>)" );
}
} elsif( $node->nodeName() eq "xref" ) {
# Internal cross reference: link text is the titleabbrev (preferred) or
# title of the element whose id equals the linkend attribute.
my $linkend = $node->getAttribute('linkend');
my ($target) = $node->findnodes(qq(//*[\@id="$linkend"]/titleabbrev));
($target) = $node->findnodes(qq(//*[\@id="$linkend"]/title)) unless $target;
if ($target) {
my $str = $target->string_value() ;
$str =~ s/\n/ /g;
$str = pod_escape($str);
$self->{OFILE}->print( "L<<<<<< $str >>>>>>" );
} else {
warn "WARNING: Didn't find any section with id='$linkend'\n";
$self->{OFILE}->print( "$linkend" );
}
} elsif( $node->nodeName() eq "olink" ) {
# Link to another document, addressed by the targetdoc attribute.
# A missing targetdoc is reported by dumping the node via warn, but
# processing continues with $url undefined.
my $str = pod_escape($node->string_value());
my $url = $node->getAttribute('targetdoc');
if (!defined $url) {
warn $node->toString(1),"\n";
}
$str =~ s/\n/ /g;
if ($str eq $url) {
$self->{OFILE}->print( "L<<<<<< $url >>>>>>" );
} else {
$self->{OFILE}->print( "$str (L<<<<<< $url >>>>>>)" );
}
} else {
# Unknown node: report its name on STDERR, then render its children.
print STDERR "Ignoring ",$node->nodeName(),"\n";
$self->dump_pod($node);
}
}
}
# pod_escape( $str )
# Returns a copy of $str in which every '<' and '>' is replaced by the
# corresponding POD escape sequence (E<lt> / E<gt>).
sub pod_escape {
    my ($text) = @_;
    my %entity_for = (
        '<' => 'lt',
        '>' => 'gt',
    );
    $text =~ s/([<>])/E<$entity_for{$1}>/g;
    return $text;
}
1;

1
example/xmlns/badguy.xml Normal file
View File

@ -0,0 +1 @@
<A:B xmlns:A="http://D"><A:C xmlns:A="http://E"></A:C></A:B>

View File

@ -0,0 +1 @@
<A:B xmlns:A="http://D"><A:C xmlns:A="http://D"></A:C></A:B>

53
example/xpath.pl Normal file
View File

@ -0,0 +1,53 @@
#!/usr/bin/perl
# $Id$
use XML::LibXML;
use strict;
use warnings;
# Usage: xpath.pl XPATH [FILE ...]
# Evaluates XPATH against every FILE; without FILE arguments the document is
# read from standard input and the exit status reflects handle_result().
my $parser = XML::LibXML->new();
my $xpath  = shift @ARGV;

unless ( scalar @ARGV ) {
    # read from std in
    my $string = join "", <STDIN>;
    my $doc    = $parser->parse_string( $string );
    my $result = $doc->find( $xpath );
    exit handle_result( $result );
}

foreach my $file ( @ARGV ) {
    my $doc    = $parser->parse_file( $file );
    my $result = $doc->find( $xpath );
    handle_result( $result );
}
# handle_result( $result )
# Prints the outcome of an XPath evaluation to STDOUT, one value per line.
#
#   XML::LibXML::NodeList - every node, serialized with toString(1)
#   XML::LibXML::Literal  - the string value
#   XML::LibXML::Number   - the numeric value (e.g. for count(...));
#                           previously these results were silently dropped
#   XML::LibXML::Boolean  - "true" or "false"
#
# Returns 0 when something was printed, 1 when $result is undefined or of an
# unrecognized class (the caller uses this as the process exit status).
sub handle_result {
    my $result = shift;
    return 1 unless defined $result;

    if ( $result->isa( 'XML::LibXML::NodeList' ) ) {
        foreach ( @$result ) {
            print $_->toString(1) , "\n";
        }
        return 0;
    }

    # Literal and Number both expose their scalar content through value().
    if (   $result->isa( 'XML::LibXML::Literal' )
        or $result->isa( 'XML::LibXML::Number' ) ) {
        print $result->value , "\n";
        return 0;
    }

    if ( $result->isa( 'XML::LibXML::Boolean' ) ){
        print $result->to_literal , "\n";
        return 0;
    }

    return 1;
}

File diff suppressed because one or more lines are too long

141
lib/XML/LibXML/Attr.pod Normal file
View File

@ -0,0 +1,141 @@
=head1 NAME
XML::LibXML::Attr - XML::LibXML Attribute Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Attribute nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$attr = XML::LibXML::Attr->new($name [,$value]);
$string = $attr->getValue();
$string = $attr->value;
$attr->setValue( $string );
$node = $attr->getOwnerElement();
$attr->setNamespace($nsURI, $prefix);
$bool = $attr->isId;
$string = $attr->serializeContent;
=head1 DESCRIPTION
This is the interface to handle Attributes like ordinary nodes. The naming of
the class relies on the W3C DOM documentation.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$attr = XML::LibXML::Attr->new($name [,$value]);
Class constructor. If you need to work with ISO encoded strings, you should I<<<<<< always >>>>>> use the C<<<<<< createAttribute >>>>>> of L<<<<<< XML::LibXML::Document >>>>>>.
=item getValue
$string = $attr->getValue();
Returns the value stored for the attribute. If undef is returned, the attribute
has no value, which is different from being C<<<<<< not specified >>>>>>.
=item value
$string = $attr->value;
Alias for I<<<<<< getValue() >>>>>>
=item setValue
$attr->setValue( $string );
This is needed to set a new attribute value. If ISO encoded strings are passed
as parameter, the node has to be bound to a document, otherwise the encoding
might be done incorrectly.
=item getOwnerElement
$node = $attr->getOwnerElement();
returns the node the attribute belongs to. If the attribute is not bound to a
node, undef will be returned. Overwriting the underlying implementation, the I<<<<<< parentNode >>>>>> function will return undef, instead of the owner element.
=item setNamespace
$attr->setNamespace($nsURI, $prefix);
This function tries to bind the attribute to a given namespace. If C<<<<<< $nsURI >>>>>> is undefined or empty, the function discards any previous association of the
attribute with a namespace. If the namespace was not previously declared in the
context of the attribute, this function will fail. In this case you may wish to
call setNamespace() on the ownerElement. If the namespace URI is non-empty and
declared in the context of the attribute, but only with a different (non-empty)
prefix, then the attribute is still bound to the namespace but gets a different
prefix than C<<<<<< $prefix >>>>>>. The function also fails if the prefix is empty but the namespace URI is not
(because unprefixed attributes should by definition belong to no namespace).
This function returns 1 on success, 0 otherwise.
=item isId
$bool = $attr->isId;
Determine whether an attribute is of type ID. For documents with a DTD, this
information is only available if DTD loading/validation has been requested. For
HTML documents parsed with the HTML parser ID detection is done automatically.
In XML documents, all "xml:id" attributes are considered to be of type ID.
=item serializeContent($docencoding)
$string = $attr->serializeContent;
This function is not part of DOM API. It returns attribute content in the form
in which it serializes into XML, that is with all meta-characters properly
quoted and with raw entity references (except for entities expanded during
parse time). Setting the optional $docencoding flag to 1 enforces document
encoding for the output string (which is then passed to Perl as a byte string).
Otherwise the string is passed to Perl as (UTF-8 encoded) characters.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,215 @@
package XML::LibXML::AttributeHash;
use strict;
use warnings;
use Scalar::Util qw//;
use Tie::Hash;
our @ISA = qw/Tie::Hash/;
use vars qw($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# Compile-time constant: true when Scalar::Util provides a usable weaken().
BEGIN
{
    if (defined(&Scalar::Util::weaken))
    {
        *__HAS_WEAKEN = sub () { 1 };
    }
    else
    {
        *__HAS_WEAKEN = sub () { 0 };
    }
};
# Returns the element stored in slot 0 of the tie object.
sub element
{
    my ($self) = @_;
    return $self->[0];
}
# from_clark( $str )
# Splits a key in Clark notation ("{namespace}local") into
# (namespace, local name). A key without a "{...}" part yields (undef, $str).
sub from_clark
{
    my ($self, $str) = @_;
    my ($ns, $local) = $str =~ m! \{ (.+) \} (.+) !x;
    return defined $ns ? ($ns, $local) : (undef, $str);
}
# to_clark( $ns, $local )
# Builds the hash key for an attribute: "{ns}local" when a namespace is
# defined, otherwise just the local name.
sub to_clark
{
    my ($self, $ns, $local) = @_;
    return $local unless defined $ns;
    return "{$ns}$local";
}
# all_keys( @keys )
# Returns the sorted list of @keys plus one Clark-notation key for every
# attribute of the element; namespace declaration nodes are left out.
# An undefined element contributes no keys.
sub all_keys
{
    my ($self, @keys) = @_;

    my $elem  = $self->element;
    my @attrs = defined($elem) ? $elem->attributes : ();

    push @keys,
        map  { $self->to_clark($_->namespaceURI, $_->localname) }
        grep { !$_->isa('XML::LibXML::Namespace') }
        @attrs;

    return sort @keys;
}
# TIEHASH( $element, %args )
# Constructor used by tie(). The object is an array: slot 0 holds the
# element, slot 1 the key list cached during iteration, slot 2 the options.
# With a true "weaken" option (and a working Scalar::Util::weaken) the
# reference to the element is weakened.
sub TIEHASH
{
    my $class   = shift;
    my $element = shift;
    my %args    = @_;

    my $self = bless [$element, undef, \%args], $class;
    Scalar::Util::weaken( $self->[0] ) if __HAS_WEAKEN and $args{weaken};
    return $self;
}
# STORE( $key, $value )
# Sets the attribute named by $key (Clark notation for namespaced names) to
# the stringified $value. Namespaced attributes are created with the
# placeholder prefix "xxx".
sub STORE
{
    my ($self, $key, $value) = @_;
    my ($ns, $local) = $self->from_clark($key);

    return $self->element->setAttribute($local, "$value")
        unless defined $ns;

    return $self->element->setAttributeNS($ns, "xxx:$local", "$value");
}
# FETCH( $key )
# Returns the value of the attribute named by $key; a Clark-notation key is
# looked up namespace-aware.
sub FETCH
{
    my ($self, $key) = @_;
    my ($ns, $local) = $self->from_clark($key);

    return defined $ns
        ? $self->element->getAttributeNS($ns, "$local")
        : $self->element->getAttribute($local);
}
# EXISTS( $key )
# Reports whether the element has the attribute named by $key.
sub EXISTS
{
    my ($self, $key) = @_;
    my ($ns, $local) = $self->from_clark($key);

    unless (defined $ns)
    {
        return $self->element->hasAttribute($local);
    }
    return $self->element->hasAttributeNS($ns, "$local");
}
# DELETE( $key )
# Removes the attribute named by $key from the element and returns whatever
# the underlying remove call returns.
sub DELETE
{
    my ($self, $key) = @_;
    my ($ns, $local) = $self->from_clark($key);

    if (!defined $ns)
    {
        return $self->element->removeAttribute($local);
    }
    return $self->element->removeAttributeNS($ns, "$local");
}
# FIRSTKEY
# Starts an iteration: caches the sorted key list in slot 1 and returns the
# first key (in list context, the first key together with its value).
sub FIRSTKEY
{
    my ($self) = @_;

    my @keys = $self->all_keys;
    $self->[1] = \@keys;

    my $first = $keys[0];
    return wantarray ? ($first, $self->FETCH($first)) : $first;
}
# NEXTKEY( $lastkey )
# Continues an iteration started by FIRSTKEY: returns the first key that
# sorts after $lastkey (in list context, that key together with its value).
# The key list cached by FIRSTKEY is used when present, otherwise the keys
# are recomputed. When no key follows $lastkey the cache is cleared and an
# empty list / undef is returned, which ends the iteration.
sub NEXTKEY
{
    my ($self, $lastkey) = @_;
    my @keys = defined $self->[1] ? @{ $self->[1] } : $self->all_keys;
    my $found;
    foreach my $k (@keys)
    {
        if ($k gt $lastkey)
        {
            # Assign and leave the loop unconditionally. The former
            # "$found = $k and last" only stopped for keys that are true,
            # so a false key such as "0" was overwritten by the next key.
            $found = $k;
            last;
        }
    }
    if (!defined $found)
    {
        $self->[1] = undef;
        return;
    }
    if (wantarray)
    {
        return ($found, $self->FETCH($found));
    }
    return $found;
}
# SCALAR
# Value of the tied hash in scalar context: the element itself.
sub SCALAR
{
    my $self = shift;
    return $self->element;
}
# CLEAR
# Removes every attribute reported by all_keys() and returns the tie object.
sub CLEAR
{
    my ($self) = @_;

    my @doomed = $self->all_keys;
    foreach my $name (@doomed)
    {
        $self->DELETE($name);
    }

    return $self;
}
__PACKAGE__
__END__
=head1 NAME
XML::LibXML::AttributeHash - tie an XML::LibXML::Element to a hash to access its attributes
=head1 SYNOPSIS
tie my %hash, 'XML::LibXML::AttributeHash', $element;
$hash{'href'} = 'http://example.com/';
print $element->getAttribute('href') . "\n";
=head1 DESCRIPTION
This class allows an element's attributes to be accessed as if they were a
plain old Perl hash. Attribute names become hash keys. Namespaced attributes
are keyed using Clark notation.
my $XLINK = 'http://www.w3.org/1999/xlink';
tie my %hash, 'XML::LibXML::AttributeHash', $element;
$hash{"{$XLINK}href"} = 'http://localhost/';
print $element->getAttributeNS($XLINK, 'href') . "\n";
There is rarely any need to use XML::LibXML::AttributeHash directly. In
general, it is possible to take advantage of XML::LibXML::Element's
overloading. The example in the SYNOPSIS could have been written:
$element->{'href'} = 'http://example.com/';
print $element->getAttribute('href') . "\n";
The tie interface allows the passing of additional arguments to
XML::LibXML::AttributeHash:
tie my %hash, 'XML::LibXML::AttributeHash', $element, %args;
Currently only one argument is supported, the boolean "weaken" which (if
true) indicates that the tied object's reference to the element should be
a weak reference. This is used by XML::LibXML::Element's overloading. The
"weaken" argument is ignored if you don't have a working Scalar::Util::weaken.

93
lib/XML/LibXML/Boolean.pm Normal file
View File

@ -0,0 +1,93 @@
# $Id$
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Boolean;
use XML::LibXML::Number;
use XML::LibXML::Literal;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
use overload
'""' => \&value,
'<=>' => \&cmp;
# new( $param )
# Creates a Boolean object holding 1 when $param is true and 0 otherwise.
sub new {
    my ($class, $param) = @_;
    my $flag = $param ? 1 : 0;
    return bless \$flag, $class;
}
# True
# Creates a new Boolean object with a true value (1).
sub True {
    my ($class) = @_;
    my $flag = 1;
    return bless \$flag, $class;
}
# False
# Creates a new Boolean object with a false value (0).
sub False {
    my ($class) = @_;
    my $flag = 0;
    return bless \$flag, $class;
}
# value
# Returns the stored 1 or 0.
sub value {
    my ($self) = @_;
    return $$self;
}
# cmp( $other, $swap )
# Handler for the overloaded <=> operator: compares the stored value with
# $other numerically; $swap is true when the operands were reversed.
sub cmp {
    my ($self, $other, $swap) = @_;
    return $swap
        ? ($other <=> $$self)
        : ($$self <=> $other);
}
# Converts the boolean to an XML::LibXML::Number built from value().
sub to_number {
    my ($self) = @_;
    return XML::LibXML::Number->new( $self->value );
}
# A boolean converts to itself.
sub to_boolean {
    my ($self) = @_;
    return $self;
}
# Converts the boolean to an XML::LibXML::Literal holding "true" or "false".
sub to_literal {
    my ($self) = @_;
    return XML::LibXML::Literal->new( $self->value ? "true" : "false" );
}
# Returns the plain string form of the boolean, obtained via to_literal().
sub string_value {
    my ($self) = @_;
    my $literal = $self->to_literal;
    return $literal->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Boolean - Boolean true/false values
=head1 DESCRIPTION
XML::LibXML::Boolean objects implement simple boolean true/false objects.
=head1 API
=head2 XML::LibXML::Boolean->True
Creates a new Boolean object with a true value.
=head2 XML::LibXML::Boolean->False
Creates a new Boolean object with a false value.
=head2 value()
Returns true or false.
=head2 to_literal()
Returns the string "true" or "false".
=cut

View File

@ -0,0 +1,65 @@
=head1 NAME
XML::LibXML::CDATASection - XML::LibXML Class for CDATA Sections
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to CDATA nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$node = XML::LibXML::CDATASection->new( $content );
=head1 DESCRIPTION
This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for CDATA nodes.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::CDATASection->new( $content );
The constructor is the only provided function for this package. It is required,
because I<<<<<< libxml2 >>>>>> treats the different text node types slightly differently.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,66 @@
=head1 NAME
XML::LibXML::Comment - XML::LibXML Comment Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Comment nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$node = XML::LibXML::Comment->new( $content );
=head1 DESCRIPTION
This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for comment nodes. This can be done, since only the output of the node
types is different, but not the data structure. :-)
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::Comment->new( $content );
The constructor is the only provided function for this package. It is required,
because I<<<<<< libxml2 >>>>>> treats text nodes and comment nodes slightly differently.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

203
lib/XML/LibXML/Common.pm Normal file
View File

@ -0,0 +1,203 @@
#-------------------------------------------------------------------------#
# $Id: Common.pm,v 1.5 2003/02/27 18:32:59 phish108 Exp $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
#-------------------------------------------------------------------------#
package XML::LibXML::Common;
#-------------------------------------------------------------------------#
# global blur #
#-------------------------------------------------------------------------#
use strict;
use warnings;
require Exporter;
use vars qw( @ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
use XML::LibXML qw(:libxml);
#-------------------------------------------------------------------------#
# export information #
#-------------------------------------------------------------------------#
# Exporter tag table.
#   :w3c      - node type names as used by the DOM specification
#   :encoding - the two charset conversion functions
#   :all      - :w3c followed by :encoding
#   :libxml   - the same node types with an XML_ prefix
#   :gdome    - the same node types with a GDOME_ prefix
%EXPORT_TAGS = do {
    my @encoding = qw(
        encodeToUTF8
        decodeFromUTF8
    );

    my @w3c = qw(
        ELEMENT_NODE
        ATTRIBUTE_NODE
        TEXT_NODE
        CDATA_SECTION_NODE
        ENTITY_REFERENCE_NODE
        ENTITY_NODE
        PI_NODE
        PROCESSING_INSTRUCTION_NODE
        COMMENT_NODE
        DOCUMENT_NODE
        DOCUMENT_TYPE_NODE
        DOCUMENT_FRAG_NODE
        DOCUMENT_FRAGMENT_NODE
        NOTATION_NODE
        HTML_DOCUMENT_NODE
        DTD_NODE
        ELEMENT_DECLARATION
        ATTRIBUTE_DECLARATION
        ENTITY_DECLARATION
        NAMESPACE_DECLARATION
        XINCLUDE_END
        XINCLUDE_START
    );

    # The :libxml and :gdome names differ only in their prefix.
    my @prefixed = qw(
        ELEMENT_NODE
        ATTRIBUTE_NODE
        TEXT_NODE
        CDATA_SECTION_NODE
        ENTITY_REF_NODE
        ENTITY_NODE
        PI_NODE
        COMMENT_NODE
        DOCUMENT_NODE
        DOCUMENT_TYPE_NODE
        DOCUMENT_FRAG_NODE
        NOTATION_NODE
        HTML_DOCUMENT_NODE
        DTD_NODE
        ELEMENT_DECL
        ATTRIBUTE_DECL
        ENTITY_DECL
        NAMESPACE_DECL
        XINCLUDE_END
        XINCLUDE_START
    );

    (
        all      => [ @w3c, @encoding ],
        w3c      => [ @w3c ],
        libxml   => [ map( "XML_$_",   @prefixed ) ],
        gdome    => [ map( "GDOME_$_", @prefixed ) ],
        encoding => [ @encoding ],
    );
};
# Everything that may be imported on request: all four name families.
@EXPORT_OK = map { @{ $EXPORT_TAGS{$_} } } qw( encoding w3c libxml gdome );

# Imported by default: the encoding functions and the W3C node type names.
@EXPORT = map { @{ $EXPORT_TAGS{$_} } } qw( encoding w3c );
#-------------------------------------------------------------------------#
# W3 conform node types #
#-------------------------------------------------------------------------#
# Node type codes under their DOM (W3C) names.
use constant {
    ELEMENT_NODE                => 1,
    ATTRIBUTE_NODE              => 2,
    TEXT_NODE                   => 3,
    CDATA_SECTION_NODE          => 4,
    ENTITY_REFERENCE_NODE       => 5,
    ENTITY_NODE                 => 6,
    PROCESSING_INSTRUCTION_NODE => 7,
    COMMENT_NODE                => 8,
    DOCUMENT_NODE               => 9,
    DOCUMENT_TYPE_NODE          => 10,
    DOCUMENT_FRAGMENT_NODE      => 11,
    NOTATION_NODE               => 12,
    HTML_DOCUMENT_NODE          => 13,
    DTD_NODE                    => 14,
    ELEMENT_DECLARATION         => 15,
    ATTRIBUTE_DECLARATION       => 16,
    ENTITY_DECLARATION          => 17,
    NAMESPACE_DECLARATION       => 18,
};
#-------------------------------------------------------------------------#
# some extras for the W3 spec
#-------------------------------------------------------------------------#
# Short aliases for PROCESSING_INSTRUCTION_NODE and DOCUMENT_FRAGMENT_NODE.
use constant PI_NODE            => 7;
use constant DOCUMENT_FRAG_NODE => 11;
# XInclude marker nodes. The values follow libxml2's numbering (start = 19,
# end = 20) and agree with the GDOME_XINCLUDE_* constants in this module;
# they were previously swapped.
use constant XINCLUDE_START     => 19;
use constant XINCLUDE_END       => 20;
#-------------------------------------------------------------------------#
# libgdome compat names #
#-------------------------------------------------------------------------#
# Node type codes under their XML::GDOME compatibility names.
use constant {
    GDOME_ELEMENT_NODE       => 1,
    GDOME_ATTRIBUTE_NODE     => 2,
    GDOME_TEXT_NODE          => 3,
    GDOME_CDATA_SECTION_NODE => 4,
    GDOME_ENTITY_REF_NODE    => 5,
    GDOME_ENTITY_NODE        => 6,
    GDOME_PI_NODE            => 7,
    GDOME_COMMENT_NODE       => 8,
    GDOME_DOCUMENT_NODE      => 9,
    GDOME_DOCUMENT_TYPE_NODE => 10,
    GDOME_DOCUMENT_FRAG_NODE => 11,
    GDOME_NOTATION_NODE      => 12,
    GDOME_HTML_DOCUMENT_NODE => 13,
    GDOME_DTD_NODE           => 14,
    GDOME_ELEMENT_DECL       => 15,
    GDOME_ATTRIBUTE_DECL     => 16,
    GDOME_ENTITY_DECL        => 17,
    GDOME_NAMESPACE_DECL     => 18,
    GDOME_XINCLUDE_START     => 19,
    GDOME_XINCLUDE_END       => 20,
};
1;
#-------------------------------------------------------------------------#
__END__

136
lib/XML/LibXML/Common.pod Normal file
View File

@ -0,0 +1,136 @@
=head1 NAME
XML::LibXML::Common - Constants and Character Encoding Routines
=head1 SYNOPSIS
use XML::LibXML::Common;
$encodedstring = encodeToUTF8( $name_of_encoding, $string_to_encode );
$decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode );
=head1 DESCRIPTION
XML::LibXML::Common defines constants for all node types and provides interface
to libxml2 charset conversion functions.
Since XML::LibXML uses its own node type definitions, one may want to use
XML::LibXML::Common in its compatibility mode:
=head2 Exporter TAGS
use XML::LibXML::Common qw(:libxml);
C<<<<<< :libxml >>>>>> tag will use the XML::LibXML Compatibility mode, which defines the old 'XML_'
node-type definitions.
use XML::LibXML::Common qw(:gdome);
C<<<<<< :gdome >>>>>> tag will use the XML::GDOME Compatibility mode, which defines the old 'GDOME_'
node-type definitions.
use XML::LibXML::Common qw(:w3c);
This uses the nodetype definition names as specified for DOM.
use XML::LibXML::Common qw(:encoding);
This tag can be used to export only the charset encoding functions of
XML::LibXML::Common.
=head2 Exports
By default the W3 definitions as defined in the DOM specifications and the
encoding functions are exported by XML::LibXML::Common.
=head2 Encoding functions
To encode or decode a string to or from UTF-8, XML::LibXML::Common exports two
functions, which provide an interface to the encoding support in C<<<<<< libxml2 >>>>>>. Which encodings are supported by these functions depends on how C<<<<<< libxml2 >>>>>> was compiled. UTF-16 is always supported and on most installations, ISO
encodings are supported as well.
This interface was useful for older versions of Perl. Since Perl >= 5.8
provides similar functions via the C<<<<<< Encode >>>>>> module, it is probably a good idea to use those instead.
=over 4
=item encodeToUTF8
$encodedstring = encodeToUTF8( $name_of_encoding, $string_to_encode );
The function will convert a byte string from the specified encoding to a UTF-8
encoded character string.
=item decodeFromUTF8
$decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode );
This function converts a UTF-8 encoded character string to a specified
encoding. Note that the conversion can raise an error if the given string
contains characters that cannot be represented in the target encoding.
=back
Both these functions report their errors on the standard error. If an error
occurs the function will croak(). To catch the error information it is required
to call the encoding function from within an eval block in order to prevent the
entire script from being stopped on encoding error.
=head2 A note on history
Before XML::LibXML 1.70, this class was available as a separate CPAN
distribution, intended to provide functionality shared between XML::LibXML,
XML::GDOME, and possibly other modules. Since there seems to be no progress in
this direction, we decided to merge XML::LibXML::Common 0.13 and XML::LibXML
1.70 to one CPAN distribution.
The merge also naturally eliminates a practical and urgent problem experienced
by many XML::LibXML users on certain platforms, namely mysterious misbehavior
of XML::LibXML occurring if the installed (often pre-packaged) version of
XML::LibXML::Common was compiled against an older version of libxml2 than
XML::LibXML.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

149
lib/XML/LibXML/DOM.pod Normal file
View File

@ -0,0 +1,149 @@
=head1 NAME
XML::LibXML::DOM - XML::LibXML DOM Implementation
=head1 DESCRIPTION
XML::LibXML provides a lightweight interface to I<<<<<< modify >>>>>> a node of the document tree generated by the XML::LibXML parser. This interface
follows as far as possible the DOM Level 3 specification. In addition to the
specified functions, XML::LibXML supports some functions that are more handy to
use in the perl environment.
One also has to remember, that XML::LibXML is an interface to libxml2 nodes
which actually reside on the C-Level of XML::LibXML. This means each node is a
reference to a structure which is different from a perl hash or array. The only
way to access these structures' values is through the DOM interface provided by
XML::LibXML. This also means, that one I<<<<<< can't >>>>>> simply inherit an XML::LibXML node and add new member variables as if they were
hash keys.
The DOM interface of XML::LibXML does not intend to implement a full DOM
interface as it is done by XML::GDOME and used for full featured application.
Moreover, it offers a simple way to build or modify documents that are created
by XML::LibXML's parser.
Another target of the XML::LibXML interface is to make the interfaces of
libxml2 available to the perl community. This includes also some workarounds to
some features where libxml2 assumes more control over the C-Level that most
perl users don't have.
One of the most important parts of the XML::LibXML DOM interface is that the
interfaces try to follow the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>) rather strictly. This means the interface functions are named as the DOM
specification says and not what widespread Java interfaces claim to be the
standard. Although there are several functions that have only a singular
interface that conforms to the DOM spec XML::LibXML provides an additional Java
style alias interface.
Moreover, there are some function interfaces left over from early stages of
XML::LibXML for compatibility reasons. These interfaces are for compatibility
reasons I<<<<<< only >>>>>>. They might disappear in one of the future versions of XML::LibXML, so a user
is requested to switch over to the official functions.
=head2 Encodings and XML::LibXML's DOM implementation
See the section on Encodings in the I<<<<<< XML::LibXML >>>>>> manual page.
=head2 Namespaces and XML::LibXML's DOM implementation
XML::LibXML's DOM implementation is limited by the DOM implementation of
libxml2 which treats namespaces slightly differently than required by the DOM
Level 2 specification.
According to the DOM Level 2 specification, namespaces of elements and
attributes should be persistent, and nodes should be permanently bound to
namespace URIs as they get created; it should be possible to manipulate the
special attributes used for declaring XML namespaces just as other attributes
without affecting the namespaces of other nodes. In DOM Level 2, the
application is responsible for creating the special attributes consistently
and/or for correct serialization of the document.
This is inconvenient, causes problems in serialization of DOM to XML, and,
most importantly, seems almost impossible to implement over libxml2.
In libxml2, namespace URI and prefix of a node is provided by a pointer to a
namespace declaration (appearing as a special xmlns attribute in the XML
document). If the prefix or namespace URI of the declaration changes, the
prefix and namespace URI of all nodes that point to it changes as well.
Moreover, in contrast to DOM, a node (element or attribute) can only be bound
to a namespace URI if there is some namespace declaration in the document to
point to.
Therefore current DOM implementation in XML::LibXML tries to treat namespace
declarations in a compromise between reason, common sense, limitations of
libxml2, and the DOM Level 2 specification.
In XML::LibXML, special attributes declaring XML namespaces are often created
automatically, usually when a namespaced node is attached to a document and no
existing declaration of the namespace and prefix is in the scope to be reused.
In this respect, XML::LibXML DOM implementation differs from the DOM Level 2
specification according to which special attributes for declaring the
appropriate XML namespaces should not be added when a node with a namespace
prefix and namespace URI is created.
Namespace declarations are also created when L<<<<<< XML::LibXML::Document >>>>>>'s createElementNS() or createAttributeNS() function are used. If the a
namespace is not declared on the documentElement, the namespace will be locally
declared for the newly created node. In case of Attributes this may look a bit
confusing, since these nodes cannot have namespace declarations themselves. In this
case the namespace is internally applied to the attribute and later declared on
the node the attribute is appended to (if required).
The following example may explain this a bit:
my $doc = XML::LibXML->createDocument;
my $root = $doc->createElementNS( "", "foo" );
$doc->setDocumentElement( $root );
my $attr = $doc->createAttributeNS( "bar", "bar:foo", "test" );
$root->setAttributeNodeNS( $attr );
This piece of code will result in the following document:
<?xml version="1.0"?>
<foo xmlns:bar="bar" bar:foo="test"/>
The namespace is declared on the document element during the
setAttributeNodeNS() call.
Namespaces can be also declared explicitly by the use of XML::LibXML::Element's
setNamespace() function. Since 1.61, they can also be manipulated with
functions setNamespaceDeclPrefix() and setNamespaceDeclURI() (not available in
DOM). Changing a URI or prefix of an existing namespace declaration affects
the namespace URI and prefix of all nodes which point to it (that is the nodes
in its scope).
It is also important to repeat the specification: While working with namespaces
you should use the namespace aware functions instead of the simplified
versions. For example you should I<<<<<< never >>>>>> use setAttribute() but setAttributeNS().
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

216
lib/XML/LibXML/Devel.pm Normal file
View File

@ -0,0 +1,216 @@
# $Id: $
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2011 Joachim Zobel
#
# Perl-side stub for XML::LibXML::Devel: it only declares the package, its
# version and its export lists. No subs are defined in this file; the names
# listed in %EXPORT_TAGS are presumably supplied by LibXML.xs once XML::LibXML
# has been loaded -- NOTE(review): confirm against the XS source.
package XML::LibXML::Devel;
use strict;
use warnings;
use XML::LibXML;
use vars qw ($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
use 5.008_000;
# Inherit import() from Exporter so the export lists below take effect.
use parent qw(Exporter);
use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS );
# This allows declaration use XML::LibXML::Devel ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw(
node_to_perl
node_from_perl
refcnt_inc
refcnt_dec
refcnt
fix_owner
mem_used
) ] );
# Every name in the ':all' tag may be imported on request. @EXPORT is never
# assigned in this file, so nothing is exported by default.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
# Preloaded methods go here.
# The module file must return a true value for use/require to succeed.
1;
__END__
=head1 NAME
XML::LibXML::Devel - makes functions from LibXML.xs available
=head1 SYNOPSIS
/**********************************************
* C functions you want to access
*/
xmlNode *return_node();
void receive_node(xmlNode *);
###############################################
# XS Code
void *
xs_return_node
CODE:
RETVAL = return_node();
OUTPUT:
RETVAL
void
xs_receive_node
void *n
CODE:
receive_node(n);
###############################################
# Perl code
use XML::LibXML::Devel;
sub return_node
{
my $raw_node = xs_return_node();
my $node = XML::LibXML::Devel::node_to_perl($raw_node);
XML::LibXML::Devel::refcnt_inc($raw_node);
return $node;
}
sub receive_node
{
my ($node) = @_;
my $raw_node = XML::LibXML::Devel::node_from_perl($node);
xs_receive_node($raw_node);
XML::LibXML::Devel::refcnt_inc($raw_node);
}
=head1 DESCRIPTION
C<XML::LibXML::Devel> makes functions from LibXML.xs available that
are needed to wrap libxml2 nodes in and out of XML::LibXML::Nodes.
This gives cleaner dependencies than using LibXML.so directly.
To XS a library that uses libxml2 nodes the first step is to
do this so that xmlNodePtr is passed as void *. These raw nodes
are then turned into libxml nodes by using these C<Devel> functions.
Be aware that this module is currently rather experimental. The function
names may change if I XS more functions and introduce a reasonable
naming convention.
Be also aware that this module is a great tool to cause segfaults and
introduce memory leaks. It does however provide a partial cure by making
C<xmlMemUsed> available as C<mem_used>.
=head1 FUNCTIONS
=head2 NODE MANAGEMENT
=over 1
=item node_to_perl
node_to_perl($raw_node);
Returns a LibXML::Node object. This has a proxy node with a reference
counter and an owner attached. The raw node will be deleted as soon
as the reference counter reaches zero.
If the C library is keeping a
pointer to the raw node, you need to call refcnt_inc immediately.
You also need to replace xmlFreeNode by a call to refcnt_dec.
=item node_from_perl
node_from_perl($node);
Returns a raw node. This is a void * pointer and you can do nothing
but pass it to functions that treat it as an xmlNodePtr. The
raw node will be freed as soon as its reference counter reaches zero.
If the C library is keeping a
pointer to the raw node, you need to call refcnt_inc immediately.
You also need to replace xmlFreeNode by a call to refcnt_dec.
=item refcnt_inc
refcnt_inc($raw_node);
Increments the raw node's reference counter. The raw node must already
be known to perl to have a reference counter.
=item refcnt_dec
refcnt_dec($raw_node);
Decrements the raw node's reference counter and returns the value it
had before. If the counter becomes zero or less,
this method will free the proxy node holding the reference counter.
If the node is part of a
subtree, refcnt_dec will fix the reference counts and delete
the subtree if it is not required any more.
=item refcnt
refcnt($raw_node);
Returns the value of the reference counter.
=item fix_owner
fix_owner($raw_node, $raw_parent);
This function fixes the reference counts for an entire subtree.
It is very important to fix an entire subtree after node operations
where the document or the owner node may get changed. This method is
aware of nodes that already belong to a certain owner node.
=back
=head2 MEMORY DEBUGGING
=over 1
=item $ENV{DEBUG_MEMORY}
BEGIN {$ENV{DEBUG_MEMORY} = 1;};
use XML::LibXML;
This turns on libxml2 memory debugging. It must be set before
XML::LibXML is loaded.
=item mem_used
mem_used();
Returns the number of bytes currently allocated.
=back
=head2 EXPORT
None by default.
=head1 SEE ALSO
This was created to support the needs of Apache2::ModXml2. So this
can serve as an example.
=head1 AUTHOR
Joachim Zobel E<lt>jz-2011@heute-morgen.deE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2011 by Joachim Zobel
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.
=cut

703
lib/XML/LibXML/Document.pod Normal file
View File

@ -0,0 +1,703 @@
=head1 NAME
XML::LibXML::Document - XML::LibXML DOM Document Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Document nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$dom = XML::LibXML::Document->new( $version, $encoding );
$dom = XML::LibXML::Document->createDocument( $version, $encoding );
$strURI = $doc->URI();
$doc->setURI($strURI);
$strEncoding = $doc->encoding();
$strEncoding = $doc->actualEncoding();
$doc->setEncoding($new_encoding);
$strVersion = $doc->version();
$doc->standalone
$doc->setStandalone($numvalue);
my $compression = $doc->compression;
$doc->setCompression($ziplevel);
$docstring = $dom->toString($format);
$c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
$ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
$str = $doc->serialize($format);
$state = $doc->toFile($filename, $format);
$state = $doc->toFH($fh, $format);
$str = $document->toStringHTML();
$str = $document->serialize_html();
$bool = $dom->is_valid();
$dom->validate();
$root = $dom->documentElement();
$dom->setDocumentElement( $root );
$element = $dom->createElement( $nodename );
$element = $dom->createElementNS( $namespaceURI, $nodename );
$text = $dom->createTextNode( $content_text );
$comment = $dom->createComment( $comment_text );
$attrnode = $doc->createAttribute($name [,$value]);
$attrnode = $doc->createAttributeNS( $namespaceURI, $name [,$value] );
$fragment = $doc->createDocumentFragment();
$cdata = $dom->createCDATASection( $cdata_content );
my $pi = $doc->createProcessingInstruction( $target, $data );
my $entref = $doc->createEntityReference($refname);
$dtd = $document->createInternalSubset( $rootnode, $public, $system);
$dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
$document->importNode( $node );
$document->adoptNode( $node );
my $dtd = $doc->externalSubset;
my $dtd = $doc->internalSubset;
$doc->setExternalSubset($dtd);
$doc->setInternalSubset($dtd);
my $dtd = $doc->removeExternalSubset();
my $dtd = $doc->removeInternalSubset();
my @nodelist = $doc->getElementsByTagName($tagname);
my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
my @nodelist = $doc->getElementsByLocalName($localname);
my $node = $doc->getElementById($id);
$dom->indexElements();
=head1 DESCRIPTION
The Document Class is in most cases the result of a parsing process. But
sometimes it is necessary to create a Document from scratch. The DOM Document
Class provides functions that conform to the DOM Core naming style.
It inherits all functions from L<<<<<< XML::LibXML::Node >>>>>> as specified in the DOM specification. This enables access to the nodes besides
the root element on document level - a C<<<<<< DTD >>>>>> for example. The support for these nodes is limited at the moment.
While generally nodes are bound to a document in the DOM concept it is
suggested that one should always create a node not bound to any document. There
is no need of really including the node to the document, but once the node is
bound to a document, it is quite safe that all strings have the correct
encoding. If an unbound text node with an ISO encoded string is created (e.g.
with $CLASS->new()), the C<<<<<< toString >>>>>> function may not return the expected result.
To prevent such problems, it is recommended to pass all data to XML::LibXML
methods as character strings (i.e. UTF-8 encoded, with the UTF8 flag on).
=head1 METHODS
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$dom = XML::LibXML::Document->new( $version, $encoding );
alias for createDocument()
=item createDocument
$dom = XML::LibXML::Document->createDocument( $version, $encoding );
The constructor for the document class. As Parameter it takes the version
string and (optionally) the encoding string. Simply calling I<<<<<< createDocument >>>>>>() will create the document:
<?xml version="your version" encoding="your encoding"?>
Both parameters are optional. The default value for I<<<<<< $version >>>>>> is C<<<<<< 1.0 >>>>>>, of course. If the I<<<<<< $encoding >>>>>> parameter is not set, the encoding will be left unset, which means UTF-8 is
implied.
The call of I<<<<<< createDocument >>>>>>() without any parameter will result in the following code:
<?xml version="1.0"?>
Alternatively one can call this constructor directly from the XML::LibXML class
level, to avoid some typing. This will not have any effect on the class
instance, which is always XML::LibXML::Document.
my $document = XML::LibXML->createDocument( "1.0", "UTF-8" );
is therefore a shortcut for
my $document = XML::LibXML::Document->createDocument( "1.0", "UTF-8" );
=item URI
$strURI = $doc->URI();
Returns the URI (or filename) of the original document. For documents obtained
by parsing a string or a FH without using the URI parsing argument of the
corresponding C<<<<<< parse_* >>>>>> function, the result is a generated string unknown-XYZ where XYZ is some
number; for documents created with the constructor C<<<<<< new >>>>>>, the URI is undefined.
The value can be modified by calling C<<<<<< setURI >>>>>> method on the document node.
=item setURI
$doc->setURI($strURI);
Sets the URI of the document reported by the method URI (see also the URI
argument to the various C<<<<<< parse_* >>>>>> functions).
=item encoding
$strEncoding = $doc->encoding();
returns the encoding string of the document.
my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
print $doc->encoding; # prints ISO-8859-15
=item actualEncoding
$strEncoding = $doc->actualEncoding();
returns the encoding in which the XML will be returned by $doc->toString().
This is usually the original encoding of the document as declared in the XML
declaration and returned by $doc->encoding. If the original encoding is not
known (e.g. if created in memory or parsed from a XML without a declared
encoding), 'UTF-8' is returned.
my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
print $doc->actualEncoding; # prints ISO-8859-15
=item setEncoding
$doc->setEncoding($new_encoding);
This method allows one to change the declaration of encoding in the XML
declaration of the document. The value also affects the encoding in which the
document is serialized to XML by $doc->toString(). Use setEncoding() to remove
the encoding declaration.
=item version
$strVersion = $doc->version();
returns the version string of the document
I<<<<<< getVersion() >>>>>> is an alternative form of this function.
=item standalone
$doc->standalone
This function returns the numerical value of a document's XML declaration's
standalone attribute. It returns I<<<<<< 1 >>>>>> if standalone="yes" was found, I<<<<<< 0 >>>>>> if standalone="no" was found and I<<<<<< -1 >>>>>> if standalone was not specified (default on creation).
=item setStandalone
$doc->setStandalone($numvalue);
Through this method it is possible to alter the value of a document's standalone
attribute. Set it to I<<<<<< 1 >>>>>> to set standalone="yes", to I<<<<<< 0 >>>>>> to set standalone="no" or set it to I<<<<<< -1 >>>>>> to remove the standalone attribute from the XML declaration.
=item compression
my $compression = $doc->compression;
libxml2 allows reading of documents directly from gzipped files. In this case
the compression variable is set to the compression level of that file (0-8). If
XML::LibXML parsed a different source or the file wasn't compressed, the
returned value will be I<<<<<< -1 >>>>>>.
=item setCompression
$doc->setCompression($ziplevel);
If one intends to write the document directly to a file, it is possible to set
the compression level for a given document. This level can be in the range from
0 to 8. If XML::LibXML should not try to compress use I<<<<<< -1 >>>>>> (default).
Note that this feature will I<<<<<< only >>>>>> work if libxml2 is compiled with zlib support and toFile() is used for output.
=item toString
$docstring = $dom->toString($format);
I<<<<<< toString >>>>>> is a DOM serializing function, so the DOM Tree is serialized into an XML
string, ready for output.
IMPORTANT: unlike toString for other nodes, on document nodes this function
returns the XML as a byte string in the original encoding of the document (see
the actualEncoding() method)! This means you can simply do:
open my $out_fh, '>', $file;
print {$out_fh} $doc->toString;
regardless of the actual encoding of the document. See the section on encodings
in L<<<<<< XML::LibXML >>>>>> for more details.
The optional I<<<<<< $format >>>>>> parameter sets the indenting of the output. This parameter is expected to be an C<<<<<< integer >>>>>> value, that specifies that indentation should be used. The format parameter can
have three different values if it is used:
If $format is 0, then the document is dumped as it was originally parsed.
If $format is 1, libxml2 will add ignorable white spaces, so the nodes content
is easier to read. Existing text nodes will not be altered.
If $format is 2 (or higher), libxml2 will act as $format == 1 but it adds a
leading and a trailing line break to each text node.
libxml2 uses a hard-coded indentation of 2 space characters per indentation
level. This value can not be altered on run-time.
=item toStringC14N
$c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
See the documentation in L<<<<<< XML::LibXML::Node >>>>>>.
=item toStringEC14N
$ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
See the documentation in L<<<<<< XML::LibXML::Node >>>>>>.
=item serialize
$str = $doc->serialize($format);
An alias for toString(). This function name was added to be more consistent
with libxml2.
=item serialize_c14n
An alias for toStringC14N().
=item serialize_exc_c14n
An alias for toStringEC14N().
=item toFile
$state = $doc->toFile($filename, $format);
This function is similar to toString(), but it writes the document directly
into a filesystem. This function is very useful, if one needs to store large
documents.
The format parameter has the same behaviour as in toString().
=item toFH
$state = $doc->toFH($fh, $format);
This function is similar to toString(), but it writes the document directly to
a filehandle or a stream. A byte stream in the document encoding is passed to
the file handle. Do NOT apply any C<<<<<< :encoding(...) >>>>>> or C<<<<<< :utf8 >>>>>> PerlIO layer to the filehandle! See the section on encodings in L<<<<<< XML::LibXML >>>>>> for more details.
The format parameter has the same behaviour as in toString().
=item toStringHTML
$str = $document->toStringHTML();
I<<<<<< toStringHTML >>>>>> serializes the tree to a byte string in the document encoding as HTML. With this
method indenting is automatic and managed by libxml2 internally.
=item serialize_html
$str = $document->serialize_html();
An alias for toStringHTML().
=item is_valid
$bool = $dom->is_valid();
Returns either TRUE or FALSE depending on whether the DOM Tree is a valid
Document or not.
You may also pass in a L<<<<<< XML::LibXML::Dtd >>>>>> object, to validate against an external DTD:
if (!$dom->is_valid($dtd)) {
warn("document is not valid!");
}
=item validate
$dom->validate();
This is an exception throwing equivalent of is_valid. If the document is not
valid it will throw an exception containing the error. This allows you much
better error reporting than simply is_valid or not.
Again, you may pass in a DTD object
=item documentElement
$root = $dom->documentElement();
Returns the root element of the Document. A document can have just one root
element to contain the document's data.
Optionally one can use I<<<<<< getDocumentElement >>>>>>.
=item setDocumentElement
$dom->setDocumentElement( $root );
This function enables you to set the root element for a document. The function
supports the import of a node from a different document tree, but does not
support a document fragment as $root.
=item createElement
$element = $dom->createElement( $nodename );
This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>>.
=item createElementNS
$element = $dom->createElementNS( $namespaceURI, $nodename );
This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>> and placed in the given namespace.
=item createTextNode
$text = $dom->createTextNode( $content_text );
As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Text Node >>>>>> bound to the DOM.
=item createComment
$comment = $dom->createComment( $comment_text );
As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Comment Node >>>>>> bound to the DOM.
=item createAttribute
$attrnode = $doc->createAttribute($name [,$value]);
Creates a new Attribute node.
=item createAttributeNS
$attrnode = $doc->createAttributeNS( $namespaceURI, $name [,$value] );
Creates an Attribute bound to a namespace.
=item createDocumentFragment
$fragment = $doc->createDocumentFragment();
This function creates a DocumentFragment.
=item createCDATASection
$cdata = $dom->createCDATASection( $cdata_content );
Similar to createTextNode and createComment, this function creates a
CDataSection bound to the current DOM.
=item createProcessingInstruction
my $pi = $doc->createProcessingInstruction( $target, $data );
create a processing instruction node.
Since this method is quite long one may use its short form I<<<<<< createPI() >>>>>>.
=item createEntityReference
my $entref = $doc->createEntityReference($refname);
If a document has a DTD specified, one can create entity references by using
this function. If one wants to add an entity reference to the document, this
reference has to be created by this function.
An entity reference is unique to a document and cannot be passed to other
documents as other nodes can be passed.
I<<<<<< NOTE: >>>>>> A text content containing something that looks like an entity reference, will
not be expanded to a real entity reference unless it is a predefined entity
my $string = "&foo;";
$some_element->appendText( $string );
print $some_element->textContent; # prints "&amp;foo;"
=item createInternalSubset
$dtd = $document->createInternalSubset( $rootnode, $public, $system);
This function creates and adds an internal subset to the given document.
Because the function automatically adds the DTD to the document there is no
need to add the created node explicitly to the document.
my $document = XML::LibXML::Document->new();
my $dtd = $document->createInternalSubset( "foo", undef, "foo.dtd" );
will result in the following XML document:
<?xml version="1.0"?>
<!DOCTYPE foo SYSTEM "foo.dtd">
By setting the public parameter it is possible to set PUBLIC DTDs to a given
document. So
my $document = XML::LibXML::Document->new();
my $dtd = $document->createInternalSubset( "foo", "-//FOO//DTD FOO 0.1//EN", undef );
will cause the following declaration to be created on the document:
<?xml version="1.0"?>
<!DOCTYPE foo PUBLIC "-//FOO//DTD FOO 0.1//EN">
=item createExternalSubset
$dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
This function is similar to C<<<<<< createInternalSubset() >>>>>> but this DTD is considered to be external and is therefore not added to the
document itself. Nevertheless it can be used for validation purposes.
=item importNode
$document->importNode( $node );
If a node is not part of a document, it can be imported to another document. As
specified in DOM Level 2 Specification the Node will not be altered or removed
from its original document (C<<<<<< $node-E<gt>cloneNode(1) >>>>>> will get called implicitly).
I<<<<<< NOTE: >>>>>> Don't try to use importNode() to import sub-trees that contain an entity
reference - even if the entity reference is the root node of the sub-tree. This
will cause serious problems to your program. This is a limitation of libxml2
and not of XML::LibXML itself.
=item adoptNode
$document->adoptNode( $node );
If a node is not part of a document, it can be imported to another document. As
specified in DOM Level 3 Specification the Node will not be altered but it will
be removed from its original document.
After a document adopted a node, the node, its attributes and all its
descendants belong to the new document. Because the node does not belong to the
old document, it will be unlinked from its old location first.
I<<<<<< NOTE: >>>>>> Don't try to use adoptNode() to import sub-trees that contain entity references -
even if the entity reference is the root node of the sub-tree. This will cause
serious problems to your program. This is a limitation of libxml2 and not of
XML::LibXML itself.
=item externalSubset
my $dtd = $doc->externalSubset;
If a document has an external subset defined it will be returned by this
function.
I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in
XML::LibXML is still limited. In particular one may not want to use common node
functions on doctype declaration nodes!
=item internalSubset
my $dtd = $doc->internalSubset;
If a document has an internal subset defined it will be returned by this
function.
I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in
XML::LibXML is still limited. In particular one may not want to use common node
functions on doctype declaration nodes!
=item setExternalSubset
$doc->setExternalSubset($dtd);
I<<<<<< EXPERIMENTAL! >>>>>>
This method sets a DTD node as an external subset of the given document.
=item setInternalSubset
$doc->setInternalSubset($dtd);
I<<<<<< EXPERIMENTAL! >>>>>>
This method sets a DTD node as an internal subset of the given document.
=item removeExternalSubset
my $dtd = $doc->removeExternalSubset();
I<<<<<< EXPERIMENTAL! >>>>>>
If a document has an external subset defined it can be removed from the
document by using this function. The removed dtd node will be returned.
=item removeInternalSubset
my $dtd = $doc->removeInternalSubset();
I<<<<<< EXPERIMENTAL! >>>>>>
If a document has an internal subset defined it can be removed from the
document by using this function. The removed dtd node will be returned.
=item getElementsByTagName
my @nodelist = $doc->getElementsByTagName($tagname);
Implements the DOM Level 2 function
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByTagNameNS
my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
Implements the DOM Level 2 function
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByLocalName
my @nodelist = $doc->getElementsByLocalName($localname);
This allows the fetching of all nodes from a given document with the given
Localname.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementById
my $node = $doc->getElementById($id);
Returns the element that has an ID attribute with the given value. If no such
element exists, this returns undef.
Note: the ID of an element may change while manipulating the document. For
documents with a DTD, the information about ID attributes is only available if
DTD loading/validation has been requested. For HTML documents parsed with the
HTML parser ID detection is done automatically. In XML documents, all "xml:id"
attributes are considered to be of type ID. You can test ID-ness of an
attribute node with $attr->isId().
In versions 1.59 and earlier this method was called getElementsById() (plural)
by mistake. Starting from 1.60 this name is maintained as an alias only for
backward compatibility.
=item indexElements
$dom->indexElements();
This function causes libxml2 to stamp all elements in a document with their
document position index which considerably speeds up XPath queries for large
documents. It should only be used with static documents that won't be further
changed by any DOM methods, because once a document is indexed, XPath will
always prefer the index to other methods of determining the document order of
nodes. XPath could therefore return improperly ordered node-lists when applied
on a document that has been changed after being indexed. It is of course
possible to use this method to re-index a modified document before using it
with XPath again. This function is not a part of the DOM specification.
This function returns number of elements indexed, -1 if error occurred, or -2
if this feature is not available in the running libxml2.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,47 @@
=head1 NAME
XML::LibXML::DocumentFragment - XML::LibXML's DOM L2 Document Fragment Implementation
=head1 SYNOPSIS
use XML::LibXML;
=head1 DESCRIPTION
This class is a helper class as described in the DOM Level 2 Specification. It
is implemented as a node without name. All adding, inserting or replacing
functions are aware of document fragments now.
As well I<<<<<< all >>>>>> unbound nodes (all nodes that do not belong to any document sub-tree) are
implicit members of document fragments.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

109
lib/XML/LibXML/Dtd.pod Normal file
View File

@ -0,0 +1,109 @@
=head1 NAME
XML::LibXML::Dtd - XML::LibXML DTD Handling
=head1 SYNOPSIS
use XML::LibXML;
$dtd = XML::LibXML::Dtd->new($public_id, $system_id);
$dtd = XML::LibXML::Dtd->parse_string($dtd_str);
$name = $dtd->getName();
$publicId = $dtd->publicId();
$systemId = $dtd->systemId();
=head1 DESCRIPTION
This class holds a DTD. You may parse a DTD from either a string, or from an
external SYSTEM identifier.
No support is available as yet for parsing from a filehandle.
XML::LibXML::Dtd is a sub-class of L<<<<<< XML::LibXML::Node >>>>>>, so all the methods available to nodes (particularly toString()) are available
to Dtd objects.
=head1 METHODS
=over 4
=item new
$dtd = XML::LibXML::Dtd->new($public_id, $system_id);
Parse a DTD from the system identifier, and return a DTD object that you can
pass to $doc->is_valid() or $doc->validate().
my $dtd = XML::LibXML::Dtd->new(
"SOME // Public / ID / 1.0",
"test.dtd"
);
my $doc = XML::LibXML->new->parse_file("test.xml");
$doc->validate($dtd);
=item parse_string
$dtd = XML::LibXML::Dtd->parse_string($dtd_str);
The same as new() above, except you can parse a DTD from a string. Note that
parsing from string may fail if the DTD contains external parametric-entity
references with relative URLs.
=item getName
$name = $dtd->getName();
Returns the name of DTD; i.e., the name immediately following the DOCTYPE
keyword.
=item publicId
$publicId = $dtd->publicId();
Returns the public identifier of the external subset.
=item systemId
$systemId = $dtd->systemId();
Returns the system identifier of the external subset.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

402
lib/XML/LibXML/Element.pod Normal file
View File

@ -0,0 +1,402 @@
=head1 NAME
XML::LibXML::Element - XML::LibXML Class for Element Nodes
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Element nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$node = XML::LibXML::Element->new( $name );
$node->setAttribute( $aname, $avalue );
$node->setAttributeNS( $nsURI, $aname, $avalue );
$avalue = $node->getAttribute( $aname );
$avalue = $node->getAttributeNS( $nsURI, $aname );
$attrnode = $node->getAttributeNode( $aname );
$attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname );
$node->removeAttribute( $aname );
$node->removeAttributeNS( $nsURI, $aname );
$boolean = $node->hasAttribute( $aname );
$boolean = $node->hasAttributeNS( $nsURI, $aname );
@nodes = $node->getChildrenByTagName($tagname);
@nodes = $node->getChildrenByTagNameNS($nsURI,$tagname);
@nodes = $node->getChildrenByLocalName($localname);
@nodes = $node->getElementsByTagName($tagname);
@nodes = $node->getElementsByTagNameNS($nsURI,$localname);
@nodes = $node->getElementsByLocalName($localname);
$node->appendWellBalancedChunk( $chunk );
$node->appendText( $PCDATA );
$node->appendTextNode( $PCDATA );
$node->appendTextChild( $childname , $PCDATA );
$node->setNamespace( $nsURI , $nsPrefix, $activate );
$node->setNamespaceDeclURI( $nsPrefix, $newURI );
$node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix );
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::Element->new( $name );
This function creates a new node unbound to any DOM.
=item setAttribute
$node->setAttribute( $aname, $avalue );
This method sets or replaces the node's attribute C<<<<<< $aname >>>>>> to the value C<<<<<< $avalue >>>>>>.
=item setAttributeNS
$node->setAttributeNS( $nsURI, $aname, $avalue );
Namespace-aware version of C<<<<<< setAttribute >>>>>>, where C<<<<<< $nsURI >>>>>> is a namespace URI, C<<<<<< $aname >>>>>> is a qualified name, and C<<<<<< $avalue >>>>>> is the value. The namespace URI may be null (empty or undefined) in order to
create an attribute which has no namespace.
The current implementation differs from DOM in the following aspects:
If an attribute with the same local name and namespace URI already exists on
the element, but its prefix differs from the prefix of C<<<<<< $aname >>>>>>, then this function is supposed to change the prefix (regardless of namespace
declarations and possible collisions). However, the current implementation does
rather the opposite. If a prefix is declared for the namespace URI in the scope
of the attribute, then the already declared prefix is used, disregarding the
prefix specified in C<<<<<< $aname >>>>>>. If no prefix is declared for the namespace, the function tries to declare the
prefix specified in C<<<<<< $aname >>>>>> and dies if the prefix is already taken by some other namespace.
According to DOM Level 2 specification, this method can also be used to create
or modify special attributes used for declaring XML namespaces (which belong to
the namespace "http://www.w3.org/2000/xmlns/" and have prefix or name "xmlns").
This should work since version 1.61, but again the implementation differs from
DOM specification in the following: if a declaration of the same namespace
prefix already exists on the element, then changing its value via this method
automatically changes the namespace of all elements and attributes in its
scope. This is because in libxml2 the namespace URI of an element is not static
but is computed from a pointer to a namespace declaration attribute.
=item getAttribute
$avalue = $node->getAttribute( $aname );
If C<<<<<< $node >>>>>> has an attribute with the name C<<<<<< $aname >>>>>>, the value of this attribute is returned.
=item getAttributeNS
$avalue = $node->getAttributeNS( $nsURI, $aname );
Retrieves an attribute value by local name and namespace URI.
=item getAttributeNode
$attrnode = $node->getAttributeNode( $aname );
Retrieve an attribute node by name. If no attribute with a given name exists, C<<<<<< undef >>>>>> is returned.
=item getAttributeNodeNS
$attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname );
Retrieves an attribute node by local name and namespace URI. If no attribute
with a given localname and namespace exists, C<<<<<< undef >>>>>> is returned.
=item removeAttribute
$node->removeAttribute( $aname );
The method removes the attribute C<<<<<< $aname >>>>>> from the node's attribute list, if the attribute can be found.
=item removeAttributeNS
$node->removeAttributeNS( $nsURI, $aname );
Namespace version of C<<<<<< removeAttribute >>>>>>.
=item hasAttribute
$boolean = $node->hasAttribute( $aname );
This function tests if the named attribute is set for the node. If the
attribute is specified, TRUE (1) will be returned, otherwise the return value
is FALSE (0).
=item hasAttributeNS
$boolean = $node->hasAttributeNS( $nsURI, $aname );
Namespace version of C<<<<<< hasAttribute >>>>>>.
=item getChildrenByTagName
@nodes = $node->getChildrenByTagName($tagname);
The function gives direct access to all child elements of the current node with
a given tagname, where tagname is a qualified name, that is, in case of
namespace usage it may consist of a prefix and local name. This function makes
things a lot easier if one needs to handle big data sets. A special tagname '*'
can be used to match any name.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getChildrenByTagNameNS
@nodes = $node->getChildrenByTagNameNS($nsURI,$tagname);
Namespace version of C<<<<<< getChildrenByTagName >>>>>>. A special nsURI '*' matches any namespace URI, in which case the function
behaves just like C<<<<<< getChildrenByLocalName >>>>>>.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getChildrenByLocalName
@nodes = $node->getChildrenByLocalName($localname);
The function gives direct access to all child elements of the current node with
a given local name. It makes things a lot easier if one needs to handle big
data sets. A special C<<<<<< localname >>>>>> '*' can be used to match any local name.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getElementsByTagName
@nodes = $node->getElementsByTagName($tagname);
This function is part of the spec. It fetches all descendants of a node with a
given tagname, where C<<<<<< tagname >>>>>> is a qualified name, that is, in case of namespace usage it may consist of a
prefix and local name. A special C<<<<<< tagname >>>>>> '*' can be used to match any tag name.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByTagNameNS
@nodes = $node->getElementsByTagNameNS($nsURI,$localname);
Namespace version of C<<<<<< getElementsByTagName >>>>>> as found in the DOM spec. A special C<<<<<< localname >>>>>> '*' can be used to match any local name and C<<<<<< nsURI >>>>>> '*' can be used to match any namespace URI.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByLocalName
@nodes = $node->getElementsByLocalName($localname);
This function is not found in the DOM specification. It is a mix of
getElementsByTagName and getElementsByTagNameNS. It will fetch all tags
matching the given local-name. This allows one to select tags with the same
local name across namespace borders.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item appendWellBalancedChunk
$node->appendWellBalancedChunk( $chunk );
Sometimes it is necessary to append an XML tree, given as a string, to a node. I<<<<<< appendWellBalancedChunk >>>>>> will do the trick for you. But this is only done if the string is C<<<<<< well-balanced >>>>>>.
I<<<<<< Note that appendWellBalancedChunk() is only left for compatibility reasons >>>>>>. Implicitly it uses
my $fragment = $parser->parse_balanced_chunk( $chunk );
$node->appendChild( $fragment );
This form is more explicit and makes it easier to control the flow of a script.
=item appendText
$node->appendText( $PCDATA );
alias for appendTextNode().
=item appendTextNode
$node->appendTextNode( $PCDATA );
This wrapper function lets you add a string directly to an element node.
=item appendTextChild
$node->appendTextChild( $childname , $PCDATA );
Somewhat similar to C<<<<<< appendTextNode >>>>>>: it lets you add an element that contains only a C<<<<<< text node >>>>>>, directly by specifying the element name and the text content.
=item setNamespace
$node->setNamespace( $nsURI , $nsPrefix, $activate );
setNamespace() allows one to apply a namespace to an element. The function
takes three parameters: the namespace URI, which is required, and two
optional values: the prefix, which is the namespace prefix as it should be used
in child elements or attributes, and the additional activate parameter. If
prefix is not given, undefined or empty, this function tries to create a
declaration of the default namespace.
The activate parameter is most useful: If this parameter is set to FALSE (0), a
new namespace declaration is simply added to the element while the element's
namespace itself is not altered. Nevertheless, activate is set to TRUE (1) by
default. In this case the namespace is used as the node's effective namespace.
This means the namespace prefix is added to the node name and if there was a
namespace already active for the node, it will be replaced (but its declaration
is not removed from the document). A new namespace declaration is only created
if necessary (that is, if the element is already in the scope of a namespace
declaration associating the prefix with the namespace URI, then this
declaration is reused).
The following example may clarify this:
my $e1 = $doc->createElement("bar");
$e1->setNamespace("http://foobar.org", "foo")
results in
<foo:bar xmlns:foo="http://foobar.org"/>
while
my $e2 = $doc->createElement("bar");
$e2->setNamespace("http://foobar.org", "foo",0)
results only in
<bar xmlns:foo="http://foobar.org"/>
By using $activate == 0 it is possible to create multiple namespace
declarations on a single element.
The function fails if it is required to create a declaration associating the
prefix with the namespace URI but the element already carries a declaration
with the same prefix but different namespace URI.
=item setNamespaceDeclURI
$node->setNamespaceDeclURI( $nsPrefix, $newURI );
EXPERIMENTAL IN 1.61 !
This function directly manipulates an existing namespace declaration on an
element. It takes two parameters: the prefix by which it looks up the namespace
declaration and a new namespace URI which replaces its previous value.
It returns 1 if the namespace declaration was found and changed, 0 otherwise.
All elements and attributes (even those previously unbound from the document)
for which the namespace declaration determines their namespace belong to the
new namespace after the change.
If the new URI is undef or empty, the nodes have no namespace and no prefix
after the change. Namespace declarations once nulled in this way do not further
appear in the serialized output (but do remain in the document for internal
integrity of libxml2 data structures).
This function is NOT part of any DOM API.
=item setNamespaceDeclPrefix
$node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix );
EXPERIMENTAL IN 1.61 !
This function directly manipulates an existing namespace declaration on an
element. It takes two parameters: the old prefix by which it looks up the
namespace declaration and a new prefix which is to replace the old one.
The function dies with an error if the element is in the scope of another
declaration whose prefix equals the new prefix, or if the change should
result in a declaration with a non-empty prefix but empty namespace URI.
Otherwise, it returns 1 if the namespace declaration was found and changed and
0 if not found.
All elements and attributes (even those previously unbound from the document)
for which the namespace declaration determines their namespace change their
prefix to the new value.
If the new prefix is undef or empty, the namespace declaration becomes a
declaration of a default namespace. The corresponding nodes drop their
namespace prefix (but remain in the, now default, namespace). In this case the
function fails, if the containing element is in the scope of another default
namespace declaration.
This function is NOT part of any DOM API.
=back
=head1 OVERLOADING
XML::LibXML::Element overloads hash dereferencing to provide access to the
element's attributes. For non-namespaced attributes, the attribute name is the
hash key, and the attribute value is the hash value. For namespaced attributes,
the hash key is qualified with the namespace URI, using Clark notation.
Perl's "tied hash" feature is used, which means that the hash gives you
read-write access to the element's attributes. For more information, see L<<<<<< XML::LibXML::AttributeHash >>>>>>.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

501
lib/XML/LibXML/ErrNo.pm Normal file
View File

@ -0,0 +1,501 @@
# $Id: ErrNo.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
# Package of symbolic names for numeric error codes. Each constant declared
# below becomes a sub in this package. No Exporter setup is visible here, so
# callers presumably use fully qualified names (XML::LibXML::ErrNo::NAME) --
# TODO confirm against the rest of the distribution.
package XML::LibXML::ErrNo;
use strict;
use warnings;
use vars qw($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# ERR_* / WAR_* codes, numbered consecutively from 0 to 100.
# Declared in one statement via the hash-reference form of "use constant";
# each key becomes a constant sub exactly as with one declaration per line.
use constant {
    ERR_OK => 0,
    ERR_INTERNAL_ERROR => 1,
    ERR_NO_MEMORY => 2,
    ERR_DOCUMENT_START => 3,
    ERR_DOCUMENT_EMPTY => 4,
    ERR_DOCUMENT_END => 5,
    ERR_INVALID_HEX_CHARREF => 6,
    ERR_INVALID_DEC_CHARREF => 7,
    ERR_INVALID_CHARREF => 8,
    ERR_INVALID_CHAR => 9,
    ERR_CHARREF_AT_EOF => 10,
    ERR_CHARREF_IN_PROLOG => 11,
    ERR_CHARREF_IN_EPILOG => 12,
    ERR_CHARREF_IN_DTD => 13,
    ERR_ENTITYREF_AT_EOF => 14,
    ERR_ENTITYREF_IN_PROLOG => 15,
    ERR_ENTITYREF_IN_EPILOG => 16,
    ERR_ENTITYREF_IN_DTD => 17,
    ERR_PEREF_AT_EOF => 18,
    ERR_PEREF_IN_PROLOG => 19,
    ERR_PEREF_IN_EPILOG => 20,
    ERR_PEREF_IN_INT_SUBSET => 21,
    ERR_ENTITYREF_NO_NAME => 22,
    ERR_ENTITYREF_SEMICOL_MISSING => 23,
    ERR_PEREF_NO_NAME => 24,
    ERR_PEREF_SEMICOL_MISSING => 25,
    ERR_UNDECLARED_ENTITY => 26,
    WAR_UNDECLARED_ENTITY => 27,
    ERR_UNPARSED_ENTITY => 28,
    ERR_ENTITY_IS_EXTERNAL => 29,
    ERR_ENTITY_IS_PARAMETER => 30,
    ERR_UNKNOWN_ENCODING => 31,
    ERR_UNSUPPORTED_ENCODING => 32,
    ERR_STRING_NOT_STARTED => 33,
    ERR_STRING_NOT_CLOSED => 34,
    ERR_NS_DECL_ERROR => 35,
    ERR_ENTITY_NOT_STARTED => 36,
    ERR_ENTITY_NOT_FINISHED => 37,
    ERR_LT_IN_ATTRIBUTE => 38,
    ERR_ATTRIBUTE_NOT_STARTED => 39,
    ERR_ATTRIBUTE_NOT_FINISHED => 40,
    ERR_ATTRIBUTE_WITHOUT_VALUE => 41,
    ERR_ATTRIBUTE_REDEFINED => 42,
    ERR_LITERAL_NOT_STARTED => 43,
    ERR_LITERAL_NOT_FINISHED => 44,
    ERR_COMMENT_NOT_FINISHED => 45,
    ERR_PI_NOT_STARTED => 46,
    ERR_PI_NOT_FINISHED => 47,
    ERR_NOTATION_NOT_STARTED => 48,
    ERR_NOTATION_NOT_FINISHED => 49,
    ERR_ATTLIST_NOT_STARTED => 50,
    ERR_ATTLIST_NOT_FINISHED => 51,
    ERR_MIXED_NOT_STARTED => 52,
    ERR_MIXED_NOT_FINISHED => 53,
    ERR_ELEMCONTENT_NOT_STARTED => 54,
    ERR_ELEMCONTENT_NOT_FINISHED => 55,
    ERR_XMLDECL_NOT_STARTED => 56,
    ERR_XMLDECL_NOT_FINISHED => 57,
    ERR_CONDSEC_NOT_STARTED => 58,
    ERR_CONDSEC_NOT_FINISHED => 59,
    ERR_EXT_SUBSET_NOT_FINISHED => 60,
    ERR_DOCTYPE_NOT_FINISHED => 61,
    ERR_MISPLACED_CDATA_END => 62,
    ERR_CDATA_NOT_FINISHED => 63,
    ERR_RESERVED_XML_NAME => 64,
    ERR_SPACE_REQUIRED => 65,
    ERR_SEPARATOR_REQUIRED => 66,
    ERR_NMTOKEN_REQUIRED => 67,
    ERR_NAME_REQUIRED => 68,
    ERR_PCDATA_REQUIRED => 69,
    ERR_URI_REQUIRED => 70,
    ERR_PUBID_REQUIRED => 71,
    ERR_LT_REQUIRED => 72,
    ERR_GT_REQUIRED => 73,
    ERR_LTSLASH_REQUIRED => 74,
    ERR_EQUAL_REQUIRED => 75,
    ERR_TAG_NAME_MISMATCH => 76,
    ERR_TAG_NOT_FINISHED => 77,
    ERR_STANDALONE_VALUE => 78,
    ERR_ENCODING_NAME => 79,
    ERR_HYPHEN_IN_COMMENT => 80,
    ERR_INVALID_ENCODING => 81,
    ERR_EXT_ENTITY_STANDALONE => 82,
    ERR_CONDSEC_INVALID => 83,
    ERR_VALUE_REQUIRED => 84,
    ERR_NOT_WELL_BALANCED => 85,
    ERR_EXTRA_CONTENT => 86,
    ERR_ENTITY_CHAR_ERROR => 87,
    ERR_ENTITY_PE_INTERNAL => 88,
    ERR_ENTITY_LOOP => 89,
    ERR_ENTITY_BOUNDARY => 90,
    ERR_INVALID_URI => 91,
    ERR_URI_FRAGMENT => 92,
    WAR_CATALOG_PI => 93,
    ERR_NO_DTD => 94,
    ERR_CONDSEC_INVALID_KEYWORD => 95,
    ERR_VERSION_MISSING => 96,
    WAR_UNKNOWN_VERSION => 97,
    WAR_LANG_VALUE => 98,
    WAR_NS_URI => 99,
    WAR_NS_URI_RELATIVE => 100,
};
# NS_ERR_* codes, 200-203.
use constant {
    NS_ERR_XML_NAMESPACE => 200,
    NS_ERR_UNDEFINED_NAMESPACE => 201,
    NS_ERR_QNAME => 202,
    NS_ERR_ATTRIBUTE_REDEFINED => 203,
};

# DTD_* codes, 500-537.
use constant {
    DTD_ATTRIBUTE_DEFAULT => 500,
    DTD_ATTRIBUTE_REDEFINED => 501,
    DTD_ATTRIBUTE_VALUE => 502,
    DTD_CONTENT_ERROR => 503,
    DTD_CONTENT_MODEL => 504,
    DTD_CONTENT_NOT_DETERMINIST => 505,
    DTD_DIFFERENT_PREFIX => 506,
    DTD_ELEM_DEFAULT_NAMESPACE => 507,
    DTD_ELEM_NAMESPACE => 508,
    DTD_ELEM_REDEFINED => 509,
    DTD_EMPTY_NOTATION => 510,
    DTD_ENTITY_TYPE => 511,
    DTD_ID_FIXED => 512,
    DTD_ID_REDEFINED => 513,
    DTD_ID_SUBSET => 514,
    DTD_INVALID_CHILD => 515,
    DTD_INVALID_DEFAULT => 516,
    DTD_LOAD_ERROR => 517,
    DTD_MISSING_ATTRIBUTE => 518,
    DTD_MIXED_CORRUPT => 519,
    DTD_MULTIPLE_ID => 520,
    DTD_NO_DOC => 521,
    DTD_NO_DTD => 522,
    DTD_NO_ELEM_NAME => 523,
    DTD_NO_PREFIX => 524,
    DTD_NO_ROOT => 525,
    DTD_NOTATION_REDEFINED => 526,
    DTD_NOTATION_VALUE => 527,
    DTD_NOT_EMPTY => 528,
    DTD_NOT_PCDATA => 529,
    DTD_NOT_STANDALONE => 530,
    DTD_ROOT_NAME => 531,
    DTD_STANDALONE_WHITE_SPACE => 532,
    DTD_UNKNOWN_ATTRIBUTE => 533,
    DTD_UNKNOWN_ELEM => 534,
    DTD_UNKNOWN_ENTITY => 535,
    DTD_UNKNOWN_ID => 536,
    DTD_UNKNOWN_NOTATION => 537,
};

# HTML_* codes, 800-801. The spelling "STRUCURE" is the constant's public
# name and must stay as it is for existing callers.
use constant {
    HTML_STRUCURE_ERROR => 800,
    HTML_UNKNOWN_TAG => 801,
};
# RNGP_* codes, numbered consecutively from 1000 to 1122.
# The spelling "EMTPY" in RNGP_EXTERNALREF_EMTPY is the constant's public
# name and must stay as it is for existing callers.
use constant {
    RNGP_ANYNAME_ATTR_ANCESTOR => 1000,
    RNGP_ATTR_CONFLICT => 1001,
    RNGP_ATTRIBUTE_CHILDREN => 1002,
    RNGP_ATTRIBUTE_CONTENT => 1003,
    RNGP_ATTRIBUTE_EMPTY => 1004,
    RNGP_ATTRIBUTE_NOOP => 1005,
    RNGP_CHOICE_CONTENT => 1006,
    RNGP_CHOICE_EMPTY => 1007,
    RNGP_CREATE_FAILURE => 1008,
    RNGP_DATA_CONTENT => 1009,
    RNGP_DEF_CHOICE_AND_INTERLEAVE => 1010,
    RNGP_DEFINE_CREATE_FAILED => 1011,
    RNGP_DEFINE_EMPTY => 1012,
    RNGP_DEFINE_MISSING => 1013,
    RNGP_DEFINE_NAME_MISSING => 1014,
    RNGP_ELEM_CONTENT_EMPTY => 1015,
    RNGP_ELEM_CONTENT_ERROR => 1016,
    RNGP_ELEMENT_EMPTY => 1017,
    RNGP_ELEMENT_CONTENT => 1018,
    RNGP_ELEMENT_NAME => 1019,
    RNGP_ELEMENT_NO_CONTENT => 1020,
    RNGP_ELEM_TEXT_CONFLICT => 1021,
    RNGP_EMPTY => 1022,
    RNGP_EMPTY_CONSTRUCT => 1023,
    RNGP_EMPTY_CONTENT => 1024,
    RNGP_EMPTY_NOT_EMPTY => 1025,
    RNGP_ERROR_TYPE_LIB => 1026,
    RNGP_EXCEPT_EMPTY => 1027,
    RNGP_EXCEPT_MISSING => 1028,
    RNGP_EXCEPT_MULTIPLE => 1029,
    RNGP_EXCEPT_NO_CONTENT => 1030,
    RNGP_EXTERNALREF_EMTPY => 1031,
    RNGP_EXTERNAL_REF_FAILURE => 1032,
    RNGP_EXTERNALREF_RECURSE => 1033,
    RNGP_FORBIDDEN_ATTRIBUTE => 1034,
    RNGP_FOREIGN_ELEMENT => 1035,
    RNGP_GRAMMAR_CONTENT => 1036,
    RNGP_GRAMMAR_EMPTY => 1037,
    RNGP_GRAMMAR_MISSING => 1038,
    RNGP_GRAMMAR_NO_START => 1039,
    RNGP_GROUP_ATTR_CONFLICT => 1040,
    RNGP_HREF_ERROR => 1041,
    RNGP_INCLUDE_EMPTY => 1042,
    RNGP_INCLUDE_FAILURE => 1043,
    RNGP_INCLUDE_RECURSE => 1044,
    RNGP_INTERLEAVE_ADD => 1045,
    RNGP_INTERLEAVE_CREATE_FAILED => 1046,
    RNGP_INTERLEAVE_EMPTY => 1047,
    RNGP_INTERLEAVE_NO_CONTENT => 1048,
    RNGP_INVALID_DEFINE_NAME => 1049,
    RNGP_INVALID_URI => 1050,
    RNGP_INVALID_VALUE => 1051,
    RNGP_MISSING_HREF => 1052,
    RNGP_NAME_MISSING => 1053,
    RNGP_NEED_COMBINE => 1054,
    RNGP_NOTALLOWED_NOT_EMPTY => 1055,
    RNGP_NSNAME_ATTR_ANCESTOR => 1056,
    RNGP_NSNAME_NO_NS => 1057,
    RNGP_PARAM_FORBIDDEN => 1058,
    RNGP_PARAM_NAME_MISSING => 1059,
    RNGP_PARENTREF_CREATE_FAILED => 1060,
    RNGP_PARENTREF_NAME_INVALID => 1061,
    RNGP_PARENTREF_NO_NAME => 1062,
    RNGP_PARENTREF_NO_PARENT => 1063,
    RNGP_PARENTREF_NOT_EMPTY => 1064,
    RNGP_PARSE_ERROR => 1065,
    RNGP_PAT_ANYNAME_EXCEPT_ANYNAME => 1066,
    RNGP_PAT_ATTR_ATTR => 1067,
    RNGP_PAT_ATTR_ELEM => 1068,
    RNGP_PAT_DATA_EXCEPT_ATTR => 1069,
    RNGP_PAT_DATA_EXCEPT_ELEM => 1070,
    RNGP_PAT_DATA_EXCEPT_EMPTY => 1071,
    RNGP_PAT_DATA_EXCEPT_GROUP => 1072,
    RNGP_PAT_DATA_EXCEPT_INTERLEAVE => 1073,
    RNGP_PAT_DATA_EXCEPT_LIST => 1074,
    RNGP_PAT_DATA_EXCEPT_ONEMORE => 1075,
    RNGP_PAT_DATA_EXCEPT_REF => 1076,
    RNGP_PAT_DATA_EXCEPT_TEXT => 1077,
    RNGP_PAT_LIST_ATTR => 1078,
    RNGP_PAT_LIST_ELEM => 1079,
    RNGP_PAT_LIST_INTERLEAVE => 1080,
    RNGP_PAT_LIST_LIST => 1081,
    RNGP_PAT_LIST_REF => 1082,
    RNGP_PAT_LIST_TEXT => 1083,
    RNGP_PAT_NSNAME_EXCEPT_ANYNAME => 1084,
    RNGP_PAT_NSNAME_EXCEPT_NSNAME => 1085,
    RNGP_PAT_ONEMORE_GROUP_ATTR => 1086,
    RNGP_PAT_ONEMORE_INTERLEAVE_ATTR => 1087,
    RNGP_PAT_START_ATTR => 1088,
    RNGP_PAT_START_DATA => 1089,
    RNGP_PAT_START_EMPTY => 1090,
    RNGP_PAT_START_GROUP => 1091,
    RNGP_PAT_START_INTERLEAVE => 1092,
    RNGP_PAT_START_LIST => 1093,
    RNGP_PAT_START_ONEMORE => 1094,
    RNGP_PAT_START_TEXT => 1095,
    RNGP_PAT_START_VALUE => 1096,
    RNGP_PREFIX_UNDEFINED => 1097,
    RNGP_REF_CREATE_FAILED => 1098,
    RNGP_REF_CYCLE => 1099,
    RNGP_REF_NAME_INVALID => 1100,
    RNGP_REF_NO_DEF => 1101,
    RNGP_REF_NO_NAME => 1102,
    RNGP_REF_NOT_EMPTY => 1103,
    RNGP_START_CHOICE_AND_INTERLEAVE => 1104,
    RNGP_START_CONTENT => 1105,
    RNGP_START_EMPTY => 1106,
    RNGP_START_MISSING => 1107,
    RNGP_TEXT_EXPECTED => 1108,
    RNGP_TEXT_HAS_CHILD => 1109,
    RNGP_TYPE_MISSING => 1110,
    RNGP_TYPE_NOT_FOUND => 1111,
    RNGP_TYPE_VALUE => 1112,
    RNGP_UNKNOWN_ATTRIBUTE => 1113,
    RNGP_UNKNOWN_COMBINE => 1114,
    RNGP_UNKNOWN_CONSTRUCT => 1115,
    RNGP_UNKNOWN_TYPE_LIB => 1116,
    RNGP_URI_FRAGMENT => 1117,
    RNGP_URI_NOT_ABSOLUTE => 1118,
    RNGP_VALUE_EMPTY => 1119,
    RNGP_VALUE_NO_CONTENT => 1120,
    RNGP_XMLNS_NAME => 1121,
    RNGP_XML_NS => 1122,
};
# XPATH_* codes, 1200-1221. Three XPTR_* names (1216-1218) sit inside this
# consecutive run.
use constant {
    XPATH_EXPRESSION_OK => 1200,
    XPATH_NUMBER_ERROR => 1201,
    XPATH_UNFINISHED_LITERAL_ERROR => 1202,
    XPATH_START_LITERAL_ERROR => 1203,
    XPATH_VARIABLE_REF_ERROR => 1204,
    XPATH_UNDEF_VARIABLE_ERROR => 1205,
    XPATH_INVALID_PREDICATE_ERROR => 1206,
    XPATH_EXPR_ERROR => 1207,
    XPATH_UNCLOSED_ERROR => 1208,
    XPATH_UNKNOWN_FUNC_ERROR => 1209,
    XPATH_INVALID_OPERAND => 1210,
    XPATH_INVALID_TYPE => 1211,
    XPATH_INVALID_ARITY => 1212,
    XPATH_INVALID_CTXT_SIZE => 1213,
    XPATH_INVALID_CTXT_POSITION => 1214,
    XPATH_MEMORY_ERROR => 1215,
    XPTR_SYNTAX_ERROR => 1216,
    XPTR_RESOURCE_ERROR => 1217,
    XPTR_SUB_RESOURCE_ERROR => 1218,
    XPATH_UNDEF_PREFIX_ERROR => 1219,
    XPATH_ENCODING_ERROR => 1220,
    XPATH_INVALID_CHAR_ERROR => 1221,
};

# TREE_* codes, 1300-1302.
use constant {
    TREE_INVALID_HEX => 1300,
    TREE_INVALID_DEC => 1301,
    TREE_UNTERMINATED_ENTITY => 1302,
};

# SAVE_* codes, 1400-1403.
use constant {
    SAVE_NOT_UTF8 => 1400,
    SAVE_CHAR_INVALID => 1401,
    SAVE_NO_DOCTYPE => 1402,
    SAVE_UNKNOWN_ENCODING => 1403,
};

# The single REGEXP_* code.
use constant REGEXP_COMPILE_ERROR => 1450;
# IO_* codes, numbered consecutively from 1500 to 1556.
use constant {
    IO_UNKNOWN => 1500,
    IO_EACCES => 1501,
    IO_EAGAIN => 1502,
    IO_EBADF => 1503,
    IO_EBADMSG => 1504,
    IO_EBUSY => 1505,
    IO_ECANCELED => 1506,
    IO_ECHILD => 1507,
    IO_EDEADLK => 1508,
    IO_EDOM => 1509,
    IO_EEXIST => 1510,
    IO_EFAULT => 1511,
    IO_EFBIG => 1512,
    IO_EINPROGRESS => 1513,
    IO_EINTR => 1514,
    IO_EINVAL => 1515,
    IO_EIO => 1516,
    IO_EISDIR => 1517,
    IO_EMFILE => 1518,
    IO_EMLINK => 1519,
    IO_EMSGSIZE => 1520,
    IO_ENAMETOOLONG => 1521,
    IO_ENFILE => 1522,
    IO_ENODEV => 1523,
    IO_ENOENT => 1524,
    IO_ENOEXEC => 1525,
    IO_ENOLCK => 1526,
    IO_ENOMEM => 1527,
    IO_ENOSPC => 1528,
    IO_ENOSYS => 1529,
    IO_ENOTDIR => 1530,
    IO_ENOTEMPTY => 1531,
    IO_ENOTSUP => 1532,
    IO_ENOTTY => 1533,
    IO_ENXIO => 1534,
    IO_EPERM => 1535,
    IO_EPIPE => 1536,
    IO_ERANGE => 1537,
    IO_EROFS => 1538,
    IO_ESPIPE => 1539,
    IO_ESRCH => 1540,
    IO_ETIMEDOUT => 1541,
    IO_EXDEV => 1542,
    IO_NETWORK_ATTEMPT => 1543,
    IO_ENCODER => 1544,
    IO_FLUSH => 1545,
    IO_WRITE => 1546,
    IO_NO_INPUT => 1547,
    IO_BUFFER_FULL => 1548,
    IO_LOAD_ERROR => 1549,
    IO_ENOTSOCK => 1550,
    IO_EISCONN => 1551,
    IO_ECONNREFUSED => 1552,
    IO_ENETUNREACH => 1553,
    IO_EADDRINUSE => 1554,
    IO_EALREADY => 1555,
    IO_EAFNOSUPPORT => 1556,
};
# XINCLUDE_* codes, 1600-1616.
use constant {
    XINCLUDE_RECURSION => 1600,
    XINCLUDE_PARSE_VALUE => 1601,
    XINCLUDE_ENTITY_DEF_MISMATCH => 1602,
    XINCLUDE_NO_HREF => 1603,
    XINCLUDE_NO_FALLBACK => 1604,
    XINCLUDE_HREF_URI => 1605,
    XINCLUDE_TEXT_FRAGMENT => 1606,
    XINCLUDE_TEXT_DOCUMENT => 1607,
    XINCLUDE_INVALID_CHAR => 1608,
    XINCLUDE_BUILD_FAILED => 1609,
    XINCLUDE_UNKNOWN_ENCODING => 1610,
    XINCLUDE_MULTIPLE_ROOT => 1611,
    XINCLUDE_XPTR_FAILED => 1612,
    XINCLUDE_XPTR_RESULT => 1613,
    XINCLUDE_INCLUDE_IN_INCLUDE => 1614,
    XINCLUDE_FALLBACKS_IN_INCLUDE => 1615,
    XINCLUDE_FALLBACK_NOT_IN_INCLUDE => 1616,
};

# CATALOG_* codes, 1650-1654.
use constant {
    CATALOG_MISSING_ATTR => 1650,
    CATALOG_ENTRY_BROKEN => 1651,
    CATALOG_PREFER_VALUE => 1652,
    CATALOG_NOT_CATALOG => 1653,
    CATALOG_RECURSION => 1654,
};
# SCHEMAP_* codes, numbered consecutively from 1700 to 1766.
#
# These mirror consecutive members of the error-code enum in libxml2's
# xmlerror.h, so every name needs its own value. The list used to assign 1756
# to both SCHEMAP_REGEXP_INVALID and SCHEMAP_FAILED_LOAD, which left
# SCHEMAP_FAILED_LOAD and every later SCHEMAP_* constant one lower than the
# code libxml2 actually reports. They are renumbered here (1757-1766).
use constant SCHEMAP_PREFIX_UNDEFINED => 1700;
use constant SCHEMAP_ATTRFORMDEFAULT_VALUE => 1701;
use constant SCHEMAP_ATTRGRP_NONAME_NOREF => 1702;
use constant SCHEMAP_ATTR_NONAME_NOREF => 1703;
use constant SCHEMAP_COMPLEXTYPE_NONAME_NOREF => 1704;
use constant SCHEMAP_ELEMFORMDEFAULT_VALUE => 1705;
use constant SCHEMAP_ELEM_NONAME_NOREF => 1706;
use constant SCHEMAP_EXTENSION_NO_BASE => 1707;
use constant SCHEMAP_FACET_NO_VALUE => 1708;
use constant SCHEMAP_FAILED_BUILD_IMPORT => 1709;
use constant SCHEMAP_GROUP_NONAME_NOREF => 1710;
use constant SCHEMAP_IMPORT_NAMESPACE_NOT_URI => 1711;
use constant SCHEMAP_IMPORT_REDEFINE_NSNAME => 1712;
use constant SCHEMAP_IMPORT_SCHEMA_NOT_URI => 1713;
use constant SCHEMAP_INVALID_BOOLEAN => 1714;
use constant SCHEMAP_INVALID_ENUM => 1715;
use constant SCHEMAP_INVALID_FACET => 1716;
use constant SCHEMAP_INVALID_FACET_VALUE => 1717;
use constant SCHEMAP_INVALID_MAXOCCURS => 1718;
use constant SCHEMAP_INVALID_MINOCCURS => 1719;
use constant SCHEMAP_INVALID_REF_AND_SUBTYPE => 1720;
use constant SCHEMAP_INVALID_WHITE_SPACE => 1721;
use constant SCHEMAP_NOATTR_NOREF => 1722;
use constant SCHEMAP_NOTATION_NO_NAME => 1723;
use constant SCHEMAP_NOTYPE_NOREF => 1724;
use constant SCHEMAP_REF_AND_SUBTYPE => 1725;
use constant SCHEMAP_RESTRICTION_NONAME_NOREF => 1726;
use constant SCHEMAP_SIMPLETYPE_NONAME => 1727;
use constant SCHEMAP_TYPE_AND_SUBTYPE => 1728;
use constant SCHEMAP_UNKNOWN_ALL_CHILD => 1729;
use constant SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD => 1730;
use constant SCHEMAP_UNKNOWN_ATTR_CHILD => 1731;
use constant SCHEMAP_UNKNOWN_ATTRGRP_CHILD => 1732;
use constant SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP => 1733;
use constant SCHEMAP_UNKNOWN_BASE_TYPE => 1734;
use constant SCHEMAP_UNKNOWN_CHOICE_CHILD => 1735;
use constant SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD => 1736;
use constant SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD => 1737;
use constant SCHEMAP_UNKNOWN_ELEM_CHILD => 1738;
use constant SCHEMAP_UNKNOWN_EXTENSION_CHILD => 1739;
use constant SCHEMAP_UNKNOWN_FACET_CHILD => 1740;
use constant SCHEMAP_UNKNOWN_FACET_TYPE => 1741;
use constant SCHEMAP_UNKNOWN_GROUP_CHILD => 1742;
use constant SCHEMAP_UNKNOWN_IMPORT_CHILD => 1743;
use constant SCHEMAP_UNKNOWN_LIST_CHILD => 1744;
use constant SCHEMAP_UNKNOWN_NOTATION_CHILD => 1745;
use constant SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD => 1746;
use constant SCHEMAP_UNKNOWN_REF => 1747;
use constant SCHEMAP_UNKNOWN_RESTRICTION_CHILD => 1748;
use constant SCHEMAP_UNKNOWN_SCHEMAS_CHILD => 1749;
use constant SCHEMAP_UNKNOWN_SEQUENCE_CHILD => 1750;
use constant SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD => 1751;
use constant SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD => 1752;
use constant SCHEMAP_UNKNOWN_TYPE => 1753;
use constant SCHEMAP_UNKNOWN_UNION_CHILD => 1754;
use constant SCHEMAP_ELEM_DEFAULT_FIXED => 1755;
use constant SCHEMAP_REGEXP_INVALID => 1756;
use constant SCHEMAP_FAILED_LOAD => 1757;
use constant SCHEMAP_NOTHING_TO_PARSE => 1758;
use constant SCHEMAP_NOROOT => 1759;
use constant SCHEMAP_REDEFINED_GROUP => 1760;
use constant SCHEMAP_REDEFINED_TYPE => 1761;
use constant SCHEMAP_REDEFINED_ELEMENT => 1762;
use constant SCHEMAP_REDEFINED_ATTRGROUP => 1763;
use constant SCHEMAP_REDEFINED_ATTR => 1764;
use constant SCHEMAP_REDEFINED_NOTATION => 1765;
use constant SCHEMAP_FAILED_PARSE => 1766;
# SCHEMAV_* codes, numbered consecutively from 1800 to 1822.
use constant {
    SCHEMAV_NOROOT => 1800,
    SCHEMAV_UNDECLAREDELEM => 1801,
    SCHEMAV_NOTTOPLEVEL => 1802,
    SCHEMAV_MISSING => 1803,
    SCHEMAV_WRONGELEM => 1804,
    SCHEMAV_NOTYPE => 1805,
    SCHEMAV_NOROLLBACK => 1806,
    SCHEMAV_ISABSTRACT => 1807,
    SCHEMAV_NOTEMPTY => 1808,
    SCHEMAV_ELEMCONT => 1809,
    SCHEMAV_HAVEDEFAULT => 1810,
    SCHEMAV_NOTNILLABLE => 1811,
    SCHEMAV_EXTRACONTENT => 1812,
    SCHEMAV_INVALIDATTR => 1813,
    SCHEMAV_INVALIDELEM => 1814,
    SCHEMAV_NOTDETERMINIST => 1815,
    SCHEMAV_CONSTRUCT => 1816,
    SCHEMAV_INTERNAL => 1817,
    SCHEMAV_NOTSIMPLE => 1818,
    SCHEMAV_ATTRUNKNOWN => 1819,
    SCHEMAV_ATTRINVALID => 1820,
    SCHEMAV_VALUE => 1821,
    SCHEMAV_FACET => 1822,
};
use constant XPTR_UNKNOWN_SCHEME => 1900;
use constant XPTR_CHILDSEQ_START => 1901;
use constant XPTR_EVAL_FAILED => 1902;
use constant XPTR_EXTRA_OBJECTS => 1903;
use constant C14N_CREATE_CTXT => 1950;
use constant C14N_REQUIRES_UTF8 => 1951;
use constant C14N_CREATE_STACK => 1952;
use constant C14N_INVALID_NODE => 1953;
use constant FTP_PASV_ANSWER => 2000;
use constant FTP_EPSV_ANSWER => 2001;
use constant FTP_ACCNT => 2002;
use constant HTTP_URL_SYNTAX => 2020;
use constant HTTP_USE_IP => 2021;
use constant HTTP_UNKNOWN_HOST => 2022;
1;

37
lib/XML/LibXML/ErrNo.pod Normal file
View File

@ -0,0 +1,37 @@
=head1 NAME
XML::LibXML::ErrNo - Structured Errors
=head1 DESCRIPTION
This module is based on xmlerror.h libxml2 C header file. It defines symbolic
constants for all libxml2 error codes. Currently libxml2 uses over 480
different error codes. See also XML::LibXML::Error.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

260
lib/XML/LibXML/Error.pm Normal file
View File

@ -0,0 +1,260 @@
# $Id: Error.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Error;
use strict;
use warnings;
# To avoid a "Deep recursion on subroutine as_string" warning
no warnings 'recursion';
use Encode ();
# Package globals: the domain-name lookup table, the module version and the
# warning-reporting mode.
use vars qw(@error_domains $VERSION $WARNINGS);
# Error objects stringify through as_string(); 'eq' and 'cmp' compare the
# stringified forms of both operands. fallback => 1 lets Perl derive any
# operator that is not listed here.
use overload
'""' => \&as_string,
'eq' => sub {
("$_[0]" eq "$_[1]")
},
'cmp' => sub {
("$_[0]" cmp "$_[1]")
},
fallback => 1;
# Consulted by _callback_error() whenever a warning-level error arrives.
$WARNINGS = 0; # 0: suppress, 1: report via warn, 2: report via die
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# Error levels, as stored in the 'level' field of an error object.
use constant XML_ERR_NONE => 0;
use constant XML_ERR_WARNING => 1; # A simple warning
use constant XML_ERR_ERROR => 2; # A recoverable error
use constant XML_ERR_FATAL => 3; # A fatal error
# Error domains, as stored in the 'domain' field of an error object. Each value
# is also the index of the matching human-readable name in @error_domains.
use constant XML_ERR_FROM_NONE => 0;
use constant XML_ERR_FROM_PARSER => 1; # The XML parser
use constant XML_ERR_FROM_TREE => 2; # The tree module
use constant XML_ERR_FROM_NAMESPACE => 3; # The XML Namespace module
use constant XML_ERR_FROM_DTD => 4; # The XML DTD validation
use constant XML_ERR_FROM_HTML => 5; # The HTML parser
use constant XML_ERR_FROM_MEMORY => 6; # The memory allocator
use constant XML_ERR_FROM_OUTPUT => 7; # The serialization code
use constant XML_ERR_FROM_IO => 8; # The Input/Output stack
use constant XML_ERR_FROM_FTP => 9; # The FTP module
use constant XML_ERR_FROM_HTTP => 10; # The HTTP module
use constant XML_ERR_FROM_XINCLUDE => 11; # The XInclude processing
use constant XML_ERR_FROM_XPATH => 12; # The XPath module
use constant XML_ERR_FROM_XPOINTER => 13; # The XPointer module
use constant XML_ERR_FROM_REGEXP => 14; # The regular expressions module
use constant XML_ERR_FROM_DATATYPE => 15; # The W3C XML Schemas Datatype module
use constant XML_ERR_FROM_SCHEMASP => 16; # The W3C XML Schemas parser module
use constant XML_ERR_FROM_SCHEMASV => 17; # The W3C XML Schemas validation module
use constant XML_ERR_FROM_RELAXNGP => 18; # The Relax-NG parser module
use constant XML_ERR_FROM_RELAXNGV => 19; # The Relax-NG validator module
use constant XML_ERR_FROM_CATALOG => 20; # The Catalog module
use constant XML_ERR_FROM_C14N => 21; # The Canonicalization module
use constant XML_ERR_FROM_XSLT => 22; # The XSLT engine from libxslt
use constant XML_ERR_FROM_VALID => 23; # The DTD validation module with valid context
use constant XML_ERR_FROM_CHECK => 24; # The error-checking module
use constant XML_ERR_FROM_WRITER => 25; # The xmlwriter module
use constant XML_ERR_FROM_MODULE => 26; # The dynamically-loaded module module
use constant XML_ERR_FROM_I18N => 27; # The module handling character conversion
use constant XML_ERR_FROM_SCHEMATRONV=> 28; # The Schematron validator module
# Domain names indexed by the XML_ERR_FROM_* values above; domain() falls back
# to "domain_N" for an index beyond the end of this list.
@error_domains = ("", "parser", "tree", "namespace", "validity",
"HTML parser", "memory", "output", "I/O", "ftp",
"http", "XInclude", "XPath", "xpointer", "regexp",
"Schemas datatype", "Schemas parser", "Schemas validity",
"Relax-NG parser", "Relax-NG validity",
"Catalog", "C14N", "XSLT", "validity", "error-checking",
"xmlwriter", "dynamic loading", "i18n",
"Schematron validity");
# Longest _prev chain _callback_error() will build; once the previous error has
# reached this depth, newly reported errors are dropped.
my $MAX_ERROR_PREV_DEPTH = 100;
# Install one read-only accessor per hash field, so that $err->code,
# $err->line, ... return $err->{code}, $err->{line}, ...
# ('domain' is absent from the list: it has a hand-written accessor.)
foreach my $name (qw(
    code _prev level file line nodename message column context
    str1 str2 str3 num1 num2 __prev_depth
)) {
    no strict 'refs';    # the glob is addressed by name below
    *{$name} = sub {
        my ($self) = @_;
        return $self->{$name};
    };
}
{
# Constructor. $xE is either an object answering the structured-error getters
# (domain, level, code, message, file, line, str1..3, num1..2 and
# context_and_column) or a plain scalar holding a "flat" error message.
# Returns a hash reference blessed into $class.
sub new {
    my ($class, $xE) = @_;

    unless (ref $xE) {
        # Flat error: only the message is known. It is recorded as a
        # recoverable error (level 2) from no domain (0) with code -1.
        return bless {
            domain       => 0,
            level        => 2,
            code         => -1,
            message      => $xE,
            file         => undef,
            line         => undef,
            str1         => undef,
            str2         => undef,
            str3         => undef,
            num1         => undef,
            num2         => undef,
            __prev_depth => 0,
        }, $class;
    }

    # Structured error: copy each field out of the source object.
    my ($context, $column) = $xE->context_and_column();
    my %fields = (
        domain       => $xE->domain(),
        level        => $xE->level(),
        code         => $xE->code(),
        message      => $xE->message(),
        file         => $xE->file(),
        line         => $xE->line(),
        str1         => $xE->str1(),
        str2         => $xE->str2(),
        str3         => $xE->str3(),
        num1         => $xE->num1(),
        num2         => $xE->num2(),
        __prev_depth => 0,
    );

    # context/column are stored only when a context string is available.
    if (defined $context) {
        $fields{context} = $context;
        $fields{column}  = $column;
    }

    return bless \%fields, $class;
}
# Wraps a newly reported error ($xE) in an XML::LibXML::Error object and links
# it to the previously saved error ($prev). Returns the error that should be
# kept as the saved one from now on.
sub _callback_error {
    my ($xE, $prev) = @_;
    my $error = XML::LibXML::Error->new($xE);

    # Warnings are never saved unless $WARNINGS == 2; they are passed to
    # warn() when $WARNINGS is true and silently dropped otherwise.
    if ($error->{level} == XML_ERR_WARNING() && $WARNINGS != 2) {
        warn $error if $WARNINGS;
        return $prev;
    }

    # (An old, disabled experiment set {file} to 'string()' for parsed strings
    # here; it was left out because it broke existing callers.)

    unless (ref $prev) {
        # $prev is a plain string or nothing: wrap a non-empty string so the
        # chain only ever contains objects.
        my $has_text = defined($prev) && length($prev);
        $error->{_prev} = $has_text ? XML::LibXML::Error->new($prev) : undef;
        return $error;
    }

    # Chain is full: keep the existing one and drop the new error.
    my $depth = $prev->__prev_depth();
    return $prev if $depth >= $MAX_ERROR_PREV_DEPTH;

    $error->{_prev}        = $prev;
    $error->{__prev_depth} = $depth + 1;
    return $error;
}
# Turns $xE into an XML::LibXML::Error object, prints its dump to the currently
# selected output handle and then dies with the object.
sub _instant_error_callback {
    my ($xE) = @_;
    my $error = XML::LibXML::Error->new($xE);
    print "Reporting an instanteous error ", $error->dump;
    die $error;
}
# Emits the saved error through warn(), if there is one.
sub _report_warning {
    my $saved_error = shift;
    warn $saved_error if defined $saved_error;
}
# Throws the saved error with die(), if there is one.
sub _report_error {
    my $saved_error = shift;
    die $saved_error if defined $saved_error;
}
}
# Backward-compatible aliases: int1/int2 are the older names of num1/num2.
sub int1 {
    my ($self) = @_;
    return $self->num1;
}
sub int2 {
    my ($self) = @_;
    return $self->num2;
}
# Returns the human-readable name of the error's domain, or undef when called
# on something that is not a reference (e.g. as a class method).
sub domain {
    my $self = shift;
    ref($self) or return undef;

    my $index = $self->{domain};
    if ($index < scalar @error_domains) {
        return $error_domains[$index];
    }
    # Newer libxml2 versions may report domains this module has no name for;
    # make up a reasonable label instead of failing.
    return "domain_$index";
}
# Serializes the error, preceded by every earlier error in its _prev chain,
# into a message of the form
#   [<where>: ]<domain> <level> : <message>
# optionally followed by the offending XPath expression or source context and
# a caret line marking the position. Also used for "" overloading.
sub as_string {
    my $self = shift;

    # Older errors come first.
    my $text = defined($self->{_prev}) ? $self->{_prev}->as_string : "";

    # Errors and fatal errors share one label; an unknown level leaves the
    # label undefined, exactly as before.
    my $label =
          $self->{level} == XML_ERR_NONE()    ? ""
        : $self->{level} == XML_ERR_WARNING() ? "warning"
        : (   $self->{level} == XML_ERR_ERROR()
           || $self->{level} == XML_ERR_FATAL() ) ? "error"
        :                                       undef;

    # Location prefix: "file:line", or "Entity: line N" for parser errors that
    # carry a line but no file, optionally extended by the element name.
    my $location = "";
    if (defined $self->{file}) {
        $location = "$self->{file}:$self->{line}";
    }
    elsif ($self->{domain} == XML_ERR_FROM_PARSER() && $self->{line}) {
        $location = "Entity: line $self->{line}";
    }
    $location .= ": element " . $self->{nodename} if $self->{nodename};
    $text .= $location . ": " if $location ne "";

    $text .= $self->domain . " " . $label . " :";

    my $message = $self->{message} || "";
    chomp $message;
    $text .= " " . $message . "\n";

    if ($self->{domain} == XML_ERR_FROM_XPATH() && defined $self->{str1}) {
        # XPath errors: str1 is printed with a caret placed num1 columns in.
        $text .= $self->{str1} . "\n";
        $text .= (" " x $self->{num1}) . "^\n";
    }
    elsif (defined $self->{context}) {
        # If the error relates to character-encoding problems in the context,
        # textual operations on it would emit warnings XML::LibXML cannot fix,
        # so utf8 warnings are switched off for this branch (this also keeps
        # the test suite warning-free).
        no warnings 'utf8';
        my $bytes = Encode::encode('UTF-8', $self->{context});
        $text .= $bytes . "\n";
        # Build the caret line: everything before the error column is blanked
        # out, tabs are kept so the caret stays aligned.
        my $lead = substr($bytes, 0, $self->{column});
        $lead =~ s/[^\t]/ /g;
        $text .= $lead . "^\n";
    }

    return $text;
}
# Returns a Data::Dumper rendering of the error object, named '$error'.
# Data::Dumper is loaded on first use only.
sub dump {
    my $self = shift;
    require Data::Dumper;
    my $dumper = Data::Dumper->new([$self], ['error']);
    return $dumper->Dump;
}
1;

264
lib/XML/LibXML/Error.pod Normal file
View File

@ -0,0 +1,264 @@
=head1 NAME
XML::LibXML::Error - Structured Errors
=head1 SYNOPSIS
eval { ... };
if (ref($@)) {
# handle a structured error (XML::LibXML::Error object)
} elsif ($@) {
# error, but not an XML::LibXML::Error object
} else {
# no error
}
$XML::LibXML::Error::WARNINGS=1;
$message = $@->as_string();
print $@->dump();
$error_domain = $@->domain();
$error_code = $@->code();
$error_message = $@->message();
$error_level = $@->level();
$filename = $@->file();
$line = $@->line();
$nodename = $@->nodename();
$error_str1 = $@->str1();
$error_str2 = $@->str2();
$error_str3 = $@->str3();
$error_num1 = $@->num1();
$error_num2 = $@->num2();
$string = $@->context();
$offset = $@->column();
$previous_error = $@->_prev();
=head1 DESCRIPTION
The XML::LibXML::Error class is a tiny frontend to I<<<<<< libxml2 >>>>>>'s structured error support. If XML::LibXML is compiled with structured error
support, all errors reported by libxml2 are transformed to XML::LibXML::Error
objects. These objects automatically serialize to the corresponding error
messages when printed or used in a string operation, but as objects, can also
be used to get detailed, structured information about the error that
occurred.
Unlike most other XML::LibXML objects, XML::LibXML::Error doesn't wrap an
underlying I<<<<<< libxml2 >>>>>> structure directly, but rather transforms it to a blessed Perl hash reference
containing the individual fields of the structured error information as hash
key-value pairs. Individual items (fields) of a structured error can either be
obtained directly as $@->{field}, or using autoloaded methods such as
$@->field() (where field is the field name). XML::LibXML::Error objects have
the following fields: domain, code, level, file, line, nodename, message, str1,
str2, str3, num1, num2, context, column, and _prev (some of them may be
undefined).
=over 4
=item $XML::LibXML::Error::WARNINGS
$XML::LibXML::Error::WARNINGS=1;
Traditionally, XML::LibXML was suppressing parser warnings by setting libxml2's
global variable xmlGetWarningsDefaultValue to 0. Since 1.70 we do not change
libxml2's global variables anymore; for backward compatibility, XML::LibXML
suppresses warnings. This variable can be set to 1 to enable reporting of these
warnings via Perl C<<<<<< warn >>>>>> and to 2 to report them via C<<<<<< die >>>>>>.
=item as_string
$message = $@->as_string();
This function serializes an XML::LibXML::Error object to a string containing
the full error message close to the message produced by I<<<<<< libxml2 >>>>>> default error handlers and tools like xmllint. This method is also used to
overload "" operator on XML::LibXML::Error, so it is automatically called
whenever XML::LibXML::Error object is treated as a string (e.g. in print $@).
=item dump
print $@->dump();
This function serializes an XML::LibXML::Error to a string displaying all
fields of the error structure individually on separate lines of the form 'name'
=> 'value'.
=item domain
$error_domain = $@->domain();
Returns string containing information about what part of the library raised the
error. Can be one of: "parser", "tree", "namespace", "validity", "HTML parser",
"memory", "output", "I/O", "ftp", "http", "XInclude", "XPath", "xpointer",
"regexp", "Schemas datatype", "Schemas parser", "Schemas validity", "Relax-NG
parser", "Relax-NG validity", "Catalog", "C14N", "XSLT", "validity".
=item code
$error_code = $@->code();
Returns the actual libxml2 error code. The XML::LibXML::ErrNo module defines
constants for individual error codes. Currently libxml2 uses over 480 different
error codes.
=item message
$error_message = $@->message();
Returns a human-readable informative error message.
=item level
$error_level = $@->level();
Returns an integer value describing the severity of the error.
XML::LibXML::Error defines the following constants:
=over 4
=item *
XML_ERR_NONE = 0
=item *
XML_ERR_WARNING = 1 : A simple warning.
=item *
XML_ERR_ERROR = 2 : A recoverable error.
=item *
XML_ERR_FATAL = 3 : A fatal error.
=back
=item file
$filename = $@->file();
Returns the filename of the file being processed while the error occurred.
=item line
$line = $@->line();
The line number, if available.
=item nodename
$nodename = $@->nodename();
Name of the node where error occurred, if available. When this field is
non-empty, libxml2 actually returned a physical pointer to the specified node.
Due to memory management issues, it is very difficult to implement a way to
expose the pointer to the Perl level as a XML::LibXML::Node. For this reason,
XML::LibXML::Error currently only exposes the name of the node.
=item str1
$error_str1 = $@->str1();
Error specific. Extra string information.
=item str2
$error_str2 = $@->str2();
Error specific. Extra string information.
=item str3
$error_str3 = $@->str3();
Error specific. Extra string information.
=item num1
$error_num1 = $@->num1();
Error specific. Extra numeric information.
=item num2
$error_num2 = $@->num2();
In recent libxml2 versions, this value contains a column number of the error or
0 if N/A.
=item context
$string = $@->context();
For parsing errors, this field contains about 80 characters of the XML near the
place where the error occurred. The field C<<<<<< $@-E<gt>column() >>>>>> contains the corresponding offset. Where N/A, the field is undefined.
=item column
$offset = $@->column();
See C<<<<<< $@-E<gt>context() >>>>>> above.
=item _prev
$previous_error = $@->_prev();
This field can possibly hold a reference to another XML::LibXML::Error object
representing an error which occurred just before this error.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,300 @@
=head1 NAME
XML::LibXML::InputCallback - XML::LibXML Class for Input Callbacks
=head1 SYNOPSIS
use XML::LibXML;
=head1 DESCRIPTION
You may get unexpected results if you are trying to load external documents
during libxml2 parsing if the location of the resource is not a HTTP, FTP or
relative location but an absolute path for example. To get around this
limitation, you may add your own input handler to open, read and close
particular types of locations or URI classes. Using these input callback
handlers, you can handle your own custom URI schemes for example.
The input callbacks are used whenever XML::LibXML has to get something other
than externally parsed entities from somewhere. They are implemented using a
callback stack on the Perl layer in analogy to libxml2's native callback stack.
The XML::LibXML::InputCallback class transparently registers the input
callbacks for the libxml2's parser processes.
=head2 How does XML::LibXML::InputCallback work?
The libxml2 library offers a callback implementation as global functions only.
To work-around the troubles resulting in having only global callbacks - for
example, if the same global callback stack is manipulated by different
applications running together in a single Apache Web-server environment -,
XML::LibXML::InputCallback comes with an object-oriented and a function-oriented
part.
Using the function-oriented part the global callback stack of libxml2 can be
manipulated. Those functions can be used as interface to the callbacks on the
C- and XS Layer. At the object-oriented part, operations for working with the
"pseudo-localized" callback stack are implemented. Currently, you can register
and de-register callbacks on the Perl layer and initialize them on a per parser
basis.
=head3 Callback Groups
The libxml2 input callbacks come in groups. One group contains a URI matcher (I<<<<<< match >>>>>>), a data stream constructor (I<<<<<< open >>>>>>), a data stream reader (I<<<<<< read >>>>>>), and a data stream destructor (I<<<<<< close >>>>>>). The callbacks can be manipulated on a per group basis only.
=head3 The Parser Process
The parser process works on an XML data stream, along which, links to other
resources can be embedded. This can be links to external DTDs or XIncludes for
example. Those resources are identified by URIs. The callback implementation of
libxml2 assumes that one callback group can handle a certain amount of URIs and
a certain URI scheme. Per default, callback handlers for I<<<<<< file://* >>>>>>, I<<<<<< file://*.gz >>>>>>, I<<<<<< http://* >>>>>> and I<<<<<< ftp://* >>>>>> are registered.
Callback groups in the callback stack are processed from top to bottom, meaning
that callback groups registered later will be processed before the earlier
registered ones.
While parsing the data stream, the libxml2 parser checks if a registered
callback group will handle a URI - if they will not, the URI will be
interpreted as I<<<<<< file://URI >>>>>>. To handle a URI, the I<<<<<< match >>>>>> callback will have to return '1'. If that happens, the handling of the URI will
be passed to that callback group. Next, the URI will be passed to the I<<<<<< open >>>>>> callback, which should return a I<<<<<< reference >>>>>> to the data stream if it successfully opened the file, '0' otherwise. If
opening the stream was successful, the I<<<<<< read >>>>>> callback will be called repeatedly until it returns an empty string. After the
read callback, the I<<<<<< close >>>>>> callback will be called to close the stream.
=head3 Organisation of callback groups in XML::LibXML::InputCallback
Callback groups are implemented as a stack (Array), each entry holds a
reference to an array of the callbacks. For the libxml2 library, the
XML::LibXML::InputCallback callback implementation appears as one single
callback group. The Perl implementation however allows one to manage different
callback stacks on a per libxml2-parser basis.
=head2 Using XML::LibXML::InputCallback
After object instantiation using the parameter-less constructor, you can
register callback groups.
my $input_callbacks = XML::LibXML::InputCallback->new();
$input_callbacks->register_callbacks([ $match_cb1, $open_cb1,
$read_cb1, $close_cb1 ] );
$input_callbacks->register_callbacks([ $match_cb2, $open_cb2,
$read_cb2, $close_cb2 ] );
$input_callbacks->register_callbacks( [ $match_cb3, $open_cb3,
$read_cb3, $close_cb3 ] );
$parser->input_callbacks( $input_callbacks );
$parser->parse_file( $some_xml_file );
=head2 What about the old callback system prior to XML::LibXML::InputCallback?
In XML::LibXML versions prior to 1.59 - i.e. without the
XML::LibXML::InputCallback module - you could define your callbacks either
globally or locally. You still can do that using
XML::LibXML::InputCallback, and in addition to that you can define the
callbacks on a per parser basis!
If you use the old callback interface through global callbacks,
XML::LibXML::InputCallback will treat them with a lower priority than the ones
registered using the new interface. The global callbacks will not override the
callback groups registered using the new interface. Local callbacks are
attached to a specific parser instance, therefore they are treated with highest
priority. If the I<<<<<< match >>>>>> callback of the callback group registered as local variable is identical to one
of the callback groups registered using the new interface, that callback group
will be replaced.
Users of the old callback implementation whose I<<<<<< open >>>>>> callback returned a plain string, will have to adapt their code to return a
reference to that string after upgrading to version >= 1.59. The new callback
system can only deal with the I<<<<<< open >>>>>> callback returning a reference!
=head1 INTERFACE DESCRIPTION
=head2 Global Variables
=over 4
=item $_CUR_CB
Stores the current callback and can be used as shortcut to access the callback
stack.
=item @_GLOBAL_CALLBACKS
Stores all callback groups for the current parser process.
=item @_CB_STACK
Stores the currently used callback group. Used to prevent parser errors when
dealing with nested XML data.
=back
=head2 Global Callbacks
=over 4
=item _callback_match
Implements the interface for the I<<<<<< match >>>>>> callback at C-level and for the selection of the callback group from the
callbacks defined at the Perl-level.
=item _callback_open
Forwards the I<<<<<< open >>>>>> callback from libxml2 to the corresponding callback function at the Perl-level.
=item _callback_read
Forwards the read request to the corresponding callback function at the
Perl-level and returns the result to libxml2.
=item _callback_close
Forwards the I<<<<<< close >>>>>> callback from libxml2 to the corresponding callback function at the
Perl-level.
=back
=head2 Class methods
=over 4
=item new()
A simple constructor.
=item register_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ])
The four callbacks I<<<<<< have >>>>>> to be given as array reference in the above order I<<<<<< match >>>>>>, I<<<<<< open >>>>>>, I<<<<<< read >>>>>>, I<<<<<< close >>>>>>!
=item unregister_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ])
With no arguments given, C<<<<<< unregister_callbacks() >>>>>> will delete the last registered callback group from the stack. If four
callbacks are passed as array reference, the callback group to unregister will
be identified by the I<<<<<< match >>>>>> callback and deleted from the callback stack. Note that if several identical I<<<<<< match >>>>>> callbacks are defined in different callback groups, ALL of them will be deleted
from the stack.
=item init_callbacks( $parser )
Initializes the callback system for the provided parser before starting a
parsing process.
=item cleanup_callbacks()
Resets global variables and the libxml2 callback stack.
=item lib_init_callbacks()
Used internally for callback registration at C-level.
=item lib_cleanup_callbacks()
Used internally for callback resetting at the C-level.
=back
=head1 EXAMPLE CALLBACKS
The following example is a purely fictitious example that uses a
MyScheme::Handler object that responds to methods similar to an IO::Handle.
# Define the four callback functions
sub match_uri {
my $uri = shift;
return $uri =~ /^myscheme:/; # trigger our callback group for 'myscheme' URIs
}
sub open_uri {
my $uri = shift;
my $handler = MyScheme::Handler->new($uri);
return $handler;
}
# The returned $buffer will be parsed by the libxml2 parser
sub read_uri {
my $handler = shift;
my $length = shift;
my $buffer;
read($handler, $buffer, $length);
return $buffer; # $buffer will be an empty string '' if read() is done
}
# Close the handle associated with the resource.
sub close_uri {
my $handler = shift;
close($handler);
}
# Register them with an instance of XML::LibXML::InputCallback
my $input_callbacks = XML::LibXML::InputCallback->new();
$input_callbacks->register_callbacks([ \&match_uri, \&open_uri,
\&read_uri, \&close_uri ] );
# Register the callback group at a parser instance
$parser->input_callbacks( $input_callbacks );
# $some_xml_file will be parsed using our callbacks
$parser->parse_file( $some_xml_file );
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

112
lib/XML/LibXML/Literal.pm Normal file
View File

@ -0,0 +1,112 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Literal;
use XML::LibXML::Boolean;
use XML::LibXML::Number;
use strict;
use warnings;
# Module version (maintained by the release tooling).
use vars qw ($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# A Literal stringifies to its raw value (value()); the 'cmp' operator is
# handled by the cmp() method of this package.
use overload
'""' => \&value,
'cmp' => \&cmp;
# Constructor: returns a reference to a private copy of $string, blessed into
# $class.
# NOTE(review): the POD for new() says &quot; and &apos; are converted to " and
# '. The two substitutions that did this were already commented out, so the
# string is stored exactly as given -- confirm which of the two is intended.
sub new {
    my ($class, $string) = @_;
    return bless \$string, $class;
}
# Returns the literal wrapped in single quotes, with every embedded single
# quote written as &apos;. The stored value itself is left untouched.
sub as_string {
    my ($self) = @_;
    (my $escaped = $$self) =~ s/'/&apos;/g;
    return "'" . $escaped . "'";
}
# Returns the literal wrapped in a <Literal> element followed by a newline.
# The value is inserted as is; no XML escaping is applied.
sub as_xml {
    my ($self) = @_;
    return "<Literal>" . $$self . "</Literal>\n";
}
# Returns the plain string held by the literal (also used for "" overloading).
sub value {
    my ($self) = @_;
    return $$self;
}
# String comparison against $other, as perl's cmp operator would do it. A true
# third argument means the operands were swapped (the literal was the
# right-hand operand), so the comparison is turned around.
sub cmp {
    my ($self, $other, $swapped) = @_;
    return $swapped ? ($other cmp $$self) : ($$self cmp $other);
}
# A literal evaluates to itself.
sub evaluate {
    my ($self) = @_;
    return $self;
}
# Boolean conversion: any non-empty string (including "0") is true, the empty
# string is false.
sub to_boolean {
    my ($self) = @_;
    return XML::LibXML::Boolean->True if length($$self) > 0;
    return XML::LibXML::Boolean->False;
}
# Conversions to the other XPath value types.
# to_number: a new XML::LibXML::Number built from the literal's value.
sub to_number {
    my ($self) = @_;
    return XML::LibXML::Number->new($self->value);
}
# to_literal: already a literal, so the object itself.
sub to_literal {
    my ($self) = @_;
    return $self;
}
# string_value: same as value().
sub string_value {
    my ($self) = @_;
    return $self->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Literal - Simple string values.
=head1 DESCRIPTION
In XPath terms a Literal is what we know as a string.
=head1 API
=head2 new($string)
Create a new Literal object with the value in $string. Note that &quot; and
&apos; will be converted to " and ' respectively. That is not part of the XPath
specification, but I consider it useful. Note though that you have to go
to extraordinary lengths in an XML template file (be it XSLT or whatever) to
make use of this:
<xsl:value-of select="&quot;I'm feeling &amp;quot;sad&amp;quot;&quot;"/>
Which produces a Literal of:
I'm feeling "sad"
=head2 value()
Also overloaded as stringification, simply returns the literal string value.
=head2 cmp($literal)
Returns the equivalent of perl's cmp operator against the given $literal.
=cut

View File

@ -0,0 +1,161 @@
=head1 NAME
XML::LibXML::Namespace - XML::LibXML Namespace Implementation
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Namespace nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
my $ns = XML::LibXML::Namespace->new($nsURI);
print $ns->nodeName();
print $ns->name();
$localname = $ns->getLocalName();
print $ns->getData();
print $ns->getValue();
print $ns->value();
$known_uri = $ns->getNamespaceURI();
$known_prefix = $ns->getPrefix();
$key = $ns->unique_key();
=head1 DESCRIPTION
Namespace nodes are returned either by $element->findnodes('namespace::foo') or
by $node->getNamespaces().
The namespace node API is not part of any current DOM API, and so it is quite
minimal. It should be noted that namespace nodes are I<<<<<< not >>>>>> a sub class of L<<<<<< XML::LibXML::Node >>>>>>, however Namespace nodes act a lot like attribute nodes, and similarly named
methods will return what you would expect if you treated the namespace node as
an attribute. Note that in order to fix several inconsistencies between the API
and the documentation, the behavior of some functions has been changed in
1.64.
=head1 METHODS
=over 4
=item new
my $ns = XML::LibXML::Namespace->new($nsURI);
Creates a new Namespace node. Note that this is not a 'node' as an attribute or
an element node. Therefore you can't call all L<<<<<< XML::LibXML::Node >>>>>> Functions. All functions available for this node are listed below.
Optionally you can pass the prefix to the namespace constructor. If this second
parameter is omitted you will create a so called default namespace. Note, the
newly created namespace is not bound to any document or node, therefore you
should not expect it to be available in an existing document.
=item declaredURI
Returns the URI for this namespace.
=item declaredPrefix
Returns the prefix for this namespace.
=item nodeName
print $ns->nodeName();
Returns "xmlns:prefix", where prefix is the prefix for this namespace.
=item name
print $ns->name();
Alias for nodeName()
=item getLocalName
$localname = $ns->getLocalName();
Returns the local name of this node as if it were an attribute, that is, the
prefix associated with the namespace.
=item getData
print $ns->getData();
Returns the URI of the namespace, i.e. the value of this node as if it were an
attribute.
=item getValue
print $ns->getValue();
Alias for getData()
=item value
print $ns->value();
Alias for getData()
=item getNamespaceURI
$known_uri = $ns->getNamespaceURI();
Returns the string "http://www.w3.org/2000/xmlns/"
=item getPrefix
$known_prefix = $ns->getPrefix();
Returns the string "xmlns"
=item unique_key
$key = $ns->unique_key();
This method returns a key guaranteed to be unique for this namespace, and to
always be the same value for this namespace. Two namespace objects return the
same key if and only if they have the same prefix and the same URI. The
returned key value is useful as a key in hashes.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

783
lib/XML/LibXML/Node.pod Normal file
View File

@ -0,0 +1,783 @@
=head1 NAME
XML::LibXML::Node - Abstract Base Class of XML::LibXML Nodes
=head1 SYNOPSIS
use XML::LibXML;
$name = $node->nodeName;
$node->setNodeName( $newName );
$bool = $node->isSameNode( $other_node );
$bool = $node->isEqual( $other_node );
$num = $node->unique_key;
$content = $node->nodeValue;
$content = $node->textContent;
$type = $node->nodeType;
$node->unbindNode();
$childnode = $node->removeChild( $childnode );
$oldnode = $node->replaceChild( $newNode, $oldNode );
$node->replaceNode($newNode);
$childnode = $node->appendChild( $childnode );
$childnode = $node->addChild( $childnode );
$node = $parent->addNewChild( $nsURI, $name );
$node->addSibling($newNode);
$newnode =$node->cloneNode( $deep );
$parentnode = $node->parentNode;
$nextnode = $node->nextSibling();
$nextnode = $node->nextNonBlankSibling();
$prevnode = $node->previousSibling();
$prevnode = $node->previousNonBlankSibling();
$boolean = $node->hasChildNodes();
$childnode = $node->firstChild;
$childnode = $node->lastChild;
$documentnode = $node->ownerDocument;
$node = $node->getOwner;
$node->setOwnerDocument( $doc );
$node->insertBefore( $newNode, $refNode );
$node->insertAfter( $newNode, $refNode );
@nodes = $node->findnodes( $xpath_expression );
$result = $node->find( $xpath );
print $node->findvalue( $xpath );
$bool = $node->exists( $xpath_expression );
@childnodes = $node->childNodes();
@childnodes = $node->nonBlankChildNodes();
$xmlstring = $node->toString($format,$docencoding);
$c14nstring = $node->toStringC14N();
$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context);
$c14nstring = $node->toStringC14N_v1_1();
$c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context);
$ec14nstring = $node->toStringEC14N();
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list);
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list);
$str = $doc->serialize($format);
$localname = $node->localname;
$nameprefix = $node->prefix;
$uri = $node->namespaceURI();
$boolean = $node->hasAttributes();
@attributelist = $node->attributes();
$URI = $node->lookupNamespaceURI( $prefix );
$prefix = $node->lookupNamespacePrefix( $URI );
$node->normalize;
@nslist = $node->getNamespaces;
$node->removeChildNodes();
$strURI = $node->baseURI();
$node->setBaseURI($strURI);
$node->nodePath();
$lineno = $node->line_number();
=head1 DESCRIPTION
XML::LibXML::Node defines functions that are common to all Node Types. An
XML::LibXML::Node should never be created standalone, but as an instance of a
high level class such as XML::LibXML::Element or XML::LibXML::Text. The class
itself should provide only common functionality. In XML::LibXML each node is
part either of a document or a document-fragment. Because of this there is no
node without a parent. This may cause confusion with "unbound" nodes.
=head1 METHODS
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item nodeName
$name = $node->nodeName;
Returns the node's name. This function is aware of namespaces and returns the
full name of the current node (C<<<<<< prefix:localname >>>>>>).
Since 1.62 this function also returns the correct DOM names for node types with
constant names, namely: #text, #cdata-section, #comment, #document,
#document-fragment.
=item setNodeName
$node->setNodeName( $newName );
In very limited situations, it is useful to change a node's name. In the DOM
specification this should throw an error. This function is aware of namespaces.
=item isSameNode
$bool = $node->isSameNode( $other_node );
returns TRUE (1) if the given nodes refer to the same node structure, otherwise
FALSE (0) is returned.
=item isEqual
$bool = $node->isEqual( $other_node );
deprecated version of isSameNode().
I<<<<<< NOTE >>>>>> isEqual will change behaviour to follow the DOM specification
=item unique_key
$num = $node->unique_key;
This function is not specified for any DOM level. It returns a key guaranteed
to be unique for this node, and to always be the same value for this node. In
other words, two node objects return the same key if and only if isSameNode
indicates that they are the same node.
The returned key value is useful as a key in hashes.
=item nodeValue
$content = $node->nodeValue;
If the node has any content (such as stored in a C<<<<<< text node >>>>>>) it can get requested through this function.
I<<<<<< NOTE: >>>>>> Element Nodes have no content per definition. To get the text value of an
Element use textContent() instead!
=item textContent
$content = $node->textContent;
this function returns the content of all text nodes in the descendants of the
given node as specified in DOM.
=item nodeType
$type = $node->nodeType;
Return a numeric value representing the node type of this node. The module
XML::LibXML by default exports constants for the node types (see the EXPORT
section in the L<<<<<< XML::LibXML >>>>>> manual page).
=item unbindNode
$node->unbindNode();
Unbinds the Node from its siblings and Parent, but not from the Document it
belongs to. If the node is not inserted into the DOM afterwards, it will be
lost after the program terminates. From a low level view, the unbound node is
stripped from the context it is in and inserted into a (hidden) document-fragment.
=item removeChild
$childnode = $node->removeChild( $childnode );
This will unbind the Child Node from its parent C<<<<<< $node >>>>>>. The function returns the unbound node. If C<<<<<< $childnode >>>>>> is not a child of the given Node the function will fail.
=item replaceChild
$oldnode = $node->replaceChild( $newNode, $oldNode );
Replaces the C<<<<<< $oldNode >>>>>> with the C<<<<<< $newNode >>>>>>. The C<<<<<< $oldNode >>>>>> will be unbound from the Node. This function differs from the DOM L2
specification, in the case, if the new node is not part of the document, the
node will be imported first.
=item replaceNode
$node->replaceNode($newNode);
This function is very similar to replaceChild(), but it replaces the node
itself rather than a childnode. This is useful if a node found by any XPath
function, should be replaced.
=item appendChild
$childnode = $node->appendChild( $childnode );
The function will add the C<<<<<< $childnode >>>>>> to the end of C<<<<<< $node >>>>>>'s children. The function should fail, if the new childnode is already a child
of C<<<<<< $node >>>>>>. This function differs from the DOM L2 specification, in the case, if the new
node is not part of the document, the node will be imported first.
=item addChild
$childnode = $node->addChild( $childnode );
As an alternative to appendChild() one can use the addChild() function. This
function is a bit faster, because it avoids all DOM conformity checks.
Therefore this function is quite useful if one builds XML documents in memory
where the order and ownership (C<<<<<< ownerDocument >>>>>>) is assured.
addChild() uses libxml2's own xmlAddChild() function. Thus it has to be used
with extra care: If a text node is added to a node and the node itself or its
last childnode is as well a text node, the node to add will be merged with the
one already available. The current node will be removed from memory after this
action. Because perl is not aware of this action, the perl instance is still
available. XML::LibXML will catch the loss of a node and refuse to run any
function called on that node.
my $t1 = $doc->createTextNode( "foo" );
my $t2 = $doc->createTextNode( "bar" );
$t1->addChild( $t2 ); # is OK
my $val = $t2->nodeValue(); # will fail, script dies
Also addChild() will not check if the added node belongs to the same document
as the node it will be added to. This could lead to inconsistent documents and
in worse cases even to memory violations, if one does not keep track of
this issue.
Although this sounds like a lot of trouble, addChild() is useful if a document
is built from a stream, such as happens sometimes in SAX handlers or filters.
If you are not sure about the source of your nodes, you better stay with
appendChild(), because this function is more user friendly in the sense of
being more error tolerant.
=item addNewChild
$node = $parent->addNewChild( $nsURI, $name );
Similar to C<<<<<< addChild() >>>>>>, this function uses low level libxml2 functionality to provide faster
interface for DOM building. I<<<<<< addNewChild() >>>>>> uses C<<<<<< xmlNewChild() >>>>>> to create a new node on a given parent element.
addNewChild() has two parameters $nsURI and $name, where $nsURI is an
(optional) namespace URI. $name is the fully qualified element name;
addNewChild() will determine the correct prefix if necessary.
The function returns the newly created node.
This function is very useful for DOM building, where a created node can be
directly associated with its parent. I<<<<<< NOTE >>>>>> this function is not part of the DOM specification and its use will limit your
code to XML::LibXML.
=item addSibling
$node->addSibling($newNode);
addSibling() allows adding an additional node to the end of a nodelist, defined
by the given node.
=item cloneNode
$newnode =$node->cloneNode( $deep );
I<<<<<< cloneNode >>>>>> creates a copy of C<<<<<< $node >>>>>>. When $deep is set to 1 (true) the function will copy all child nodes as well.
If $deep is 0 only the current node will be copied. Note that in case of
element, attributes are copied even if $deep is 0.
Note that the behavior of this function for $deep=0 has changed in 1.62 in
order to be consistent with the DOM spec (in older versions attributes and
namespace information was not copied for elements).
=item parentNode
$parentnode = $node->parentNode;
Returns simply the Parent Node of the current node.
=item nextSibling
$nextnode = $node->nextSibling();
Returns the next sibling if any.
=item nextNonBlankSibling
$nextnode = $node->nextNonBlankSibling();
Returns the next non-blank sibling if any (a node is blank if it is a Text or
CDATA node consisting of whitespace only). This method is not defined by DOM.
=item previousSibling
$prevnode = $node->previousSibling();
Analogous to I<<<<<< getNextSibling >>>>>> the function returns the previous sibling if any.
=item previousNonBlankSibling
$prevnode = $node->previousNonBlankSibling();
Returns the previous non-blank sibling if any (a node is blank if it is a Text
or CDATA node consisting of whitespace only). This method is not defined by
DOM.
=item hasChildNodes
$boolean = $node->hasChildNodes();
If the current node has child nodes this function returns TRUE (1), otherwise
it returns FALSE (0, not undef).
=item firstChild
$childnode = $node->firstChild;
If a node has child nodes this function will return the first node in the child
list.
=item lastChild
$childnode = $node->lastChild;
If the C<<<<<< $node >>>>>> has child nodes this function returns the last child node.
=item ownerDocument
$documentnode = $node->ownerDocument;
Through this function it is always possible to access the document the current
node is bound to.
=item getOwner
$node = $node->getOwner;
This function returns the node the current node is associated with. In most
cases this will be a document node or a document fragment node.
=item setOwnerDocument
$node->setOwnerDocument( $doc );
This function binds a node to another DOM. This method unbinds the node first,
if it is already bound to another document.
This function is the opposite calling of L<<<<<< XML::LibXML::Document >>>>>>'s adoptNode() function. Because of this it has the same limitations with
Entity References as adoptNode().
=item insertBefore
$node->insertBefore( $newNode, $refNode );
The method inserts C<<<<<< $newNode >>>>>> before C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node.
This function differs from the DOM L2 specification, in the case, if the new
node is not part of the document, the node will be imported first,
automatically.
$refNode has to be passed to the function even if it is undefined:
$node->insertBefore( $newNode, undef ); # the same as $node->appendChild( $newNode );
$node->insertBefore( $newNode ); # wrong
Note, that the reference node has to be a direct child of the node the function
is called on. Also, $newNode is not allowed to be an ancestor of the new
parent node.
=item insertAfter
$node->insertAfter( $newNode, $refNode );
The method inserts C<<<<<< $newNode >>>>>> after C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node.
Note, that $refNode has to be passed explicitly even if it is undef.
=item findnodes
@nodes = $node->findnodes( $xpath_expression );
I<<<<<< findnodes >>>>>> evaluates the xpath expression (XPath 1.0) on the current node and returns the
resulting node set as an array. In scalar context, returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
I<<<<<< NOTE ON NAMESPACES AND XPATH >>>>>>:
A common mistake about XPath is to assume that node tests consisting of an
element name with no prefix match elements in the default namespace. This
assumption is wrong - by XPath specification, such node tests can only match
elements that are in no (i.e. null) namespace.
So, for example, one cannot match the root element of an XHTML document with C<<<<<< $node-E<gt>find('/html') >>>>>> since C<<<<<< '/html' >>>>>> would only match if the root element C<<<<<< E<lt>htmlE<gt> >>>>>> had no namespace, but all XHTML elements belong to the namespace
http://www.w3.org/1999/xhtml. (Note that C<<<<<< xmlns="..." >>>>>> namespace declarations can also be specified in a DTD, which makes the
situation even worse, since the XML document looks as if there was no default
namespace).
There are several possible ways to deal with namespaces in XPath:
=over 4
=item *
The recommended way is to use the L<<<<<< XML::LibXML::XPathContext >>>>>> module to define an explicit context for XPath evaluation, in which a document
independent prefix-to-namespace mapping can be defined. For example:
my $xpc = XML::LibXML::XPathContext->new;
$xpc->registerNs('x', 'http://www.w3.org/1999/xhtml');
$xpc->find('/x:html',$node);
=item *
Another possibility is to use prefixes declared in the queried document (if
known). If the document declares a prefix for the namespace in question (and
the context node is in the scope of the declaration), C<<<<<< XML::LibXML >>>>>> allows you to use the prefix in the XPath expression, e.g.:
$node->find('/x:html');
=back
See also XML::LibXML::XPathContext->findnodes.
=item find
$result = $node->find( $xpath );
I<<<<<< find >>>>>> evaluates the XPath 1.0 expression using the current node as the context of the
expression, and returns the result depending on what type of result the XPath
expression had. For example, the XPath "1 * 3 + 52" results in a L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return an L<<<<<< XML::LibXML::Boolean >>>>>> object, or an L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to "do
the right thing" in different contexts.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
See also L<<<<<< XML::LibXML::XPathContext >>>>>>->find.
=item findvalue
print $node->findvalue( $xpath );
I<<<<<< findvalue >>>>>> is exactly equivalent to:
$node->find( $xpath )->to_literal;
That is, it returns the literal value of the results. This enables you to
ensure that you get a string back from your search, allowing certain shortcuts.
This could be used as the equivalent of XSLT's <xsl:value-of
select="some_xpath"/>.
See also L<<<<<< XML::LibXML::XPathContext >>>>>>->findvalue.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item exists
$bool = $node->exists( $xpath_expression );
This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a
node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true
for libxml2 >= 2.6.27).
For XPath expressions that do not return node-set, the method returns true if
the returned value is a non-zero number or a non-empty string.
=item childNodes
@childnodes = $node->childNodes();
I<<<<<< childNodes >>>>>> implements a more intuitive interface to the childnodes of the current node. It
enables you to pass all children directly to a C<<<<<< map >>>>>> or C<<<<<< grep >>>>>>. If this function is called in scalar context, a L<<<<<< XML::LibXML::NodeList >>>>>> object will be returned.
=item nonBlankChildNodes
@childnodes = $node->nonBlankChildNodes();
This is like I<<<<<< childNodes >>>>>>, but returns only non-blank nodes (where a node is blank if it is a Text or
CDATA node consisting of whitespace only). This method is not defined by DOM.
=item toString
$xmlstring = $node->toString($format,$docencoding);
This method is similar to the method C<<<<<< toString >>>>>> of a L<<<<<< XML::LibXML::Document >>>>>> but for a single node. It returns a string consisting of XML serialization of
the given node and all its descendants. Unlike C<<<<<< XML::LibXML::Document::toString >>>>>>, in this case the resulting string is by default a character string (UTF-8
encoded with UTF8 flag on). An optional flag $format controls indentation, as
in C<<<<<< XML::LibXML::Document::toString >>>>>>. If the second optional $docencoding flag is true, the result will be a byte
string in the document encoding (see C<<<<<< XML::LibXML::Document::actualEncoding >>>>>>).
=item toStringC14N
$c14nstring = $node->toStringC14N();
$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context);
The function is similar to toString(). Instead of simply serializing the
document tree, it transforms it as it is specified in the XML-C14N
Specification (see L<<<<<< http://www.w3.org/TR/xml-c14n >>>>>>). Such transformation is known as canonization.
If $with_comments is 0 or not defined, the result-document will not contain any
comments that exist in the original document. To include comments into the
canonized document, $with_comments has to be set to 1.
The parameter $xpath_expression defines the nodeset of nodes that should be
visible in the resulting document. This can be used to filter out some nodes.
One has to note, that only the nodes that are part of the nodeset, will be
included into the result-document. Their child-nodes will not exist in the
resulting document, unless they are part of the nodeset defined by the xpath
expression.
If $xpath_expression is omitted or empty, toStringC14N() will include all nodes
in the given sub-tree, using the following XPath expressions: with comments
(. | .//node() | .//@* | .//namespace::*)
and without comments
(. | .//node() | .//@* | .//namespace::*)[not(self::comment())]
An optional parameter $xpath_context can be used to pass an L<<<<<< XML::LibXML::XPathContext >>>>>> object defining the context for evaluation of $xpath_expression. This is useful
for mapping namespace prefixes used in the XPath expression to namespace URIs.
Note, however, that $node will be used as the context node for the evaluation,
not the context node of $xpath_context!
=item toStringC14N_v1_1
$c14nstring = $node->toStringC14N_v1_1();
$c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context);
This function behaves like toStringC14N() except that it uses the
"XML_C14N_1_1" constant for canonicalising using the "C14N 1.1 spec".
=item toStringEC14N
$ec14nstring = $node->toStringEC14N();
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list);
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list);
The function is similar to toStringC14N() but follows the XML-EXC-C14N
Specification (see L<<<<<< http://www.w3.org/TR/xml-exc-c14n >>>>>>) for exclusive canonization of XML.
The arguments $with_comments, $xpath_expression, $xpath_context are as in
toStringC14N(). An ARRAY reference can be passed as the last argument
$inclusive_prefix_list, listing namespace prefixes that are to be handled in
the manner described by the Canonical XML Recommendation (i.e. preserved in the
output even if the namespace is not used). C.f. the spec for details.
=item serialize
$str = $doc->serialize($format);
An alias for toString(). This function name was added to be more consistent
with libxml2.
=item serialize_c14n
An alias for toStringC14N().
=item serialize_exc_c14n
An alias for toStringEC14N().
=item localname
$localname = $node->localname;
Returns the local name of a tag. This is the part behind the colon.
=item prefix
$nameprefix = $node->prefix;
Returns the prefix of a tag. This is the part before the colon.
=item namespaceURI
$uri = $node->namespaceURI();
returns the URI of the current namespace.
=item hasAttributes
$boolean = $node->hasAttributes();
returns 1 (TRUE) if the current node has any attributes set, otherwise 0
(FALSE) is returned.
=item attributes
@attributelist = $node->attributes();
This function returns all attributes and namespace declarations assigned to the
given node.
Because XML::LibXML does not implement namespace declarations and attributes
the same way, it is required to test what kind of node is handled while
accessing the function's result.
If this function is called in array context the attribute nodes are returned as
an array. In scalar context, the function will return a L<<<<<< XML::LibXML::NamedNodeMap >>>>>> object.
=item lookupNamespaceURI
$URI = $node->lookupNamespaceURI( $prefix );
Find a namespace URI by its prefix starting at the current node.
=item lookupNamespacePrefix
$prefix = $node->lookupNamespacePrefix( $URI );
Find a namespace prefix by its URI starting at the current node.
I<<<<<< NOTE >>>>>> Only the namespace URIs are meant to be unique. The prefix is only document
related. Also the document might have more than a single prefix defined for a
namespace.
=item normalize
$node->normalize;
This function normalizes adjacent text nodes. This function is not as strict as
libxml2's xmlTextMerge() function, since it will not free a node that is still
referenced by the perl layer.
=item getNamespaces
@nslist = $node->getNamespaces;
If a node has any namespaces defined, this function will return these
namespaces. Note, that this will not return all namespaces that are in scope,
but only the ones declared explicitly for that node.
Although getNamespaces is available for all nodes, it only makes sense if used
with element nodes.
=item removeChildNodes
$node->removeChildNodes();
This function is not specified for any DOM level: It removes all childnodes
from a node in a single step. Other than the libxml2 function itself
(xmlFreeNodeList), this function will not immediately remove the nodes from the
memory. This saves one from getting memory violations, if there are nodes still
referred to from the Perl level.
=item baseURI ()
$strURI = $node->baseURI();
Searches for the base URL of the node. The method should work on both XML and
HTML documents even if base mechanisms for these are completely different. It
returns the base as defined in RFC 2396 sections "5.1.1. Base URI within
Document Content" and "5.1.2. Base URI from the Encapsulating Entity". However
it does not return the document base (5.1.3), use method C<<<<<< URI >>>>>> of C<<<<<< XML::LibXML::Document >>>>>> for this.
=item setBaseURI ($strURI)
$node->setBaseURI($strURI);
This method only does something useful for an element node in an XML document.
It sets the xml:base attribute on the node to $strURI, which effectively sets
the base URI of the node to the same value.
Note: For HTML documents this behaves as if the document was XML which may not
be desired, since it does not effectively set the base URI of the node. See RFC
2396 appendix D for an example of how base URI can be specified in HTML.
=item nodePath
$node->nodePath();
This function is not specified for any DOM level: It returns a canonical
structure based XPath for a given node.
=item line_number
$lineno = $node->line_number();
This function returns the line number where the tag was found during parsing.
If a node is added to the document the line number is 0. Problems may occur, if
a node from one document is passed to another one.
IMPORTANT: Due to limitations in the libxml2 library line numbers greater than
65535 will be returned as 65535. Please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details.
Note: line_number() is special to XML::LibXML and not part of the DOM
specification.
If the line_numbers flag of the parser was not activated before parsing,
line_number() will always return 0.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

345
lib/XML/LibXML/NodeList.pm Normal file
View File

@ -0,0 +1,345 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::NodeList;
use strict;
use warnings;
use XML::LibXML::Boolean;
use XML::LibXML::Literal;
use XML::LibXML::Number;
use vars qw($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# Operator overloading for node lists: stringification goes through
# to_literal, boolean tests through to_boolean, and string comparison
# compares the stringified operands.
use overload
'""' => \&to_literal,
'bool' => \&to_boolean,
'cmp' => sub {
my($aa, $bb, $order) = @_;
# a true $order means the operands were passed swapped, so swap them back
return ($order ? ("$bb" cmp "$aa") : ("$aa" cmp "$bb"));
},
;
# Constructor: returns a new list object holding a copy of the given items,
# blessed into the invoking class.
sub new {
    my ($class, @items) = @_;
    return bless [@items], $class;
}
# Alternate constructor taking an array reference.
# When $reuse is true the very same array is blessed (no copy is made);
# otherwise a shallow copy of the array is blessed instead.
sub new_from_ref {
    my ($class, $array_ref, $reuse) = @_;
    my $storage = $array_ref;
    $storage = [ @{$array_ref} ] unless $reuse;
    return bless $storage, $class;
}
# Removes the last item from the list and returns it.
sub pop {
    my ($self) = @_;
    return CORE::pop @{$self};
}
# Appends the given items to the end of the list; returns whatever the
# builtin push returns (the new number of elements).
sub push {
    my ($self, @new_items) = @_;
    return CORE::push @{$self}, @new_items;
}
# Appends every item of another node list (anything providing
# get_nodelist) to the end of this list.
sub append {
    my ($self, $nodelist) = @_;
    return CORE::push @{$self}, $nodelist->get_nodelist;
}
# Removes the first item from the list and returns it.
sub shift {
    my ($self) = @_;
    return CORE::shift @{$self};
}
# Inserts the given items at the front of the list; returns whatever the
# builtin unshift returns (the new number of elements).
sub unshift {
    my ($self, @new_items) = @_;
    return CORE::unshift @{$self}, @new_items;
}
# Inserts every item of another node list (anything providing
# get_nodelist) at the front of this list.
sub prepend {
    my ($self, $nodelist) = @_;
    return CORE::unshift @{$self}, $nodelist->get_nodelist;
}
# Returns the number of items currently in the list.
sub size {
    my ($self) = @_;
    return scalar @{$self};
}
# Returns the item at position $pos.
# Positions are 1-based (as in XPath), so the position is shifted down by
# one to obtain the underlying 0-based array index.
sub get_node {
    my ($self, $pos) = @_;
    my $index = $pos - 1;
    return $self->[$index];
}
# Returns the item stored at the given 0-based array index.
sub item {
    my $self  = CORE::shift;
    my $index = CORE::shift;
    return $self->[$index];
}
# Returns the contents of the list as a plain Perl list (in scalar context
# the array evaluates to its element count).
sub get_nodelist {
    my ($self) = @_;
    return @{$self};
}
# Boolean value of the list: XML::LibXML::Boolean->True when the list holds
# at least one item, XML::LibXML::Boolean->False when it is empty.
sub to_boolean {
    my ($self) = @_;
    return XML::LibXML::Boolean->True if @{$self} > 0;
    return XML::LibXML::Boolean->False;
}
# The string-value of a node list is the string-value of its first node;
# an empty list yields the empty string.
sub string_value {
    my ($self) = @_;
    return @{$self} ? $self->[0]->string_value : '';
}
# Concatenates the string-values of all items (skipping undefined ones) and
# wraps the result in an XML::LibXML::Literal.
sub to_literal {
    my ($self) = @_;
    my @strings = CORE::grep { defined } CORE::map { $_->string_value } @{$self};
    return XML::LibXML::Literal->new( join( '', @strings ) );
}
# Like to_literal, but joins the (defined) string-values with the
# separator given as the first argument.
sub to_literal_delimited {
    my ($self, $separator) = @_;
    my @strings = CORE::grep { defined } CORE::map { $_->string_value } @{$self};
    return XML::LibXML::Literal->new( join( $separator, @strings ) );
}
# Returns the string-value of every item, each passed through
# XML::LibXML::Literal and unwrapped again with value().
# In list context the values are returned as a list, otherwise as a
# reference to an array holding them.
sub to_literal_list {
    my ($self) = @_;
    my @values;
    for my $node ( @{$self} ) {
        CORE::push @values,
            XML::LibXML::Literal->new( $node->string_value() )->value();
    }
    return wantarray ? @values : \@values;
}
# Converts the list to a number: the literal form of the list (to_literal)
# is handed to the XML::LibXML::Number constructor.
sub to_number {
    my ($self) = @_;
    return XML::LibXML::Number->new( $self->to_literal );
}
# Obsolete entry point kept for compatibility: it only emits a warning and
# returns undef.
sub iterator {
    CORE::warn("this function is obsolete!\nIt was disabled in version 1.54\n");
    return undef;
}
# Calls the given code reference once per item (with $_ set to the item,
# which is also passed as the argument) and collects everything it returns.
# Void context returns nothing; list context returns the collected values;
# scalar context returns them wrapped in a new list of the same class.
sub map {
    my ($self, $callback) = @_;
    my $code = __is_code($callback);
    local $_;
    my @results;
    CORE::push @results, $code->($_) for @{$self};
    return unless defined wantarray;
    return @results if wantarray;
    return ( ref $self )->new(@results);
}
# Keeps the items for which the given code reference returns a true value
# (the item is available both as $_ and as the argument).
# Void context returns nothing; list context returns the kept items;
# scalar context returns them wrapped in a new list of the same class.
sub grep {
    my ($self, $callback) = @_;
    my $code = __is_code($callback);
    local $_;
    my @kept;
    for ( @{$self} ) {
        CORE::push @kept, $_ if $code->($_);
    }
    return unless defined wantarray;
    return @kept if wantarray;
    return ( ref $self )->new(@kept);
}
# Sorts the items using the given comparison code reference, which receives
# the two items to compare as its arguments.
# List context returns the sorted items; otherwise they are wrapped in a
# new list of the same class.
sub sort {
    my ($self, $callback) = @_;
    my $comparator = __is_code($callback);
    my @ordered = CORE::sort { $comparator->( $a, $b ) } @{$self};
    return @ordered if wantarray;
    return ( ref $self )->new(@ordered);
}
# Calls the given code reference once per item; the item is passed as the
# argument and is also available as $_ for the duration of the call.
# Returns the original items (list context) or the list object itself.
sub foreach {
    my ($self, $callback) = @_;
    for my $node ( @{$self} ) {
        local $_ = $node;
        $callback->($node);
    }
    return @{$self} if wantarray;
    return $self;
}
# Returns the items in reverse order: as a plain list in list context,
# otherwise wrapped in a new list of the same class. The list itself is
# left unchanged.
sub reverse {
    my ($self) = @_;
    my @flipped = CORE::reverse @{$self};
    return @flipped if wantarray;
    return ( ref $self )->new(@flipped);
}
# Folds the list into a single value. The code reference is called with the
# running value and the next item, and its result becomes the new running
# value. An optional third argument is used as the initial value; without
# it the first item of the list starts the reduction.
sub reduce {
    my ($self, $callback, @init) = @_;
    my $reducer = __is_code($callback);
    my @queue = @{$self};
    CORE::unshift @queue, $init[0] if @init;
    my $accumulator = CORE::shift @queue;
    for my $item (@queue) {
        $accumulator = $reducer->( $accumulator, $item );
    }
    return $accumulator;
}
# Validates that the argument can be called like a subroutine and returns it
# unchanged; dies otherwise.
sub __is_code {
    my ($code) = @_;

    # A plain code reference is accepted right away.
    return $code if ref $code eq 'CODE';

    # Otherwise accept an object whose class overloads the '&{}'
    # dereference. This is done by hand to avoid pulling in any additional
    # external dependencies.
    my $is_callable_object =
           UNIVERSAL::can( $code, 'can' )        # is blessed (sort of)
        && overload::Overloaded($code)           # is overloaded
        && overload::Method( $code, '&{}' );     # overloads '&{}'
    return $code if $is_callable_object;

    # A coderef blessed into a class that does not overload '&{}' is not
    # recognised here and is rejected as well.
    die "Not a subroutine reference\n";
}
1;
__END__
=head1 NAME
XML::LibXML::NodeList - a list of XML document nodes
=head1 DESCRIPTION
An XML::LibXML::NodeList object contains an ordered list of nodes, as
detailed by the W3C DOM documentation of Node Lists.
=head1 SYNOPSIS
my $results = $dom->findnodes('//somepath');
foreach my $context ($results->get_nodelist) {
my $newresults = $context->findnodes('./other/element');
...
}
=head1 API
=head2 new(@nodes)
You will almost never have to create a new NodeList object, as it is all
done for you by XPath.
=head2 get_nodelist()
Returns a list of nodes, the contents of the node list, as a perl list.
=head2 string_value()
Returns the string-value of the first node in the list.
See the XPath specification for what "string-value" means.
=head2 to_literal()
Returns the concatenation of all the string-values of all
the nodes in the list.
=head2 to_literal_delimited($separator)
Returns the concatenation of all the string-values of all
the nodes in the list, delimited by the specified separator.
=head2 to_literal_list()
Returns all the string-values of all the nodes in the list as
a perl list.
=head2 get_node($pos)
Returns the node at $pos. The node position in XPath is based at 1, not 0.
=head2 size()
Returns the number of nodes in the NodeList.
=head2 pop()
Equivalent to perl's pop function.
=head2 push(@nodes)
Equivalent to perl's push function.
=head2 append($nodelist)
Given a nodelist, appends the list of nodes in $nodelist to the end of the
current list.
=head2 shift()
Equivalent to perl's shift function.
=head2 unshift(@nodes)
Equivalent to perl's unshift function.
=head2 prepend($nodelist)
Given a nodelist, prepends the list of nodes in $nodelist to the front of
the current list.
=head2 map($coderef)
Equivalent to perl's map function.
=head2 grep($coderef)
Equivalent to perl's grep function.
=head2 sort($coderef)
Equivalent to perl's sort function.
Caveat: Perl's magic C<$a> and C<$b> variables are not available in
C<$coderef>. Instead the two terms are passed to the coderef as arguments.
=head2 reverse()
Equivalent to perl's reverse function.
=head2 foreach($coderef)
Inspired by perl's foreach loop. Executes the coderef on each item in
the list. Similar to C<map>, but instead of returning the list of values
returned by $coderef, returns the original NodeList.
=head2 reduce($coderef, $init)
Equivalent to List::Util's reduce function. C<$init> is optional and
provides an initial value for the reduction.
Caveat: Perl's magic C<$a> and C<$b> variables are not available in
C<$coderef>. Instead the two terms are passed to the coderef as arguments.
=cut

98
lib/XML/LibXML/Number.pm Normal file
View File

@ -0,0 +1,98 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Number;
use XML::LibXML::Boolean;
use XML::LibXML::Literal;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# Operator overloading for numbers: both string and numeric conversion are
# served by value(), and numeric comparison is served by cmp().
use overload
'""' => \&value,
'0+' => \&value,
'<=>' => \&cmp;
# Constructor: wraps $number in a blessed scalar reference.
#
# The argument is accepted only if it looks like a plain decimal number
# (optional leading minus, digits with an optional fractional part, or a
# bare fraction such as ".5"), possibly surrounded by or containing
# whitespace after the sign; all whitespace is then stripped.
# Anything else -- including an undefined argument -- is stored as undef,
# which the accessors treat as NaN.
sub new {
    my ($class, $number) = @_;

    # Check definedness first so that an undefined argument does not trigger
    # an "uninitialized value" warning in the pattern match.
    if ( defined $number
        && $number =~ /^\s*(-\s*)?(\d+(\.\d*)?|\.\d+)\s*$/ )
    {
        $number =~ s/\s+//g;
    }
    else {
        $number = undef;
    }

    return bless \$number, $class;
}
# Returns the stored value as a string, or 'NaN' when no value is stored.
sub as_string {
    my ($self) = @_;
    return 'NaN' unless defined ${$self};
    return ${$self};
}
# Returns the stored value (or 'NaN' when none is stored) wrapped in a
# <Number> element, followed by a newline.
sub as_xml {
    my ($self) = @_;
    my $text = defined( ${$self} ) ? ${$self} : 'NaN';
    return "<Number>" . $text . "</Number>\n";
}
# Returns the stored value as is (undef when no valid number is stored).
sub value {
    my ($self) = @_;
    return ${$self};
}
# Numeric three-way comparison of the stored value against $other.
# A true $swap means the operands arrived in reverse order, so the
# comparison is performed the other way round.
sub cmp {
    my ($self, $other, $swap) = @_;
    return $swap
        ? ( $other <=> ${$self} )
        : ( ${$self} <=> $other );
}
# Evaluating a number yields the number object itself.
sub evaluate {
    my ($self) = @_;
    return $self;
}
# Boolean value of the number: XML::LibXML::Boolean->True when the stored
# value is true in Perl's sense, XML::LibXML::Boolean->False otherwise.
sub to_boolean {
    my ($self) = @_;
    return XML::LibXML::Boolean->True if ${$self};
    return XML::LibXML::Boolean->False;
}
# Converts the number to an XML::LibXML::Literal built from as_string().
sub to_literal {
    my ($self) = @_;
    return XML::LibXML::Literal->new( $self->as_string );
}
# A number converted to a number is the object itself.
sub to_number {
    my ($self) = @_;
    return $self;
}
# The string-value of a number is whatever value() returns.
sub string_value {
    my ($self) = @_;
    return $self->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Number - Simple numeric values.
=head1 DESCRIPTION
This class holds simple numeric values. It doesn't support -0, +/- Infinity,
or NaN, as the XPath spec says it should, but I'm not hurting anyone I don't think.
=head1 API
=head2 new($num)
Creates a new XML::LibXML::Number object, with the value in $num. Does some
rudimentary numeric checking on $num to ensure it actually is a number.
=head2 value()
Returns the numeric value held. Also available as overloaded stringification.
=cut

94
lib/XML/LibXML/PI.pod Normal file
View File

@ -0,0 +1,94 @@
=head1 NAME
XML::LibXML::PI - XML::LibXML Processing Instructions
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Processing Instruction nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$pinode->setData( $data_string );
$pinode->setData( name=>string_value [...] );
=head1 DESCRIPTION
Processing instructions are implemented with XML::LibXML with read and write
access. The PI data is the PI without the PI target (as specified in XML 1.0
[17]) as a string. This string can be accessed with getData as implemented in L<<<<<< XML::LibXML::Node >>>>>>.
The write access takes into account that many processing instructions carry
attribute-like data. Therefore setData() provides, besides the interface
conforming to the DOM specification, a way to pass a set of named parameters.
So the code segment
my $pi = $dom->createProcessingInstruction("abc");
$pi->setData(foo=>'bar', foobar=>'foobar');
$dom->appendChild( $pi );
will result in the following PI in the DOM:
<?abc foo="bar" foobar="foobar"?>
This is how it is specified in the DOM specification. This three-step
interface temporarily creates a node in Perl space. This can be avoided by
using the insertProcessingInstruction() method. Instead of the three calls
described above, the call
$dom->insertProcessingInstruction("abc",'foo="bar" foobar="foobar"');
will have the same result as above.
L<<<<<< XML::LibXML::PI >>>>>>'s implementation of setData() documented below differs a bit from the standard
version as available in L<<<<<< XML::LibXML::Node >>>>>>:
=over 4
=item setData
$pinode->setData( $data_string );
$pinode->setData( name=>string_value [...] );
This method allows one to change the content data of a PI. Additionally to the
interface specified for DOM Level2, the method provides a named parameter
interface to set the data. This parameter list is converted into a string
before it is appended to the PI.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

1008
lib/XML/LibXML/Parser.pod Normal file

File diff suppressed because it is too large Load Diff

114
lib/XML/LibXML/Pattern.pod Normal file
View File

@ -0,0 +1,114 @@
=head1 NAME
XML::LibXML::Pattern - interface to libxml2 XPath patterns
=head1 SYNOPSIS
use XML::LibXML;
my $pattern = XML::LibXML::Pattern->new('/x:html/x:body//x:div', { 'x' => 'http://www.w3.org/1999/xhtml' });
# test a match on an XML::LibXML::Node $node
if ($pattern->matchesNode($node)) { ... }
# or on an XML::LibXML::Reader
if ($reader->matchesPattern($pattern)) { ... }
# or skip reading all nodes that do not match
print $reader->nodePath while $reader->nextPatternMatch($pattern);
$pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } );
$bool = $pattern->matchesNode($node);
=head1 DESCRIPTION
This is a perl interface to libxml2's pattern matching support I<<<<<< http://xmlsoft.org/html/libxml-pattern.html >>>>>>. This feature requires recent versions of libxml2.
Patterns are a small subset of XPath language, which is limited to
(disjunctions of) location paths involving the child and descendant axes in
abbreviated form as described by the extended BNF given below:
Selector ::= Path ( '|' Path )*
Path ::= ('.//' | '//' | '/' )? Step ( '/' Step )*
Step ::= '.' | NameTest
NameTest ::= QName | '*' | NCName ':' '*'
For readability, whitespace may be used in selector XPath expressions even
though not explicitly allowed by the grammar: whitespace may be freely added
within patterns before or after any token, where
token ::= '.' | '/' | '//' | '|' | NameTest
Note that no predicates or attribute tests are allowed.
Patterns are particularly useful for stream parsing provided via the C<<<<<< XML::LibXML::Reader >>>>>> interface.
=over 4
=item new()
$pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } );
The constructor of a pattern takes a pattern expression (as described by the
BNF grammar above) and an optional HASH reference mapping prefixes to namespace
URIs. The method returns a compiled pattern object.
Note that if the document has a default namespace, it must still be given a
prefix in order to be matched (as demanded by the XPath 1.0 specification). For
example, to match an element C<<<<<< E<lt>a xmlns="http://foo.bar"E<gt>E<lt>/aE<gt> >>>>>>, one should use a pattern like this:
$pattern = XML::LibXML::Pattern->new( 'foo:a', { foo => 'http://foo.bar' });
=item matchesNode($node)
$bool = $pattern->matchesNode($node);
Given an XML::LibXML::Node object, returns a true value if the node is matched
by the compiled pattern expression.
=back
=head1 SEE ALSO
L<<<<<< XML::LibXML::Reader >>>>>> for other methods involving compiled patterns.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

216
lib/XML/LibXML/Reader.pm Normal file
View File

@ -0,0 +1,216 @@
# $Id: Reader.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Reader;
use XML::LibXML;
use Carp;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
use 5.008_000;
# Compile-time guard: abort loading when the class has no _newForFile
# method, which (per the error message) means libxml2 was compiled without
# reader support.
BEGIN {
    unless (UNIVERSAL::can('XML::LibXML::Reader', '_newForFile')) {
        croak("Cannot use XML::LibXML::Reader module - ".
              "your libxml2 is compiled without reader support!");
    }
}
use base qw(Exporter);
# Numeric constants provided by this module; all of them are exported by
# default (see @EXPORT in the BEGIN block below).
use constant {
# Node types (XML_READER_TYPE_*), grouped under the ':types' export tag.
XML_READER_TYPE_NONE => 0,
XML_READER_TYPE_ELEMENT => 1,
XML_READER_TYPE_ATTRIBUTE => 2,
XML_READER_TYPE_TEXT => 3,
XML_READER_TYPE_CDATA => 4,
XML_READER_TYPE_ENTITY_REFERENCE => 5,
XML_READER_TYPE_ENTITY => 6,
XML_READER_TYPE_PROCESSING_INSTRUCTION => 7,
XML_READER_TYPE_COMMENT => 8,
XML_READER_TYPE_DOCUMENT => 9,
XML_READER_TYPE_DOCUMENT_TYPE => 10,
XML_READER_TYPE_DOCUMENT_FRAGMENT => 11,
XML_READER_TYPE_NOTATION => 12,
XML_READER_TYPE_WHITESPACE => 13,
XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14,
XML_READER_TYPE_END_ELEMENT => 15,
XML_READER_TYPE_END_ENTITY => 16,
XML_READER_TYPE_XML_DECLARATION => 17,
# Reader states, grouped under the ':states' export tag.
XML_READER_NONE => -1,
XML_READER_START => 0,
XML_READER_ELEMENT => 1,
XML_READER_END => 2,
XML_READER_EMPTY => 3,
XML_READER_BACKTRACK => 4,
XML_READER_DONE => 5,
XML_READER_ERROR => 6
};
use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS );
# CLONE_SKIP always returns a true value for this class.
sub CLONE_SKIP {
    return 1;
}
# Build the Exporter tables at compile time: the ':types' and ':states'
# tags list the constant names, @EXPORT (exported by default) is the union
# of both, @EXPORT_OK is a copy of @EXPORT, and ':all' refers to @EXPORT_OK.
BEGIN {
%EXPORT_TAGS = (
types =>
[qw(
XML_READER_TYPE_NONE
XML_READER_TYPE_ELEMENT
XML_READER_TYPE_ATTRIBUTE
XML_READER_TYPE_TEXT
XML_READER_TYPE_CDATA
XML_READER_TYPE_ENTITY_REFERENCE
XML_READER_TYPE_ENTITY
XML_READER_TYPE_PROCESSING_INSTRUCTION
XML_READER_TYPE_COMMENT
XML_READER_TYPE_DOCUMENT
XML_READER_TYPE_DOCUMENT_TYPE
XML_READER_TYPE_DOCUMENT_FRAGMENT
XML_READER_TYPE_NOTATION
XML_READER_TYPE_WHITESPACE
XML_READER_TYPE_SIGNIFICANT_WHITESPACE
XML_READER_TYPE_END_ELEMENT
XML_READER_TYPE_END_ENTITY
XML_READER_TYPE_XML_DECLARATION
)],
states =>
[qw(
XML_READER_NONE
XML_READER_START
XML_READER_ELEMENT
XML_READER_END
XML_READER_EMPTY
XML_READER_BACKTRACK
XML_READER_DONE
XML_READER_ERROR
)]
);
@EXPORT = (@{$EXPORT_TAGS{types}},@{$EXPORT_TAGS{states}});
@EXPORT_OK = @EXPORT;
$EXPORT_TAGS{all}=\@EXPORT_OK;
}
our %_preserve_flag;
{
# Maps the public property names accepted by getParserProp()/setParserProp()
# to the numeric ids handed to _getParserProp()/_setParserProp().
# NOTE(review): the ids presumably mirror libxml2's xmlParserProperties
# enum - confirm against the XS code.
my %props = (
load_ext_dtd => 1, # load the external subset
complete_attributes => 2, # default DTD attributes
validation => 3, # validate with the DTD
expand_entities => 4, # substitute entities
);
# getParserProp($name)
# Looks up the numeric id for the property $name in %props and returns the
# result of _getParserProp() for it; returns undef for an unknown name.
sub getParserProp {
    my $self = shift;
    my $name = shift;
    if (defined(my $prop_id = $props{$name})) {
        return $self->_getParserProp($prop_id);
    }
    return undef;
}
# setParserProp(prop => value, ...)
#
# Sets one or more parser properties on the reader. Arguments are name/value
# pairs and/or HASH references holding such pairs; recognised names are the
# keys of %props. Each recognised pair is forwarded to _setParserProp() as
# (numeric id, value). An unrecognised name is reported with carp() and
# skipped instead of being passed on as an undefined property id.
#
# Returns nothing (an empty list / undef).
sub setParserProp {
    my $self = shift;
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;
    for my $key (keys %args) {
        my $prop = $props{$key};
        unless (defined $prop) {
            carp("XML::LibXML::Reader::setParserProp: unknown parser property '$key'");
            next;
        }
        $self->_setParserProp($prop, $args{$key});
    }
    return;
}
my (%string_pool,%rng_pool,%xsd_pool); # used to preserve data passed to the reader
# new(%args | \%args)
#
# Creates a reader for one XML source. Arguments may be given as a flat
# key/value list and/or HASH references. The source is selected by the first
# defined key among: location, string, IO, DOM, FD. The keys encoding, URI,
# RelaxNG and Schema are also used here, and the whole argument hash is passed
# to XML::LibXML->_parser_options() to compute the parser options.
#
# FD may be either a filehandle (anything fileno() accepts) or a plain
# non-negative descriptor number such as the result of fileno(STDIN).
#
# RelaxNG / Schema may each be an object (any reference), which is handed to
# _setRelaxNG()/_setXSD() and kept alive for the lifetime of the reader, or a
# plain string, which is handed to _setRelaxNGFile()/_setXSDFile().
#
# Croaks when no source is specified, when DOM is not an
# XML::LibXML::Document, or when FD cannot be resolved to a descriptor.
# Returns the new reader, or undef if the low-level constructor returned none.
sub new {
    my ($class) = shift;
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;
    my $encoding = $args{encoding};
    my $URI = $args{URI};
    $URI="$URI" if defined $URI; # stringify in case it is an URI object
    my $options = XML::LibXML->_parser_options(\%args);
    my $self = undef;
    if ( defined $args{location} ) {
        $self = $class->_newForFile( $args{location}, $encoding, $options );
    }
    elsif ( defined $args{string} ) {
        $self = $class->_newForString( $args{string}, $URI, $encoding, $options );
        if (defined($self)) {
            # keep the string alive for as long as the reader exists
            $string_pool{$self} = \$args{string};
        }
    }
    elsif ( defined $args{IO} ) {
        $self = $class->_newForIO( $args{IO}, $URI, $encoding, $options );
    }
    elsif ( defined $args{DOM} ) {
        croak("DOM must be a XML::LibXML::Document node")
            unless UNIVERSAL::isa($args{DOM}, 'XML::LibXML::Document');
        $self = $class->_newForDOM( $args{DOM} );
    }
    elsif ( defined $args{FD} ) {
        my $fd = $args{FD};
        # A plain non-negative integer is already a descriptor number (the
        # documented FD => fileno(STDIN) usage); anything else is treated as
        # a filehandle and resolved with fileno().
        unless (!ref($fd) && $fd =~ /^\d+\z/) {
            $fd = fileno($fd);
            croak("XML::LibXML::Reader->new: FD must be an open filehandle or a file descriptor number")
                unless defined $fd;
        }
        $self = $class->_newForFd( $fd, $URI, $encoding, $options );
    }
    else {
        croak("XML::LibXML::Reader->new: specify location, string, IO, DOM, or FD");
    }

    # Without a reader there is nothing to attach a schema to; calling a
    # method on undef below would die with an unrelated message.
    return $self unless defined $self;

    if ($args{RelaxNG}) {
        if (ref($args{RelaxNG})) {
            $rng_pool{$self} = \$args{RelaxNG};
            $self->_setRelaxNG($args{RelaxNG});
        } else {
            $self->_setRelaxNGFile($args{RelaxNG});
        }
    }
    if ($args{Schema}) {
        if (ref($args{Schema})) {
            $xsd_pool{$self} = \$args{Schema};
            $self->_setXSD($args{Schema});
        } else {
            $self->_setXSDFile($args{Schema});
        }
    }
    return $self;
}
# DESTROY()
# Removes this reader's entries from the string, RelaxNG and XSD pools and
# then calls the low-level _DESTROY method.
sub DESTROY {
    my ($reader) = @_;
    for my $pool (\%string_pool, \%rng_pool, \%xsd_pool) {
        delete $pool->{$reader};
    }
    $reader->_DESTROY;
}
}
# close()
# Translates the result of _close (0 on success, -1 on failure) into the
# convention of perl's close: 1 on success, 0 on failure.
sub close {
    my $reader = shift;
    if ($reader->_close == 0) {
        return 1;
    }
    return 0;
}
# preservePattern($pattern, \%ns_map)
# Forwards to _preservePattern(). When the second argument is a HASH
# reference mapping prefix => URL, it is converted to an ARRAY reference
# holding the reversed flattened hash, i.e. (URL, prefix) pairs; otherwise
# the arguments are passed through unchanged.
sub preservePattern {
    my $reader = shift;
    my ($pattern, $ns_map) = @_;
    unless (ref($ns_map) eq 'HASH') {
        return $reader->_preservePattern(@_);
    }
    return $reader->_preservePattern($pattern, [ reverse %$ns_map ]);
}
# nodePath()
# Returns the result of _nodePath with every "[N]" positional predicate
# removed (/foo[1]/bar[1] becomes /foo/bar), since the sibling count in the
# buffered fragment is basically random and generally misleading.
sub nodePath {
    my ($reader) = @_;
    (my $path = $reader->_nodePath) =~ s/\[\d+\]//g;
    return $path;
}
1;
__END__

677
lib/XML/LibXML/Reader.pod Normal file
View File

@ -0,0 +1,677 @@
=head1 NAME
XML::LibXML::Reader - interface to libxml2 pull parser
=head1 SYNOPSIS
use XML::LibXML::Reader;
my $reader = XML::LibXML::Reader->new(location => "file.xml")
or die "cannot read file.xml\n";
while ($reader->read) {
processNode($reader);
}
sub processNode {
my $reader = shift;
printf "%d %d %s %d\n", ($reader->depth,
$reader->nodeType,
$reader->name,
$reader->isEmptyElement);
}
or
my $reader = XML::LibXML::Reader->new(location => "file.xml")
or die "cannot read file.xml\n";
$reader->preservePattern('//table/tr');
$reader->finish;
print $reader->document->toString(1);
=head1 DESCRIPTION
This is a perl interface to libxml2's pull-parser implementation xmlTextReader I<<<<<< http://xmlsoft.org/html/libxml-xmlreader.html >>>>>>. This feature requires at least libxml2-2.6.21. Pull-parsers (such as StAX in
Java, or XmlReader in C#) use an iterator approach to parse XML documents. They
are easier to program than event-based parser (SAX) and much more lightweight
than tree-based parser (DOM), which load the complete tree into memory.
The Reader acts as a cursor going forward on the document stream and stopping
at each node on the way. At every point, the DOM-like methods of the Reader
object allow one to examine the current node (name, namespace, attributes,
etc.)
The user's code keeps control of the progress and simply calls the C<<<<<< read() >>>>>> function repeatedly to progress to the next node in the document order. Other
functions provide means for skipping complete sub-trees, or nodes until a
specific element, etc.
At every time, only a very limited portion of the document is kept in the
memory, which makes the API more memory-efficient than using DOM. However, it
is also possible to mix Reader with DOM. At every point the user may copy the
current node (optionally expanded into a complete sub-tree) from the processed
document to another DOM tree, or to instruct the Reader to collect sub-document
in form of a DOM tree consisting of selected nodes.
Reader API also supports namespaces, xml:base, entity handling, and DTD
validation. Validation against RelaxNG and W3C XSD schemas is available through
the C<RelaxNG> and C<Schema> constructor options described below.
The naming of methods compared to libxml2 and C# XmlTextReader has been changed
slightly to match the conventions of XML::LibXML. Some functions have been
changed or added with respect to the C interface.
=head1 CONSTRUCTOR
Depending on the XML source, the Reader object can be created with either of:
my $reader = XML::LibXML::Reader->new( location => "file.xml", ... );
my $reader = XML::LibXML::Reader->new( string => $xml_string, ... );
my $reader = XML::LibXML::Reader->new( IO => $file_handle, ... );
my $reader = XML::LibXML::Reader->new( FD => fileno(STDIN), ... );
my $reader = XML::LibXML::Reader->new( DOM => $dom, ... );
where ... are (optional) reader options described below in L<<<<<< Reader options >>>>>> or various parser options described in L<<<<<< XML::LibXML::Parser >>>>>>. The constructor recognizes the following XML sources:
=head2 Source specification
=over 4
=item location
Read XML from a local file or (non-HTTPS) URL.
=item string
Read XML from a string.
=item IO
Read XML from a Perl IO filehandle.
=item FD
Read XML from a file descriptor (bypasses Perl I/O layer, only applicable to
filehandles for regular files or pipes). Possibly faster than IO.
=item DOM
Use reader API to walk through a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>.
=back
=head2 Reader options
=over 4
=item encoding => $encoding
override document encoding.
=item RelaxNG => $rng_schema
can be used to pass either a L<<<<<< XML::LibXML::RelaxNG >>>>>> object or a filename or (non-HTTPS) URL of a RelaxNG schema to the constructor.
The schema is then used to validate the document as it is processed.
=item Schema => $xsd_schema
can be used to pass either a L<<<<<< XML::LibXML::Schema >>>>>> object or a filename or (non-HTTPS) URL of a W3C XSD schema to the constructor.
The schema is then used to validate the document as it is processed.
=item ...
the reader further supports various parser options described in L<<<<<< XML::LibXML::Parser >>>>>> (specifically those labeled by /reader/).
=back
=head1 METHODS CONTROLLING PARSING PROGRESS
=over 4
=item read ()
Moves the position to the next node in the stream, exposing its properties.
Returns 1 if the node was read successfully, 0 if there are no more nodes to
read, or -1 in case of error.
=item readAttributeValue ()
Parses an attribute value into one or more Text and EntityReference nodes.
Returns 1 in case of success, 0 if the reader was not positioned on an
attribute node or all the attribute values have been read, or -1 in case of
error.
=item readState ()
Gets the read state of the reader. Returns the state value, or -1 in case of
error. The module exports constants for the Reader states, see STATES below.
=item depth ()
The depth of the node in the tree, starts at 0 for the root node.
=item next ()
Skip to the node following the current one in the document order while avoiding
the sub-tree if any. Returns 1 if the node was read successfully, 0 if there is
no more nodes to read, or -1 in case of error.
=item nextElement (localname?,nsURI?)
Skip nodes following the current one in the document order until a specific
element is reached. The element's name must be equal to a given localname if
defined, and its namespace must equal to a given nsURI if defined. Either of
the arguments can be undefined (or omitted, in case of the latter or both).
Returns 1 if the element was found, 0 if there is no more nodes to read, or -1
in case of error.
=item nextPatternMatch (compiled_pattern)
Skip nodes following the current one in the document order until an element
matching a given compiled pattern is reached. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< matchesPattern >>>>>> method.
Returns 1 if the element was found, 0 if there is no more nodes to read, or -1
in case of error.
=item skipSiblings ()
Skip all nodes on the same or lower level until the first node on a higher
level is reached. In particular, if the current node occurs in an element, the
reader stops at the end tag of the parent element, otherwise it stops at a node
immediately following the parent node.
Returns 1 if successful, 0 if end of the document is reached, or -1 in case of
error.
=item nextSibling ()
It skips to the node following the current one in the document order while
avoiding the sub-tree if any.
Returns 1 if the node was read successfully, 0 if there is no more nodes to
read, or -1 in case of error
=item nextSiblingElement (name?,nsURI?)
Like nextElement but only processes sibling elements of the current node
(moving forward using C<<<<<< nextSibling () >>>>>> rather than C<<<<<< read () >>>>>>, internally).
Returns 1 if the element was found, 0 if there is no more sibling nodes, or -1
in case of error.
=item finish ()
Skip all remaining nodes in the document, reaching end of the document.
Returns 1 if successful, 0 in case of error.
=item close ()
This method releases any resources allocated by the current instance and closes
any underlying input. It returns 0 on failure and 1 on success. This method is
automatically called by the destructor when the reader is forgotten, therefore
you do not have to call it directly.
=back
=head1 METHODS EXTRACTING INFORMATION
=over 4
=item name ()
Returns the qualified name of the current node, equal to (Prefix:)LocalName.
=item nodeType ()
Returns the type of the current node. See NODE TYPES below.
=item localName ()
Returns the local name of the node.
=item prefix ()
Returns the prefix of the namespace associated with the node.
=item namespaceURI ()
Returns the URI defining the namespace associated with the node.
=item isEmptyElement ()
Check if the current node is empty, this is a bit bizarre in the sense that
<a/> will be considered empty while <a></a> will not.
=item hasValue ()
Returns true if the node can have a text value.
=item value ()
Provides the text value of the node if present or undef if not available.
=item readInnerXml ()
Reads the contents of the current node, including child nodes and markup.
Returns a string containing the XML of the node's content, or undef if the
current node is neither an element nor attribute, or has no child nodes.
=item readOuterXml ()
Reads the contents of the current node, including child nodes and markup.
Returns a string containing the XML of the node including its content, or undef
if the current node is neither an element nor attribute.
=item nodePath()
Returns a canonical location path to the current element from the root node to
the current node. Namespaced elements are matched by '*', because there is no
way to declare prefixes within XPath patterns. Unlike C<<<<<< XML::LibXML::Node::nodePath() >>>>>>, this function does not provide sibling counts (i.e. instead of e.g. '/a/b[1]'
and '/a/b[2]' you get '/a/b' for both matches).
=item matchesPattern(compiled_pattern)
Returns a true value if the current node matches a compiled pattern. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< nextPatternMatch >>>>>> method.
=back
=head1 METHODS EXTRACTING DOM NODES
=over 4
=item document ()
Provides access to the document tree built by the reader. This function can be
used to collect the preserved nodes (see C<<<<<< preserveNode() >>>>>> and preservePattern).
CAUTION: Never use this function to modify the tree unless reading of the whole
document is completed!
=item copyCurrentNode (deep)
This function is similar to the DOM function C<<<<<< copyNode() >>>>>>. It returns a copy of the currently processed node as a corresponding DOM
object. Use deep = 1 to obtain the full sub-tree.
=item preserveNode ()
This tells the XML Reader to preserve the current node in the document tree. A
document tree consisting of the preserved nodes and their content can be
obtained using the method C<<<<<< document() >>>>>> once parsing is finished.
Returns the node or NULL in case of error.
=item preservePattern (pattern,\%ns_map)
This tells the XML Reader to preserve all nodes matched by the pattern (which
is a streaming XPath subset). A document tree consisting of the preserved nodes
and their content can be obtained using the method C<<<<<< document() >>>>>> once parsing is finished.
An optional second argument can be used to provide a HASH reference mapping
prefixes used by the XPath to namespace URIs.
The XPath subset available with this function is described at
http://www.w3.org/TR/xmlschema-1/#Selector
and matches the production
Path ::= ('.//')? ( Step '/' )* ( Step | '@' NameTest )
Returns a positive number in case of success and -1 in case of error
=back
=head1 METHODS PROCESSING ATTRIBUTES
=over 4
=item attributeCount ()
Provides the number of attributes of the current node.
=item hasAttributes ()
Whether the node has attributes.
=item getAttribute (name)
Provides the value of the attribute with the specified qualified name.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item getAttributeNs (localName, namespaceURI)
Provides the value of the specified attribute.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item getAttributeNo (no)
Provides the value of the attribute with the specified index relative to the
containing element.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item isDefault ()
Returns true if the current attribute node was generated from the default value
defined in the DTD.
=item moveToAttribute (name)
Moves the position to the attribute with the specified qualified name.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToAttributeNo (no)
Moves the position to the attribute with the specified index relative to the
containing element.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToAttributeNs (localName,namespaceURI)
Moves the position to the attribute with the specified local name and namespace
URI.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToFirstAttribute ()
Moves the position to the first attribute associated with the current node.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToNextAttribute ()
Moves the position to the next attribute associated with the current node.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToElement ()
Moves the position to the node that contains the current attribute node.
Returns 1 in case of success, -1 in case of error, 0 if not moved
=item isNamespaceDecl ()
Determine whether the current node is a namespace declaration rather than a
regular attribute.
Returns 1 if the current node is a namespace declaration, 0 if it is a regular
attribute or other type of node, or -1 in case of error.
=back
=head1 OTHER METHODS
=over 4
=item lookupNamespace (prefix)
Resolves a namespace prefix in the scope of the current element.
Returns a string containing the namespace URI to which the prefix maps or undef
in case of error.
=item encoding ()
Returns a string containing the encoding of the document or undef in case of
error.
=item standalone ()
Determine the standalone status of the document being read. Returns 1 if the
document was declared to be standalone, 0 if it was declared to be not
standalone, or -1 if the document did not specify its standalone status or in
case of error.
=item xmlVersion ()
Determine the XML version of the document being read. Returns a string
containing the XML version of the document or undef in case of error.
=item baseURI ()
Returns the base URI of a given node.
=item isValid ()
Retrieve the validity status from the parser.
Returns 1 if valid, 0 if no, and -1 in case of error.
=item xmlLang ()
The xml:lang scope within which the node resides.
=item lineNumber ()
Provide the line number of the current parsing point.
=item columnNumber ()
Provide the column number of the current parsing point.
=item byteConsumed ()
This function provides the current index of the parser relative to the start of
the current entity. This function is computed in bytes from the beginning
starting at zero and finishing at the size in bytes of the file if parsing a
file. The function is of constant cost if the input is UTF-8 but can be costly
if run on non-UTF-8 input.
=item setParserProp (prop => value, ...)
Change the parser processing behaviour by changing some of its internal
properties. The following properties are available with this function:
``load_ext_dtd'', ``complete_attributes'', ``validation'', ``expand_entities''.
Since some of the properties can only be changed before any read has been done,
it is best to set the parsing properties at the constructor.
Returns 0 if the call was successful, or -1 in case of error
=item getParserProp (prop)
Get the value of an internal parser property. The following property names can
be used: ``load_ext_dtd'', ``complete_attributes'', ``validation'',
``expand_entities''.
Returns the value, usually 0 or 1, or -1 in case of error.
=back
=head1 DESTRUCTION
XML::LibXML takes care of the reader object destruction when the last reference
to the reader object goes out of scope. The document tree is preserved, though,
if either of $reader->document or $reader->preserveNode was used and references
to the document tree exist.
=head1 NODE TYPES
The reader interface provides the following constants for node types (the
constant symbols are exported by default or if tag C<<<<<< :types >>>>>> is used).
XML_READER_TYPE_NONE => 0
XML_READER_TYPE_ELEMENT => 1
XML_READER_TYPE_ATTRIBUTE => 2
XML_READER_TYPE_TEXT => 3
XML_READER_TYPE_CDATA => 4
XML_READER_TYPE_ENTITY_REFERENCE => 5
XML_READER_TYPE_ENTITY => 6
XML_READER_TYPE_PROCESSING_INSTRUCTION => 7
XML_READER_TYPE_COMMENT => 8
XML_READER_TYPE_DOCUMENT => 9
XML_READER_TYPE_DOCUMENT_TYPE => 10
XML_READER_TYPE_DOCUMENT_FRAGMENT => 11
XML_READER_TYPE_NOTATION => 12
XML_READER_TYPE_WHITESPACE => 13
XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14
XML_READER_TYPE_END_ELEMENT => 15
XML_READER_TYPE_END_ENTITY => 16
XML_READER_TYPE_XML_DECLARATION => 17
=head1 STATES
The following constants represent the values returned by C<<<<<< readState() >>>>>>. They are exported by default, or if tag C<<<<<< :states >>>>>> is used:
XML_READER_NONE => -1
XML_READER_START => 0
XML_READER_ELEMENT => 1
XML_READER_END => 2
XML_READER_EMPTY => 3
XML_READER_BACKTRACK => 4
XML_READER_DONE => 5
XML_READER_ERROR => 6
=head1 SEE ALSO
L<<<<<< XML::LibXML::Pattern >>>>>> for information about compiled patterns.
http://xmlsoft.org/html/libxml-xmlreader.html
http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html
=head1 ORIGINAL IMPLEMENTATION
Heiko Klein, E<lt>H.Klein@gmx.netE<gt> and Petr Pajas
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

78
lib/XML/LibXML/RegExp.pod Normal file
View File

@ -0,0 +1,78 @@
=head1 NAME
XML::LibXML::RegExp - interface to libxml2 regular expressions
=head1 SYNOPSIS
use XML::LibXML;
my $compiled_re = XML::LibXML::RegExp->new('[0-9]{5}(-[0-9]{4})?');
if ($compiled_re->isDeterministic()) { ... }
if ($compiled_re->matches($string)) { ... }
$compiled_re = XML::LibXML::RegExp->new( $regexp_str );
$bool = $compiled_re->matches($string);
$bool = $compiled_re->isDeterministic();
=head1 DESCRIPTION
This is a perl interface to libxml2's implementation of regular expressions,
which are used e.g. for validation of XML Schema simple types (pattern facet).
=over 4
=item new()
$compiled_re = XML::LibXML::RegExp->new( $regexp_str );
The constructor takes a string containing a regular expression and returns a
compiled regexp object.
=item matches($string)
$bool = $compiled_re->matches($string);
Given a string value, returns a true value if the value is matched by the
compiled regular expression.
=item isDeterministic()
$bool = $compiled_re->isDeterministic();
Returns a true value if the regular expression is deterministic; returns false
otherwise. (See the definition of determinism in the XML spec (L<<<<<< http://www.w3.org/TR/REC-xml/#determinism >>>>>>))
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,93 @@
=head1 NAME
XML::LibXML::RelaxNG - RelaxNG Schema Validation
=head1 SYNOPSIS
use XML::LibXML;
$doc = XML::LibXML->new->parse_file($url);
$rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 );
eval { $rngschema->validate( $doc ); };
=head1 DESCRIPTION
The XML::LibXML::RelaxNG class is a tiny frontend to libxml2's RelaxNG
implementation. Currently it supports only schema parsing and document
validation.
=head1 METHODS
=over 4
=item new
$rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 );
The constructor of XML::LibXML::RelaxNG needs to be called with a list of
parameters. At least one of the location, string or DOM parameters is required
to specify the source of the schema. The optional parameter no_network, when
set to 1, prevents the parser from accessing the network, and the optional
parameter recover, when set to 1, prevents the parser from calling die() on
errors.
It is important that each schema has only a single source.
The location parameter allows one to parse a schema from the filesystem or a
(non-HTTPS) URL.
The string parameter will parse the schema from the given XML string.
The DOM parameter allows one to parse the schema from a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>.
Note that the constructor will die() if the schema does not meet the
constraints of the RelaxNG specification.
=item validate
eval { $rngschema->validate( $doc ); };
This function allows one to validate a (parsed) document against the given
RelaxNG schema. The argument of this function should be an
XML::LibXML::Document object. If this function succeeds, it will return 0,
otherwise it will die() and report the errors found. Because of this validate()
should be always evaluated.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

122
lib/XML/LibXML/SAX.pm Normal file
View File

@ -0,0 +1,122 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX;
use strict;
use warnings;
use vars qw($VERSION @ISA);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
use XML::LibXML;
use XML::SAX::Base;
use parent qw(XML::SAX::Base);
use Carp;
use IO::File;
# Hook consulted by Perl when a new interpreter thread is cloned.
# Returns 0 (clone objects of this class) when
# $XML::LibXML::__threads_shared is true, otherwise 1 (skip them).
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Sets a SAX feature on this parser.
#
# The join-character-data feature is handled locally: its value is stored in
# $self->{JOIN_CHARACTERS} and 1 is returned. Every other feature is passed
# on, with its value, to the parent class' set_feature().
sub set_feature {
    my ($self, $feat, $val) = @_;
    unless ($feat eq 'http://xmlns.perl.org/sax/join-character-data') {
        # Drop the invocant so that @_ holds only the feature arguments.
        shift;
        return $self->SUPER::set_feature(@_);
    }
    $self->{JOIN_CHARACTERS} = $val;
    return 1;
}
# Character-stream parsing is not implemented: this always croaks.
sub _parse_characterstream {
    my $self = shift;
    my $fh   = shift;
    # Design note kept from the original: handling the stream here may catch
    # the XML declaration, so the parser won't get confused about a possibly
    # wrong encoding.
    croak("not implemented yet");
}
# Builds the default XML::LibXML parser used when the caller has not supplied
# one in $self->{ParserOptions}{LibParser}; it is created with
# expand_entities enabled.
# See:
# https://rt.cpan.org/Public/Bug/Display.html?id=132759
sub _calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc
{
    my @parser_options = ( expand_entities => 1 );
    return XML::LibXML->new(@parser_options);
}
# Parses XML from a byte-stream filehandle.
#
# Makes sure a LibParser exists in $self->{ParserOptions}, selects
# XML::LibXML::parse_fh as the parse function with $fh as its argument,
# runs _parse() and returns whatever end_document({}) returns.
sub _parse_bytestream {
    my ($self, $fh) = @_;
    if (!defined $self->{ParserOptions}{LibParser}) {
        $self->{ParserOptions}{LibParser}
            = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
    }
    my $opts = $self->{ParserOptions};
    $opts->{ParseFunc}      = \&XML::LibXML::parse_fh;
    $opts->{ParseFuncParam} = $fh;
    $self->_parse;
    return $self->end_document({});
}
# Parses XML held in a string.
#
# Makes sure a LibParser exists in $self->{ParserOptions}, selects
# XML::LibXML::parse_string as the parse function with $string as its
# argument, runs _parse() and returns whatever end_document({}) returns.
sub _parse_string {
    my ($self, $string) = @_;
    unless (defined $self->{ParserOptions}{LibParser}) {
        my $parser = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
        $self->{ParserOptions}{LibParser} = $parser;
    }
    $self->{ParserOptions}{ParseFunc}      = \&XML::LibXML::parse_string;
    $self->{ParserOptions}{ParseFuncParam} = $string;
    $self->_parse;
    return $self->end_document({});
}
# Parses the XML resource named by a system identifier.
#
# Makes sure a LibParser exists in $self->{ParserOptions}, selects
# XML::LibXML::parse_file as the parse function with the system id as its
# argument, runs _parse() and returns whatever end_document({}) returns.
sub _parse_systemid {
    my ($self, $sysid) = @_;
    if (!defined $self->{ParserOptions}{LibParser}) {
        $self->{ParserOptions}{LibParser}
            = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
    }
    my $opts = $self->{ParserOptions};
    $opts->{ParseFunc}      = \&XML::LibXML::parse_file;
    $opts->{ParseFuncParam} = $sysid;
    $self->_parse;
    return $self->end_document({});
}
# Generates SAX events from a chunk of XML.
#
# Makes sure a LibParser exists in $self->{ParserOptions}, selects
# XML::LibXML::parse_xml_chunk as the parse function with $chunk as its
# argument and runs _parse(). Unlike the other _parse_* entry points it does
# not call end_document(); it returns nothing.
sub parse_chunk {
    my ($self, $chunk) = @_;
    unless (defined $self->{ParserOptions}{LibParser}) {
        $self->{ParserOptions}{LibParser}
            = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
    }
    my $opts = $self->{ParserOptions};
    $opts->{ParseFunc} = \&XML::LibXML::parse_xml_chunk;
    # a hack to prevent parse_xml_chunk from issuing end_document
    $opts->{LibParser}->{IS_FILTER} = 1;
    $opts->{ParseFuncParam} = $chunk;
    $self->_parse;
    return;
}
# Runs the parse function prepared by one of the _parse_* / parse_chunk entry
# points.
#
# Reads LibParser, ParseFunc and ParseFuncParam from $self->{ParserOptions}
# (which is blessed into the class of $self), installs $self as the parser's
# handler, calls ParseFunc and then removes the handler again.
#
# Croaks with "Data ended before document ended" when the parser's SAX state
# is 1 after parsing; otherwise re-croaks any error raised by ParseFunc.
# Returns nothing on success.
sub _parse {
    my $self = shift;
    my $args = bless $self->{ParserOptions}, ref($self);

    $args->{LibParser}->{JOIN_CHARACTERS}
        = defined($self->{JOIN_CHARACTERS}) ? $self->{JOIN_CHARACTERS} : 0;

    $args->{LibParser}->set_handler( $self );
    eval {
        $args->{ParseFunc}->($args->{LibParser}, $args->{ParseFuncParam});
    };
    # Keep the error now: any later call that uses eval internally would
    # reset $@ and silently hide the parse failure.
    my $err = $@;

    my $unfinished = ( $args->{LibParser}->{SAX}->{State} == 1 );

    # break a possible circular reference -- on every exit path, including
    # the croak below
    $args->{LibParser}->set_handler( undef );

    if ( $unfinished ) {
        croak( "SAX Exception not implemented, yet; Data ended before document ended\n" );
    }
    if ( $err ) {
        croak $err;
    }
    return;
}
1;

67
lib/XML/LibXML/SAX.pod Normal file
View File

@ -0,0 +1,67 @@
=head1 NAME
XML::LibXML::SAX - XML::LibXML direct SAX parser
=head1 DESCRIPTION
XML::LibXML provides an interface to libxml2 direct SAX interface. Through this
interface it is possible to generate SAX events directly while parsing a
document. While using the SAX parser XML::LibXML will not create a DOM Document
tree.
Such an interface is useful if very large XML documents have to be processed
and no DOM functions are required. By using this interface it is possible to
read data stored within an XML document directly into the application data
structures without loading the document into memory.
The SAX interface of XML::LibXML is based on the famous XML::SAX interface. It
uses the generic interface as provided by XML::SAX::Base.
In addition to the generic functions, which are only able to process entire
documents, XML::LibXML::SAX provides I<<<<<< parse_chunk() >>>>>>. This method generates SAX events from well-balanced data, such as is often
provided by databases.
=head1 FEATURES
I<<<<<< NOTE: >>>>>> This feature is experimental.
You can enable character data joining which may yield a significant speed boost
in your XML processing in lower markup ratio situations by enabling the
http://xmlns.perl.org/sax/join-character-data feature of this parser. This is
done via the set_feature method like this:
$p->set_feature('http://xmlns.perl.org/sax/join-character-data', 1);
You can also specify a 0 to disable. The default is to have this feature
disabled.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,335 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Builder;
use strict;
use warnings;
use XML::LibXML;
use XML::NamespaceSupport;
use vars qw ($VERSION);
# Hook consulted by Perl when a new interpreter thread is cloned: returns 1
# (skip objects of this class) unless $XML::LibXML::__threads_shared is true,
# in which case it returns 0.
sub CLONE_SKIP {
    my $skip = 1;
    $skip = 0 if $XML::LibXML::__threads_shared;
    return $skip;
}
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# Constructor: the key/value pairs following the class name become the
# fields of the new builder object.
sub new {
    my ($class, @pairs) = @_;
    my $self = { @pairs };
    return bless $self, $class;
}
# Returns the value stored in LAST_DOM, which done() sets to the tree it
# finished most recently.
sub result {
    my ($self) = @_;
    return $self->{LAST_DOM};
}
# Finishes the current build: clears the per-document state (DOM, Parent,
# NamespaceStack), remembers the built tree in LAST_DOM and returns it.
# The tree is the DOM document when there is one, otherwise the current
# Parent (this is for parsing document chunks).
sub done {
    my ($self) = @_;
    my $parent = delete $self->{Parent};
    my $tree   = delete $self->{DOM};
    delete $self->{NamespaceStack};
    $tree = $parent unless defined $tree;
    $self->{LAST_DOM} = $tree;
    return $tree;
}
# SAX set_document_locator event: deliberately ignored by the builder.
sub set_document_locator {
    return;
}
# SAX start_dtd event: when the DTD has a name and at least one of a system
# or public identifier, creates the matching external subset on the DOM
# document. Otherwise does nothing.
sub start_dtd {
    my ($self, $dtd) = @_;
    my $name      = $dtd->{Name};
    my $system_id = $dtd->{SystemId};
    my $public_id = $dtd->{PublicId};
    if (defined $name and (defined $system_id or defined $public_id)) {
        $self->{DOM}->createExternalSubset($name, $public_id, $system_id);
    }
}
# SAX end_dtd event: nothing to do for the builder.
sub end_dtd {
    return;
}
# SAX start_document event: creates a fresh DOM document, applies an XML
# declaration when the builder was given an Encoding (Version defaults to
# '1.0'), starts a new namespace stack and resets the current parent.
# Returns an empty list.
sub start_document {
    my ($self, $doc) = @_;

    $self->{DOM} = XML::LibXML::Document->createDocument();

    if ( defined $self->{Encoding} ) {
        my %decl = (
            Version  => ($self->{Version} || '1.0'),
            Encoding => $self->{Encoding},
        );
        $self->xml_decl(\%decl);
    }

    my $ns_stack = XML::NamespaceSupport->new;
    $self->{NamespaceStack} = $ns_stack;
    $ns_stack->push_context;

    $self->{Parent} = undef;
    return ();
}
# SAX xml_decl event: copies the Version and Encoding of the declaration,
# when defined, onto the DOM document. Returns an empty list.
sub xml_decl {
    my ($self, $decl) = @_;
    $self->{DOM}->setVersion( $decl->{Version} )
        if defined $decl->{Version};
    $self->{DOM}->setEncoding( $decl->{Encoding} )
        if defined $decl->{Encoding};
    return ();
}
# SAX end_document event: finishes the build via done() and returns the
# resulting tree.
sub end_document {
    my $self = shift;
    my $tree = $self->done();
    return $tree;
}
# SAX start_prefix_mapping event: declares the prefix/URI pair on the
# namespace stack and flags that the stack is in use (USENAMESPACESTACK).
# When neither a DOM nor a current parent exists yet, a document fragment
# and a fresh namespace stack are created first. Returns an empty list.
sub start_prefix_mapping {
    my ($self, $ns) = @_;

    if ( !defined $self->{DOM} and !defined $self->{Parent} ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    $self->{USENAMESPACESTACK} = 1;

    my $stack = $self->{NamespaceStack};
    $stack->declare_prefix( $ns->{Prefix}, $ns->{NamespaceURI} );
    return ();
}
# SAX end_prefix_mapping event: removes the prefix declaration from the
# namespace stack. Returns an empty list.
sub end_prefix_mapping {
    my ($self, $ns) = @_;
    my $stack = $self->{NamespaceStack};
    $stack->undeclare_prefix( $ns->{Prefix} );
    return ();
}
# SAX start_element event: creates a DOM element for $el, attaches it to the
# tree under construction, applies namespace declarations and attributes, and
# makes the new element the current parent for the events that follow.
#
# $el is a hash ref; this sub reads its Name, NamespaceURI, Prefix and
# Attributes entries. Always returns an empty list.
sub start_element {
my ($self, $el) = @_;
my $node;
# Neither a document nor a current parent exists yet: collect the result in
# a document fragment and start a fresh namespace stack.
unless ( defined $self->{DOM} or defined $self->{Parent} ) {
$self->{Parent} = XML::LibXML::DocumentFragment->new();
$self->{NamespaceStack} = XML::NamespaceSupport->new;
$self->{NamespaceStack}->push_context;
}
if ( defined $self->{Parent} ) {
# There is a current parent: append the new element below it.
# Note that this rewrites a false/undefined NamespaceURI in the caller's
# $el to the empty string.
$el->{NamespaceURI} ||= "";
$node = $self->{Parent}->addNewChild( $el->{NamespaceURI},
$el->{Name} );
}
else {
# No current parent: the new element becomes the document element.
# NOTE(review): the inner "DOM not defined" branches look unreachable,
# because the block above creates a Parent whenever DOM is undefined, and
# setDocumentElement below needs a DOM anyway -- confirm before relying on
# them.
if ($el->{NamespaceURI}) {
if ( defined $self->{DOM} ) {
$node = $self->{DOM}->createRawElementNS($el->{NamespaceURI},
$el->{Name});
}
else {
$node = XML::LibXML::Element->new( $el->{Name} );
$node->setNamespace( $el->{NamespaceURI},
$el->{Prefix} , 1 );
}
}
else {
if ( defined $self->{DOM} ) {
$node = $self->{DOM}->createRawElement($el->{Name});
}
else {
$node = XML::LibXML::Element->new( $el->{Name} );
}
}
$self->{DOM}->setDocumentElement($node);
}
# build namespaces
# $skip_ns becomes 1 as soon as the namespace stack reports at least one
# declared prefix; it then disables the xmlns-attribute fallback in the
# attribute loop below.
my $skip_ns= 0;
foreach my $p ( $self->{NamespaceStack}->get_declared_prefixes() ) {
$skip_ns= 1;
my $uri = $self->{NamespaceStack}->get_uri($p);
# $nodeflag is assigned but never read.
my $nodeflag = 0;
# A declaration whose URI equals the element's own namespace URI is not
# added again.
if ( defined $uri
and defined $el->{NamespaceURI}
and $uri eq $el->{NamespaceURI} ) {
# $nodeflag = 1;
next;
}
$node->setNamespace($uri, $p, 0 );
}
# The new element is now the parent of whatever comes next; open a
# namespace context for it (end_element pops it again).
$self->{Parent} = $node;
$self->{NamespaceStack}->push_context;
# do attributes
# Attribute values may be hash refs (with Name, Value, Prefix, LocalName,
# NamespaceURI entries) or plain values keyed by attribute name.
foreach my $key (keys %{$el->{Attributes}}) {
my $attr = $el->{Attributes}->{$key};
if (ref($attr)) {
# catch broken name/value pairs
next unless $attr->{Name} ;
# When prefix mappings were delivered as events (USENAMESPACESTACK),
# xmlns / xmlns:* attributes are skipped here.
next if $self->{USENAMESPACESTACK}
and ( $attr->{Name} eq "xmlns"
or ( defined $attr->{Prefix}
and $attr->{Prefix} eq "xmlns" ) );
if ( defined $attr->{Prefix}
and $attr->{Prefix} eq "xmlns" and $skip_ns == 0 ) {
# ok, the generator does not set namespaces correctly!
# Turn the xmlns:* attribute into a namespace declaration; the third
# argument is 1 only when its URI is the element's own namespace URI.
my $uri = $attr->{Value};
$node->setNamespace($uri,
$attr->{LocalName},
$uri eq $el->{NamespaceURI} ? 1 : 0 );
}
else {
$node->setAttributeNS($attr->{NamespaceURI} || "",
$attr->{Name}, $attr->{Value});
}
}
else {
# Plain value: the hash key is used as the attribute name.
$node->setAttribute($key => $attr);
}
}
return ();
}
# SAX end_element event: closes the namespace context of the current element
# and moves the current parent one level up. Does nothing (and returns
# nothing) when there is no current parent; otherwise returns an empty list.
sub end_element {
    my ($self, $el) = @_;
    my $current = $self->{Parent} or return;
    $self->{NamespaceStack}->pop_context;
    $self->{Parent} = $current->parentNode();
    return ();
}
# SAX start_cdata event: marks that following character data belongs to a
# CDATA section. Returns an empty list.
sub start_cdata {
    my ($self) = @_;
    $self->{IN_CDATA} = 1;
    return ();
}
# SAX end_cdata event: clears the CDATA marker set by start_cdata.
# Returns an empty list.
sub end_cdata {
    my ($self) = @_;
    $self->{IN_CDATA} = 0;
    return ();
}
# SAX characters event: adds $chars->{Data} to the current parent.
#
# Inside a CDATA section (IN_CDATA == 1) a CDATA node is created and added;
# otherwise the text is appended via appendText() when a DOM document exists,
# or added as a new text node when building a fragment. When neither a DOM
# nor a parent exists yet, a document fragment is started first. Events
# without a current parent or without defined Data are ignored.
sub characters {
    my ($self, $chars) = @_;

    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }
    return unless $self->{Parent};
    return unless defined $chars and defined $chars->{Data};

    my $in_cdata = ( defined $self->{IN_CDATA} and $self->{IN_CDATA} == 1 );

    my $node;
    if ( defined $self->{DOM} ) {
        unless ( $in_cdata ) {
            # Plain text in a document: appended directly, no node to add.
            $self->{Parent}->appendText( $chars->{Data} );
            return;
        }
        $node = $self->{DOM}->createCDATASection( $chars->{Data} );
    }
    elsif ( $in_cdata ) {
        $node = XML::LibXML::CDATASection->new( $chars->{Data} );
    }
    else {
        $node = XML::LibXML::Text->new( $chars->{Data} );
    }

    $self->{Parent}->addChild($node);
    return ();
}
# SAX comment event: creates a comment node from $chars->{Data} and adds it
# to the current parent, or to the document itself when there is no parent.
# When neither a DOM nor a parent exists yet, a document fragment is started
# first. Events without defined Data are ignored. Returns an empty list.
sub comment {
    my ($self, $chars) = @_;

    if ( !defined $self->{DOM} and !defined $self->{Parent} ) {
        $self->{Parent} = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }
    return unless defined $chars and defined $chars->{Data};

    my $comment = defined $self->{DOM}
        ? $self->{DOM}->createComment( $chars->{Data} )
        : XML::LibXML::Comment->new( $chars->{Data} );

    my $target = defined $self->{Parent} ? $self->{Parent} : $self->{DOM};
    $target->addChild($comment);
    return ();
}
# SAX processing_instruction event: creates a PI node from the Target and
# Data of $pi and adds it to the current parent, or to the document itself
# when there is no parent. Ignored (returns nothing) when no DOM document
# exists; otherwise returns an empty list.
sub processing_instruction {
    my ($self, $pi) = @_;
    my $dom = $self->{DOM};
    return unless defined $dom;

    my $pi_node = $dom->createPI( $pi->{Target}, $pi->{Data} );
    my $target  = defined $self->{Parent} ? $self->{Parent} : $dom;
    $target->addChild( $pi_node );
    return ();
}
# SAX warning event: throws the error inside an eval so that $@ is filled
# but the exception does not propagate to the caller.
sub warning {
    my ($self, $error) = @_;
    # fill $@ but do not die seriously
    eval { $error->throw; };
}
# SAX error event: discards the partially built tree (NamespaceStack, Parent
# and DOM) and then throws the error.
sub error {
    my ($self, $error) = @_;
    delete @{$self}{qw(NamespaceStack Parent DOM)};
    $error->throw;
}
# SAX fatal_error event: discards the partially built tree (NamespaceStack,
# Parent and DOM) and then throws the error.
sub fatal_error {
    my ($self, $error) = @_;
    for my $field (qw(NamespaceStack Parent DOM)) {
        delete $self->{$field};
    }
    $error->throw;
}
1;
__END__

View File

@ -0,0 +1,58 @@
=head1 NAME
XML::LibXML::SAX::Builder - Building DOM trees from SAX events.
=head1 SYNOPSIS
use XML::LibXML::SAX::Builder;
my $builder = XML::LibXML::SAX::Builder->new();
my $gen = XML::Generator::DBI->new(Handler => $builder, dbh => $dbh);
$gen->execute("SELECT * FROM Users");
my $doc = $builder->result();
=head1 DESCRIPTION
This is a SAX handler that generates a DOM tree from SAX events. Usage is as
above. Input is accepted from any SAX1 or SAX2 event generator.
Building DOM trees from SAX events is quite easy with
XML::LibXML::SAX::Builder. The class is designed as a SAX2 final handler not as
a filter!
Since SAX is strictly stream oriented, you should not expect a generator to
return anything. Instead you have to ask the builder instance directly to get
the document built. XML::LibXML::SAX::Builder's result() function returns the
document generated from the last SAX stream.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,158 @@
# $Id: Generator.pm 772 2009-01-23 21:42:09Z pajas
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Generator;
use strict;
use warnings;
use XML::LibXML;
use vars qw ($VERSION);
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# Hook consulted by Perl when a new interpreter thread is cloned: returns 0
# when $XML::LibXML::__threads_shared is true, otherwise 1.
sub CLONE_SKIP {
    if ($XML::LibXML::__threads_shared) {
        return 0;
    }
    return 1;
}
# Deprecation notice. This statement sits at file scope, so the warning is
# emitted whenever this module file is loaded.
warn("This class (", __PACKAGE__, ") is deprecated!");
# Constructor. Accepts either key/value pairs or a single argument, which is
# then taken to be the Handler.
sub new {
    my ($class, @args) = @_;
    @args = ('Handler', @args) if @args == 1;
    my %p = @args;
    return bless \%p, $class;
}
# Emits the SAX events for $node to the configured Handler, wrapped in
# start_document / end_document. Returns what end_document returns.
sub generate {
    my ($self, $node) = @_;
    my $handler  = $self->{Handler};
    my $document = { Parent => undef };
    $handler->start_document($document);
    process_node($handler, $node);
    return $handler->end_document($document);
}
# Dispatches on the type of $node and sends the matching SAX event(s) to
# $handler:
#   element            -> process_element()
#   text / CDATA       -> characters
#   comment            -> comment
#   entity reference   -> each child is processed in turn
#   document           -> only the first element child is processed
# Any other node type produces an "unknown node type" warning.
sub process_node {
    my ($handler, $node) = @_;
    my $node_type = $node->getType();

    if ($node_type == XML_ELEMENT_NODE) {
        process_element($handler, $node);
    }
    elsif ($node_type == XML_TEXT_NODE || $node_type == XML_CDATA_SECTION_NODE) {
        $handler->characters( { Data => $node->getData } );
    }
    elsif ($node_type == XML_COMMENT_NODE) {
        $handler->comment( { Data => $node->getData } );
    }
    elsif ($node_type == XML_ENTITY_REF_NODE) {
        process_node($handler, $_) for $node->getChildnodes;
    }
    elsif ($node_type == XML_DOCUMENT_NODE) {
        # just get root element. Ignore other cruft.
        foreach my $kid ($node->getChildnodes) {
            next unless $kid->getType() == XML_ELEMENT_NODE;
            process_element($handler, $kid);
            last;
        }
    }
    else {
        warn("unknown node type: $node_type");
    }
}
# Sends the SAX events for one element to $handler: start_element (with the
# attributes wrapped in XML::LibXML::SAX::AttributeNode objects and keyed by
# attribute name), then the events of every child node, then end_element.
sub process_element {
    my ($handler, $element) = @_;

    my %attributes;
    foreach my $attr ($element->getAttributes) {
        my $attr_node = XML::LibXML::SAX::AttributeNode->new(
            Name         => $attr->getName,
            Value        => $attr->getData,
            NamespaceURI => $attr->getNamespaceURI,
            Prefix       => $attr->getPrefix,
            LocalName    => $attr->getLocalName,
        );
        $attributes{ $attr_node->{Name} } = $attr_node;
    }

    my $node = {
        Name         => $element->getName,
        Attributes   => \%attributes,
        NamespaceURI => $element->getNamespaceURI,
        Prefix       => $element->getPrefix,
        LocalName    => $element->getLocalName,
    };

    $handler->start_element($node);
    process_node($handler, $_) for $element->getChildnodes;
    $handler->end_element($node);
}
# Attribute wrapper: a blessed hash of attribute properties that evaluates
# to its Value when used as a string.
package XML::LibXML::SAX::AttributeNode;
use overload '""' => "stringify";

# Constructor: the key/value pairs become the fields of the object.
sub new {
    my ($class, %p) = @_;
    return bless \%p, $class;
}

# String overload: returns the Value field.
sub stringify {
    my ($self) = @_;
    return $self->{Value};
}
1;
__END__
=head1 NAME
XML::LibXML::SAX::Generator - Generate SAX events from a LibXML tree
=head1 SYNOPSIS
my $handler = MySAXHandler->new();
my $generator = XML::LibXML::SAX::Generator->new(Handler => $handler);
my $dom = XML::LibXML->new->parse_file("foo.xml");
$generator->generate($dom);
=head1 DESCRIPTION
THIS CLASS IS DEPRECATED! Use XML::LibXML::SAX::Parser instead!
This helper class allows you to generate SAX events from any XML::LibXML
node, and all its sub-nodes. This basically gives you interop from
XML::LibXML to other modules that may implement SAX.
It uses SAX2 style, but should be compatible with anything SAX1, by use
of stringification overloading.
There is nothing to really know about, beyond the synopsis above, and
a general knowledge of how to use SAX, which is beyond the scope here.
=cut

View File

@ -0,0 +1,266 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Parser;
use strict;
use warnings;
use vars qw($VERSION @ISA);
use XML::LibXML;
use XML::LibXML::Common qw(:libxml);
use XML::SAX::Base;
use XML::SAX::DocumentLocator;
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
@ISA = ('XML::SAX::Base');
# Hook consulted by Perl when a new interpreter thread is cloned: objects of
# this class are skipped (1) unless $XML::LibXML::__threads_shared is true (0).
sub CLONE_SKIP {
    return 1 unless $XML::LibXML::__threads_shared;
    return 0;
}
# Character streams are not supported by this parser: always dies.
sub _parse_characterstream {
    my $self    = shift;
    my $fh      = shift;
    my $options = shift;
    die "parsing a characterstream is not supported at this time";
}
# Parses a byte-stream filehandle into a DOM document with a new
# XML::LibXML parser and generates SAX events from that document.
# $options->{Source}{SystemId}, when present, is passed to parse_fh as the
# second argument.
sub _parse_bytestream {
    my ($self, $fh, $options) = @_;
    my $parser = XML::LibXML->new();
    my @system_id = exists($options->{Source}{SystemId})
        ? ( $options->{Source}{SystemId} )
        : ();
    my $doc = $parser->parse_fh($fh, @system_id);
    return $self->generate($doc);
}
# Parses an XML string into a DOM document with a new XML::LibXML parser
# and generates SAX events from that document.
# $options->{Source}{SystemId}, when present, is passed to parse_string as
# the second argument.
sub _parse_string {
    my ($self, $str, $options) = @_;
    my $parser = XML::LibXML->new();
    my $doc;
    if (exists($options->{Source}{SystemId})) {
        $doc = $parser->parse_string($str, $options->{Source}{SystemId});
    }
    else {
        $doc = $parser->parse_string($str);
    }
    return $self->generate($doc);
}
# Parses the file named by $sysid into a DOM document with a new
# XML::LibXML parser and generates SAX events from that document.
# The $options argument is accepted but not used.
sub _parse_systemid {
    my ($self, $sysid, $options) = @_;
    my $document = XML::LibXML->new()->parse_file($sysid);
    return $self->generate($document);
}
# Generates SAX events from a DOM node.
#
# First installs a document locator whose PublicId/SystemId come from the
# owner document's external subset and whose Encoding/XMLVersion come from
# the owner document (all undef when unavailable); its line number callback
# reports the line of the node currently being processed and its column
# callback always reports 1. Then, when $node is an XML or HTML document
# node, emits start_document, xml_decl, the events for the tree and
# end_document. Other node types produce no events here.
sub generate {
    my ($self, $node) = @_;
    my $doc = $node->ownerDocument();
    {
        # precompute some DocumentLocator values
        my %locator = (
            PublicId   => undef,
            SystemId   => undef,
            Encoding   => undef,
            XMLVersion => undef,
        );
        if (defined $doc) {
            my $dtd = $doc->externalSubset();
            if (defined $dtd) {
                $locator{PublicId} = $dtd->publicId();
                $locator{SystemId} = $dtd->systemId();
            }
            $locator{Encoding}   = $doc->encoding();
            $locator{XMLVersion} = $doc->version();
        }

        my $line_number = sub {
            defined($self->{current_node}) ? $self->{current_node}->line_number() : undef
        };
        my $locator_object = XML::SAX::DocumentLocator->new(
            sub { $locator{PublicId} },
            sub { $locator{SystemId} },
            $line_number,
            sub { 1 },
            sub { $locator{Encoding} },
            sub { $locator{XMLVersion} },
        );
        $self->set_document_locator($locator_object);
    }

    if ( $node->nodeType() == XML_DOCUMENT_NODE
         || $node->nodeType == XML_HTML_DOCUMENT_NODE ) {
        $self->start_document({});
        $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding});
        $self->process_node($node);
        $self->end_document({});
    }
}
# Dispatches on the type of $node and emits the matching SAX event(s):
#   comment                          -> comment
#   text / CDATA                     -> characters
#   element                          -> process_element()
#   entity reference                 -> each child is processed in turn
#   document / HTML doc / fragment   -> each child is processed in turn
#   processing instruction           -> processing_instruction
# XInclude markers, DTD nodes and any other node type are ignored.
#
# While a node is being processed it is available as $self->{current_node};
# the previous value is restored on return (via local).
sub process_node {
    my ($self, $node) = @_;
    local $self->{current_node} = $node;
    my $node_type = $node->nodeType();

    if ($node_type == XML_COMMENT_NODE) {
        $self->comment( { Data => $node->getData } );
    }
    elsif ($node_type == XML_TEXT_NODE
           || $node_type == XML_CDATA_SECTION_NODE) {
        $self->characters( { Data => $node->nodeValue } );
    }
    elsif ($node_type == XML_ELEMENT_NODE) {
        $self->process_element($node);
    }
    elsif ($node_type == XML_ENTITY_REF_NODE) {
        foreach my $kid ($node->childNodes) {
            $self->process_node($kid);
        }
    }
    elsif ($node_type == XML_DOCUMENT_NODE
           || $node_type == XML_HTML_DOCUMENT_NODE
           || $node_type == XML_DOCUMENT_FRAG_NODE) {
        # sometimes it is just useful to generate SAX events from
        # a document fragment (very good with filters).
        foreach my $kid ($node->childNodes) {
            $self->process_node($kid);
        }
    }
    elsif ($node_type == XML_PI_NODE) {
        $self->processing_instruction( { Target => $node->getName, Data => $node->getData } );
    }
    # (A second XML_COMMENT_NODE test used to follow here; it could never
    # match because comments are handled by the first branch.)
    elsif ( $node_type == XML_XINCLUDE_START
            || $node_type == XML_XINCLUDE_END ) {
        # ignore!
        # i may want to handle this one day, dunno yet
    }
    elsif ($node_type == XML_DTD_NODE ) {
        # ignore!
        # i will support DTDs, but had no time yet.
    }
    else {
        # unsupported node types are silently ignored
    }
}
# Emits the SAX events for one element:
#   1. start_prefix_mapping for every namespace returned by getNamespaces,
#   2. start_element with a hash describing the element and its attributes,
#   3. the events of every child node (via process_node),
#   4. end_element with a copy of that hash without the Attributes entry,
#   5. end_prefix_mapping for the same namespaces, in the same order as
#      they were started.
#
# Attributes are stored in a hash keyed by "{namespace-uri}localname"; each
# value is a hash with Name, Value, NamespaceURI, Prefix and LocalName.
sub process_element {
my ($self, $element) = @_;
my $attribs = {};
my @ns_maps = $element->getNamespaces;
# An undefined namespace localname (no prefix) is reported as prefix ''.
foreach my $ns (@ns_maps) {
$self->start_prefix_mapping(
{
NamespaceURI => $ns->href,
Prefix => ( defined $ns->localname ? $ns->localname : ''),
}
);
}
foreach my $attr ($element->attributes) {
# Only the regular-attribute branch below assigns this $key; the namespace
# branch declares its own lexical $key.
my $key;
# warn("Attr: $attr -> ", $attr->getName, " = ", $attr->getData, "\n");
# Namespace declarations come back from attributes() as
# XML::LibXML::Namespace objects and need separate treatment.
if ($attr->isa('XML::LibXML::Namespace')) {
# TODO This needs fixing modulo agreeing on what
# is the right thing to do here.
unless ( defined $attr->name ) {
## It's an attribute like "xmlns='foo'"
$attribs->{"{}xmlns"} =
{
Name => "xmlns",
LocalName => "xmlns",
Prefix => "",
Value => $attr->href,
NamespaceURI => "",
};
}
else {
# Namespace declaration with a name: reported as an attribute in the
# http://www.w3.org/2000/xmlns/ namespace, named "xmlns" or
# "xmlns:<localname>" when a localname is defined.
my $prefix = "xmlns";
my $localname = $attr->localname;
my $key = "{http://www.w3.org/2000/xmlns/}";
my $name = "xmlns";
if ( defined $localname ) {
$key .= $localname;
$name.= ":".$localname;
}
$attribs->{$key} =
{
Name => $name,
Value => $attr->href,
NamespaceURI => "http://www.w3.org/2000/xmlns/",
Prefix => $prefix,
LocalName => $localname,
};
}
}
else {
# Regular attribute: an undefined namespace URI or prefix is reported as ''.
my $ns = $attr->namespaceURI;
$ns = '' unless defined $ns;
$key = "{$ns}".$attr->localname;
## Not sure why, but $attr->name is coming through stripped
## of its prefix, so we need to hand-assemble a real name.
my $name = $attr->name;
$name = "" unless defined $name;
my $prefix = $attr->prefix;
$prefix = "" unless defined $prefix;
# Prepend the prefix only when the name has no colon yet and the prefix
# is non-empty.
$name = "$prefix:$name"
if index( $name, ":" ) < 0 && length $prefix;
$attribs->{$key} =
{
Name => $name,
Value => $attr->value,
NamespaceURI => $ns,
Prefix => $prefix,
LocalName => $attr->localname,
};
}
# use Data::Dumper;
# warn("Attr made: ", Dumper($attribs->{$key}), "\n");
}
# An undefined (or otherwise false) element prefix is reported as "".
my $node = {
Name => $element->nodeName,
Attributes => $attribs,
NamespaceURI => $element->namespaceURI,
Prefix => $element->prefix || "",
LocalName => $element->localname,
};
$self->start_element($node);
foreach my $child ($element->childNodes) {
$self->process_node($child);
}
# end_element gets a shallow copy of the element description without the
# Attributes entry.
my $end_node = { %$node };
delete $end_node->{Attributes};
$self->end_element($end_node);
foreach my $ns (@ns_maps) {
$self->end_prefix_mapping(
{
NamespaceURI => $ns->href,
Prefix => ( defined $ns->localname ? $ns->localname : ''),
}
);
}
}
1;
__END__

89
lib/XML/LibXML/Schema.pod Normal file
View File

@ -0,0 +1,89 @@
=head1 NAME
XML::LibXML::Schema - XML Schema Validation
=head1 SYNOPSIS
use XML::LibXML;
$doc = XML::LibXML->new->parse_file($url);
$xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 );
$xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 );
eval { $xmlschema->validate( $doc ); };
=head1 DESCRIPTION
The XML::LibXML::Schema class is a tiny frontend to libxml2's XML Schema
implementation. Currently it supports only schema parsing and document
validation. As of 2.6.32, libxml2 only supports decimal types up to 24 digits
(the standard requires at least 18).
=head1 METHODS
=over 4
=item new
$xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 );
$xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 );
The constructor of XML::LibXML::Schema must be called with a list of
parameters. At least one of the location or string parameters is required to
specify the source of the schema. The optional parameter no_network, when set to
1, prevents the parser from accessing the network, and the optional parameter
recover, when set to 1, prevents the parser from calling die() on errors.
It is important that each schema has only a single source.
The location parameter allows one to parse a schema from the filesystem or a
(non-HTTPS) URL.
The string parameter will parse the schema from the given XML string.
Note that the constructor will die() if the schema does not meet the
constraints of the XML Schema specification.
=item validate
eval { $xmlschema->validate( $doc ); };
This function allows one to validate a (parsed) document against the given XML
Schema. The argument of this function should be a L<<<<<< XML::LibXML::Document >>>>>> object. If this function succeeds, it will return 0, otherwise it will die()
and report the errors found. Because of this, validate() should always be
called inside an eval block.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

190
lib/XML/LibXML/Text.pod Normal file
View File

@ -0,0 +1,190 @@
=head1 NAME
XML::LibXML::Text - XML::LibXML Class for Text Nodes
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Text nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$text = XML::LibXML::Text->new( $content );
$nodedata = $text->data;
$text->setData( $text_content );
$text->substringData($offset, $length);
$text->appendData( $somedata );
$text->insertData($offset, $string);
$text->deleteData($offset, $length);
$text->deleteDataString($remstring, $all);
$text->replaceData($offset, $length, $string);
$text->replaceDataString($old, $new, $flag);
$text->replaceDataRegEx( $search_cond, $replace_cond, $reflags );
=head1 DESCRIPTION
Unlike the DOM specification, XML::LibXML implements the text node as the base
class of all character data nodes. Therefore there exists no CharacterData
class. This allows one to apply methods of text nodes also to Comments and
CDATA-sections.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$text = XML::LibXML::Text->new( $content );
The constructor of the class. It creates an unbound text node.
=item data
$nodedata = $text->data;
Although there exists the C<<<<<< nodeValue >>>>>> attribute in the Node class, the DOM specification defines data as a separate
attribute. C<<<<<< XML::LibXML >>>>>> implements these two attributes not as different attributes, but as aliases,
such as C<<<<<< libxml2 >>>>>> does. Therefore
$text->data;
and
$text->nodeValue;
will have the same result and are not different entities.
=item setData($string)
$text->setData( $text_content );
This function sets or replaces text content to a node. The node has to be of
the type "text", "cdata" or "comment".
=item substringData($offset,$length)
$text->substringData($offset, $length);
Extracts a range of data from the node. (DOM Spec) This function takes the two
parameters $offset and $length and returns the sub-string, if available.
If the node contains no data or $offset refers to a non-existing string index,
this function will return I<<<<<< undef >>>>>>. If $length is out of range C<<<<<< substringData >>>>>> will return the data starting at $offset instead of causing an error.
=item appendData($string)
$text->appendData( $somedata );
Appends a string to the end of the existing data. If the current text node
contains no data, this function has the same effect as C<<<<<< setData >>>>>>.
=item insertData($offset,$string)
$text->insertData($offset, $string);
Inserts the parameter $string at the given $offset of the existing data of the
node. This operation will not remove existing data, but change the order of the
existing data.
The $offset has to be a positive value. If $offset is out of range, C<<<<<< insertData >>>>>> will have the same behaviour as C<<<<<< appendData >>>>>>.
=item deleteData($offset, $length)
$text->deleteData($offset, $length);
This method removes a chunk from the existing node data at the given offset.
The $length parameter specifies how many characters should be removed from the
string.
=item deleteDataString($string, [$all])
$text->deleteDataString($remstring, $all);
This method removes a chunk from the existing node data. Since the DOM spec is
quite unhandy if you already know C<<<<<< which >>>>>> string to remove from a text node, this method allows more perlish code :)
The function takes two parameters: I<<<<<< $string >>>>>> and, optionally, the I<<<<<< $all >>>>>> flag. If $all is not set, I<<<<<< undef >>>>>> or I<<<<<< 0 >>>>>>, C<<<<<< deleteDataString >>>>>> will remove only the first occurrence of $string. If $all is I<<<<<< TRUE >>>>>>, C<<<<<< deleteDataString >>>>>> will remove all occurrences of I<<<<<< $string >>>>>> from the node data.
=item replaceData($offset, $length, $string)
$text->replaceData($offset, $length, $string);
The DOM style version to replace node data.
=item replaceDataString($oldstring, $newstring, [$all])
$text->replaceDataString($old, $new, $flag);
The more programmer friendly version of replaceData() :)
Instead of giving offsets and length one can specify the exact string (I<<<<<< $oldstring >>>>>>) to be replaced. Additionally the I<<<<<< $all >>>>>> flag allows one to replace all occurrences of I<<<<<< $oldstring >>>>>>.
=item replaceDataRegEx( $search_cond, $replace_cond, $reflags )
$text->replaceDataRegEx( $search_cond, $replace_cond, $reflags );
This method replaces the node's data by a C<<<<<< simple >>>>>> regular expression. Optionally, this function allows one to pass some flags that
will be added as flags to the replace statement.
I<<<<<< NOTE: >>>>>> This is a shortcut for
my $datastr = $node->getData();
$datastr =~ s/somecond/replacement/g; # 'g' is just an example for any flag
$node->setData( $datastr );
This function can make things easier to read for simple replacements. For more
complex variants it is recommended to use the code snippet above.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,147 @@
# $Id: XPathContext.pm 422 2002-11-08 17:10:30Z phish $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::XPathContext;
use strict;
use warnings;
use vars qw($VERSION @ISA $USE_LIBXML_DATA_TYPES);
use Carp;
use XML::LibXML;
use XML::LibXML::NodeList;
$VERSION = "2.0207"; # VERSION TEMPLATE: DO NOT CHANGE
# should LibXML XPath data types be used for simple objects
# when passing parameters to extension functions (default: no)
$USE_LIBXML_DATA_TYPES = 0;
# Hook consulted by Perl when a new interpreter thread is cloned: always
# returns 1, so objects of this class are never cloned.
sub CLONE_SKIP {
    return 1;
}
# Evaluates $xpath (optionally relative to $node) and returns the matching
# nodes: as a list in list context, otherwise as an XML::LibXML::NodeList.
sub findnodes {
    my ($self, $xpath, $node) = @_;
    my @nodes = $self->_guarded_find_call('_findnodes', $node, $xpath);
    return @nodes if wantarray;
    return XML::LibXML::NodeList->new(@nodes);
}
# Evaluates $xpath (optionally relative to $node). The low-level call yields
# a result class name followed by constructor arguments; the result object
# is built from them. Returns undef when no class name comes back.
sub find {
    my ($self, $xpath, $node) = @_;
    my ($type, @params) = $self->_guarded_find_call('_find', $node, $xpath, 0);
    return undef unless $type;
    return $type->new(@params);
}
# Evaluates $xpath (optionally relative to $node) through the low-level
# _find call with its last argument set to 1, and returns the second
# element of that call's result.
sub exists {
    my ($self, $xpath, $node) = @_;
    my @found = $self->_guarded_find_call('_find', $node, $xpath, 1);
    return $found[1];
}
# Evaluates an XPath expression via find() (same arguments) and returns the
# literal value of the result.
sub findvalue {
    my $self = shift;
    my $found = $self->find(@_);
    return $found->to_literal->value;
}
# Calls the low-level method $method with the remaining arguments while
# $node (when it is a reference) is temporarily installed as the context
# node.
#
# Whether the call succeeds or not, the node pool is freed and the previous
# context node is restored afterwards. An exception raised by the call is
# re-thrown with croak (trailing newline removed); otherwise the result
# list of the call is returned.
sub _guarded_find_call {
    my ($self, $method, $node) = (shift, shift, shift);

    my $prev_node;
    if (ref($node)) {
        $prev_node = $self->getContextNode();
        $self->setContextNode($node);
    }

    my @ret;
    eval {
        @ret = $self->$method(@_);
    };
    # Save the error at once: the cleanup calls below may run code that
    # resets $@, which would silently turn a failure into an empty result.
    my $err = $@;

    $self->_free_node_pool;
    $self->setContextNode($prev_node) if ref($node);

    if ($err) {
        chomp $err;
        croak $err;
    }
    return @ret;
}
# registerFunction($name, $sub)
#
# Shorthand for registerFunctionNS($name, undef, $sub).  Returns nothing.
sub registerFunction {
    my $self = shift;
    my ($name, $sub) = @_;
    $self->registerFunctionNS($name, undef, $sub);
    return;
}
# unregisterNs($prefix)
#
# Shorthand for registerNs($prefix, undef).  Returns nothing.
sub unregisterNs {
    my $self   = shift;
    my $prefix = shift;
    $self->registerNs($prefix, undef);
    return;
}
# unregisterFunction($name)
#
# Shorthand for registerFunctionNS($name, undef, undef).  Returns nothing.
sub unregisterFunction {
    my $self = shift;
    my $name = shift;
    $self->registerFunctionNS($name, undef, undef);
    return;
}
# unregisterFunctionNS($name, $ns)
#
# Shorthand for registerFunctionNS($name, $ns, undef).  Returns nothing.
sub unregisterFunctionNS {
    my $self = shift;
    my ($name, $ns) = @_;
    $self->registerFunctionNS($name, $ns, undef);
    return;
}
# unregisterVarLookupFunc()
#
# Shorthand for registerVarLookupFunc(undef, undef).  Takes no arguments
# besides the invocant and returns nothing.
sub unregisterVarLookupFunc {
    my $self = shift;
    $self->registerVarLookupFunc(undef, undef);
    return;
}
# Dispatcher for extension functions written in Perl (borrowed from
# XML::LibXSLT).
#
# The first argument is the function to call (a code reference or a function
# name); the rest is a flat, type-tagged description of the arguments:
#   'XML::LibXML::Literal' | 'XML::LibXML::Number' | 'XML::LibXML::Boolean'
#       followed by one value;
#   'XML::LibXML::NodeList'
#       followed by a node count and then that many nodes.
# Each decoded argument is prepended to the argument list, so the function
# receives the arguments in the reverse of the order in which they are
# described.  Unknown type tags are skipped.  A function name without a
# package qualifier is looked up in main::.  The function is called in scalar
# context and its result is returned.
sub _perl_dispatcher {
    my ($func, @queue) = @_;
    my @args;
    while (@queue) {
        my $type = shift @queue;
        if ($type eq 'XML::LibXML::NodeList') {
            my $count = shift @queue;
            unshift @args, $type->new(splice(@queue, 0, $count));
        }
        elsif (   $type eq 'XML::LibXML::Literal'
               || $type eq 'XML::LibXML::Number'
               || $type eq 'XML::LibXML::Boolean')
        {
            my $val = shift @queue;
            # wrap simple values only when the package-level switch asks for it
            unshift @args, ($USE_LIBXML_DATA_TYPES ? $type->new($val) : $val);
        }
    }
    unless (ref($func) || $func =~ /(.+)::/) {
        $func = "main::$func";
    }
    no strict 'refs';
    my $res = $func->(@args);
    return $res;
}
1;

View File

@ -0,0 +1,382 @@
=head1 NAME
XML::LibXML::XPathContext - XPath Evaluation
=head1 SYNOPSIS
my $xpc = XML::LibXML::XPathContext->new();
my $xpc = XML::LibXML::XPathContext->new($node);
$xpc->registerNs($prefix, $namespace_uri)
$xpc->unregisterNs($prefix)
$uri = $xpc->lookupNs($prefix)
$xpc->registerVarLookupFunc($callback, $data)
$data = $xpc->getVarLookupData();
$callback = $xpc->getVarLookupFunc();
$xpc->unregisterVarLookupFunc();
$xpc->registerFunctionNS($name, $uri, $callback)
$xpc->unregisterFunctionNS($name, $uri)
$xpc->registerFunction($name, $callback)
$xpc->unregisterFunction($name)
@nodes = $xpc->findnodes($xpath)
@nodes = $xpc->findnodes($xpath, $context_node )
$nodelist = $xpc->findnodes($xpath, $context_node )
$object = $xpc->find($xpath )
$object = $xpc->find($xpath, $context_node )
$value = $xpc->findvalue($xpath )
$value = $xpc->findvalue($xpath, $context_node )
$bool = $xpc->exists( $xpath_expression, $context_node );
$xpc->setContextNode($node)
my $node = $xpc->getContextNode;
$xpc->setContextPosition($position)
my $position = $xpc->getContextPosition;
$xpc->setContextSize($size)
my $size = $xpc->getContextSize;
$xpc->setContextNode($node)
=head1 DESCRIPTION
The XML::LibXML::XPathContext class provides an almost complete interface to
libxml2's XPath implementation. With XML::LibXML::XPathContext, it is possible
to evaluate XPath expressions in the context of an arbitrary node, context size,
and context position, with a user-defined namespace-prefix mapping, custom
XPath functions written in Perl, and even a custom XPath variable resolver.
=head1 EXAMPLES
=head2 Namespaces
This example demonstrates C<<<<<< registerNs() >>>>>> method. It finds all paragraph nodes in an XHTML document.
my $xc = XML::LibXML::XPathContext->new($xhtml_doc);
$xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml');
my @nodes = $xc->findnodes('//xhtml:p');
=head2 Custom XPath functions
This example demonstrates C<<<<<< registerFunction() >>>>>> method by defining a function filtering nodes based on a Perl regular
expression:
sub grep_nodes {
my ($nodelist,$regexp) = @_;
my $result = XML::LibXML::NodeList->new;
for my $node ($nodelist->get_nodelist()) {
$result->push($node) if $node->textContent =~ $regexp;
}
return $result;
};
my $xc = XML::LibXML::XPathContext->new($node);
$xc->registerFunction('grep_nodes', \&grep_nodes);
my @nodes = $xc->findnodes('//section[grep_nodes(para,"\bsearch(ing|es)?\b")]');
=head2 Variables
This example demonstrates C<<<<<< registerVarLookupFunc() >>>>>> method. We use XPath variables to recycle results of previous evaluations:
sub var_lookup {
my ($data,$varname,$ns)=@_;
return $data->{$varname};
}
my $areas = XML::LibXML->new->parse_file('areas.xml');
my $empl = XML::LibXML->new->parse_file('employees.xml');
my $xc = XML::LibXML::XPathContext->new($empl);
my %variables = (
A => $xc->find('/employees/employee[@salary>10000]'),
B => $areas->find('/areas/area[district="Brooklyn"]/street'),
);
# get names of employees from $A working in an area listed in $B
$xc->registerVarLookupFunc(\&var_lookup, \%variables);
my @nodes = $xc->findnodes('$A[work_area/street = $B]/name');
=head1 METHODS
=over 4
=item new
my $xpc = XML::LibXML::XPathContext->new();
Creates a new XML::LibXML::XPathContext object without a context node.
my $xpc = XML::LibXML::XPathContext->new($node);
Creates a new XML::LibXML::XPathContext object with the context node set to C<<<<<< $node >>>>>>.
=item registerNs
$xpc->registerNs($prefix, $namespace_uri)
Registers namespace C<<<<<< $prefix >>>>>> to C<<<<<< $namespace_uri >>>>>>.
=item unregisterNs
$xpc->unregisterNs($prefix)
Unregisters namespace C<<<<<< $prefix >>>>>>.
=item lookupNs
$uri = $xpc->lookupNs($prefix)
Returns namespace URI registered with C<<<<<< $prefix >>>>>>. If C<<<<<< $prefix >>>>>> is not registered to any namespace URI returns C<<<<<< undef >>>>>>.
=item registerVarLookupFunc
$xpc->registerVarLookupFunc($callback, $data)
Registers variable lookup function C<<<<<< $callback >>>>>>. The registered function is executed by the XPath engine each time an XPath
variable is evaluated. It takes three arguments: C<<<<<< $data >>>>>>, variable name, and variable ns-URI and must return one value: a number or
string or any C<<<<<< XML::LibXML:: >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g.
Document, Element, etc.), or NodeList. For convenience, simple (non-blessed)
array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of an L<<<<<< XML::LibXML::NodeList >>>>>>.
=item getVarLookupData
$data = $xpc->getVarLookupData();
Returns the data that have been associated with a variable lookup function
during a previous call to C<<<<<< registerVarLookupFunc >>>>>>.
=item getVarLookupFunc
$callback = $xpc->getVarLookupFunc();
Returns the variable lookup function previously registered with C<<<<<< registerVarLookupFunc >>>>>>.
=item unregisterVarLookupFunc
$xpc->unregisterVarLookupFunc();
Unregisters variable lookup function and the associated lookup data.
=item registerFunctionNS
$xpc->registerFunctionNS($name, $uri, $callback)
Registers an extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. C<<<<<< $callback >>>>>> must be a CODE reference. The arguments of the callback function are either
simple scalars or C<<<<<< XML::LibXML::* >>>>>> objects depending on the XPath argument types. The function is responsible for
checking the argument number and types. Result of the callback code must be a
single value of the following types: a simple scalar (number, string) or an
arbitrary C<<<<<< XML::LibXML::* >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g.
Document, Element, etc.), or NodeList. For convenience, simple (non-blessed)
array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of a L<<<<<< XML::LibXML::NodeList >>>>>>.
=item unregisterFunctionNS
$xpc->unregisterFunctionNS($name, $uri)
Unregisters extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. Has the same effect as passing C<<<<<< undef >>>>>> as C<<<<<< $callback >>>>>> to registerFunctionNS.
=item registerFunction
$xpc->registerFunction($name, $callback)
Same as C<<<<<< registerFunctionNS >>>>>> but without a namespace.
=item unregisterFunction
$xpc->unregisterFunction($name)
Same as C<<<<<< unregisterFunctionNS >>>>>> but without a namespace.
=item findnodes
@nodes = $xpc->findnodes($xpath)
@nodes = $xpc->findnodes($xpath, $context_node )
$nodelist = $xpc->findnodes($xpath, $context_node )
Performs the xpath statement on the current node and returns the result as an
array. In scalar context, returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. Optionally, a node may be passed as a second argument to set the
context node for the query.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item find
$object = $xpc->find($xpath )
$object = $xpc->find($xpath, $context_node )
Performs the xpath expression using the current node as the context of the
expression, and returns the result depending on what type of result the XPath
expression had. For example, the XPath C<<<<<< 1 * 3 + 52 >>>>>> results in an L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return a L<<<<<< XML::LibXML::Boolean >>>>>> object, or a L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to "do
the right thing" in different contexts. Optionally, a node may be passed as a
second argument to set the context node for the query.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item findvalue
$value = $xpc->findvalue($xpath )
$value = $xpc->findvalue($xpath, $context_node )
Is exactly equivalent to:
$xpc->find( $xpath, $context_node )->to_literal->value;
That is, it returns the literal value of the results. This enables you to
ensure that you get a string back from your search, allowing certain shortcuts.
This could be used as the equivalent of <xsl:value-of select="some_xpath"/>.
Optionally, a node may be passed in the second argument to set the context node
for the query.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item exists
$bool = $xpc->exists( $xpath_expression, $context_node );
This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a
node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true
for libxml2 >= 2.6.27).
For XPath expressions that do not return node-set, the method returns true if
the returned value is a non-zero number or a non-empty string.
=item setContextNode
$xpc->setContextNode($node)
Set the current context node.
=item getContextNode
my $node = $xpc->getContextNode;
Get the current context node.
=item setContextPosition
$xpc->setContextPosition($position)
Set the current context position. By default, this value is -1 (and evaluating
XPath function C<<<<<< position() >>>>>> in the initial context raises an XPath error), but can be set to any value up
to context size. This usually only serves to cheat the XPath engine to return
given position when C<<<<<< position() >>>>>> XPath function is called. Setting this value to -1 restores the default
behavior.
=item getContextPosition
my $position = $xpc->getContextPosition;
Get the current context position.
=item setContextSize
$xpc->setContextSize($size)
Set the current context size. By default, this value is -1 (and evaluating
XPath function C<<<<<< last() >>>>>> in the initial context raises an XPath error), but can be set to any
non-negative value. This usually only serves to cheat the XPath engine to
return the given value when C<<<<<< last() >>>>>> XPath function is called. If context size is set to 0, position is
automatically also set to 0. If context size is positive, position is
automatically set to 1. Setting context size to -1 restores the default
behavior.
=item getContextSize
my $size = $xpc->getContextSize;
Get the current context size.
=item setContextNode
$xpc->setContextNode($node)
Set the current context node.
=back
=head1 BUGS AND CAVEATS
XML::LibXML::XPathContext objects I<<<<<< are >>>>>> reentrant, meaning that you can call methods of an XML::LibXML::XPathContext
even from XPath extension functions registered with the same object or from a
variable lookup function. On the other hand, you should rather avoid
registering new extension functions, namespaces and a variable lookup function
from within extension functions and a variable lookup function, unless you want
to experience untested behavior.
=head1 AUTHORS
Ilya Martynov and Petr Pajas, based on XML::LibXML and XML::LibXSLT code by
Matt Sergeant and Christian Glahn.
=head1 HISTORICAL REMARK
Prior to XML::LibXML 1.61 this module was distributed separately for
maintenance reasons.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@ -0,0 +1,72 @@
=head1 NAME
XML::LibXML::XPathExpression - interface to libxml2 pre-compiled XPath expressions
=head1 SYNOPSIS
use XML::LibXML;
my $compiled_xpath = XML::LibXML::XPathExpression->new('//foo[@bar="baz"][position()<4]');
# interface from XML::LibXML::Node
my $result = $node->find($compiled_xpath);
my @nodes = $node->findnodes($compiled_xpath);
my $value = $node->findvalue($compiled_xpath);
# interface from XML::LibXML::XPathContext
my $result = $xpc->find($compiled_xpath,$node);
my @nodes = $xpc->findnodes($compiled_xpath,$node);
my $value = $xpc->findvalue($compiled_xpath,$node);
$compiled = XML::LibXML::XPathExpression->new( xpath_string );
=head1 DESCRIPTION
This is a perl interface to libxml2's pre-compiled XPath expressions.
Pre-compiling an XPath expression can give some performance benefit if the
same XPath query is evaluated many times. C<<<<<< XML::LibXML::XPathExpression >>>>>> objects can be passed to all C<<<<<< find... >>>>>> functions of C<<<<<< XML::LibXML >>>>>> that expect an XPath expression.
=over 4
=item new()
$compiled = XML::LibXML::XPathExpression->new( xpath_string );
The constructor takes an XPath 1.0 expression as a string and returns an object
representing the pre-compiled expressions (the actual data structure is
internal to libxml2).
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0207
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

1324
perl-libxml-mm.c Normal file

File diff suppressed because it is too large Load Diff

372
perl-libxml-mm.h Normal file
View File

@ -0,0 +1,372 @@
/**
* perl-libxml-mm.h
* $Id$
*
* Basic concept:
* perl varies in the implementation of UTF8 handling. this header (together
* with the c source) implements a few functions, that can be used from within
* the core module in order to avoid cascades of c pragmas
*/
#ifndef __PERL_LIBXML_MM_H__
#define __PERL_LIBXML_MM_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "EXTERN.h"
#include "perl.h"
#include <libxml/parser.h>
#ifdef __cplusplus
}
#endif
/*
 * NAME xs_warn
 * TYPE MACRO
 *
 * This macro is for XML::LibXML development and debugging.
 *
 * SYNOPSIS
 * xs_warn("my warning")
 *
 * The macro takes only a single string(!) and passes it to perl's
 * warn function (as the argument of a "%s" format) if XS_WARNINGS is
 * defined at compile time; otherwise any xs_warn call expands to nothing.
 *
 * Pay attention: xs_warn does not implement a complete wrapper
 * for warn -- it accepts no format string and no further arguments.
 */
#ifdef XS_WARNINGS
#define xs_warn(string) warn("%s",string)
#else
#define xs_warn(string)
#endif
/*
 * Proxy structures that sit between libxml2 nodes and the perl layer.
 *
 * @node: Reference to the node the structure proxies
 * @owner: libxml defines only the document, but not the node owner
 * (in case of document fragments, they are not the same!)
 * @count: this is the internal reference count!
 * @encoding: this value is missing in libxml2's doc structure
 * (member of _DocProxyNode only)
 * @psvi_status: validation bookkeeping flag (member of _DocProxyNode only)
 *
 * Since XML::LibXML cannot know whether a certain node is already
 * defined in the perl layer, it cannot tell for sure when a node can
 * safely be removed from memory. This structure helps to keep
 * track of how intensely the nodes of a document are used, so that nodes
 * are not deleted while they are still referred to from somewhere else.
 */
struct _ProxyNode {
xmlNodePtr node;
xmlNodePtr owner;
int count;
};
/* Document variant of the proxy. Its first three members repeat those of
 * _ProxyNode in the same order; the PmmENCODING and PSVI macros in this
 * header reach the extra members by casting a proxy pointer to
 * DocProxyNodePtr. */
struct _DocProxyNode {
xmlNodePtr node;
xmlNodePtr owner;
int count;
int encoding; /* only used for proxies of xmlDocPtr */
int psvi_status; /* see below ... */
};
/* the psvi_status flag requires some explanation:
each time libxml2 validates a document (using DTD, Schema or
RelaxNG) it stores a pointer to the last successfully applied grammar
rule in node->psvi. Upon next validation, if libxml2 wants to check
that node matches some grammar rule, it first compares the rule
pointer and node->psvi. If these are equal, the validation of the
node's subtree is skipped and the node is assumed to match the
rule.
This causes problems when the tree is modified and then
re-validated or when the schema is freed and the document is
revalidated using a different schema and by bad chance a rule
tested against some node got allocated to the exact same location
as the rule from the schema used for the prior validation (already
freed, but still pointed to by node->psvi).
Thus, the node->psvi values can't be trusted at all and we want to
make sure all psvi slots are NULL before each validation. To avoid
traversing the tree in the most common case, when each document is
validated just once, we maintain the psvi_status flag.
Validating a document triggers this flag (sets it to 1). The
document with psvi_status==1 is traversed and psvi slots are nulled
prior to any validation. When the flag is triggered, it remains
triggered for the rest of the document's life, there is no way to
null it (even nulling up the psvi's does not null the flag, because
there may be unlinked parts of the document floating around which
we don't know about and thus cannot null their psvi pointers; these
unlinked document parts would cause inconsistency when re-attached
to the document tree).
Also, importing a node from a document with psvi_status==1 to a
document with psvi_status==0 automatically triggers psvi_status on
the target document.
NOTE: We could alternatively just null psvis from any imported
subtrees, but that would add an O(n) cleanup operation (n the size
of the imported subtree) on every importNode (possibly needlessly
since the target document may not ever be revalidated) whereas
triggering the flag is O(1) and possibly adds one O(N) cleanup
operation (N the size of the document) to the first validation of
the target document (any subsequent re-validation of the document
would have to perform the operation anyway). The sum of all n's may
be less than N, but on the other hand, there is a great chance that
the O(N) cleanup will never be performed. (BTW, validation is at least
O(N), probably O(Nlog N) anyway, so the cleanup has little impact;
similarly, importNode does xmlSetTreeDoc which is also O(n). So in
fact, neither solution should have significant performance impact
overall....).
*/
/* psvi_status value: flag not triggered */
#define Pmm_NO_PSVI 0
/* psvi_status value: flag triggered, psvi slots must be nulled before validating */
#define Pmm_PSVI_TAINTED 1
/* helper types for the proxy structures (plain aliases of the struct tags) */
typedef struct _DocProxyNode DocProxyNode;
typedef struct _ProxyNode ProxyNode;
/* pointers to the proxy structures; these are the types used by the
 * macros and prototypes in the rest of this header */
typedef ProxyNode* ProxyNodePtr;
typedef DocProxyNode* DocProxyNodePtr;
/* Accessor macros for the proxy structures. (Original note: these may go
 * only into the header used by the xs.)
 *
 * Every macro argument and every expression-like expansion is wrapped in
 * parentheses so that the macros stay correct when they are handed a
 * compound expression (e.g. PmmREFCNT(p + 1)) or are embedded in a larger
 * expression. The accessors still expand to lvalues, so assignments such as
 * PmmOWNER(proxy) = owner keep working.
 */
/* proxy pointer stored as an integer inside the referenced perl scalar */
#define SvPROXYNODE(x) (INT2PTR(ProxyNodePtr,SvIV(SvRV(x))))
/* proxy pointer stored in the _private slot of a libxml2 node */
#define PmmPROXYNODE(x) (INT2PTR(ProxyNodePtr,(x)->_private))
/* xmlNsPtr stored as an integer inside the referenced perl scalar */
#define SvNAMESPACE(x) (INT2PTR(xmlNsPtr,SvIV(SvRV(x))))
/* reference count of a proxy / post-increment of that count */
#define PmmREFCNT(node) ((node)->count)
#define PmmREFCNT_inc(node) ((node)->count++)
/* node and owner members of a proxy */
#define PmmNODE(xnode) ((xnode)->node)
#define PmmOWNER(node) ((node)->owner)
/* proxy of the owner node if the proxy has an owner, otherwise the proxy
 * itself (which may be NULL) */
#define PmmOWNERPO(node) (((node) && PmmOWNER(node)) ? (ProxyNodePtr)(PmmOWNER(node)->_private) : (node))
/* encoding member, reached through a document proxy pointer or through the
 * _private slot of a libxml2 document node */
#define PmmENCODING(node) (((DocProxyNodePtr)(node))->encoding)
#define PmmNodeEncoding(node) (((DocProxyNodePtr)((node)->_private))->encoding)
#define SetPmmENCODING(node,code) (PmmENCODING(node)=(code))
#define SetPmmNodeEncoding(node,code) (PmmNodeEncoding(node)=(code))
/* Marks the document's proxy as PSVI-tainted if the document has a proxy.
 * NOTE(review): this macro is a bare `if` statement that already ends in a
 * semicolon; its shape is kept as is because call sites may rely on the
 * trailing semicolon. Do not use it as the unbraced body of an if/else. */
#define PmmInvalidatePSVI(doc) if ((doc) && (doc)->_private) ((DocProxyNodePtr)((doc)->_private))->psvi_status = Pmm_PSVI_TAINTED;
/* true if the document has a proxy whose psvi_status is Pmm_PSVI_TAINTED */
#define PmmIsPSVITainted(doc) ((doc) && (doc)->_private && (((DocProxyNodePtr)((doc)->_private))->psvi_status == Pmm_PSVI_TAINTED))
/* Calls domClearPSVI on the node if the node's document is PSVI-tainted.
 * Wrapped in do { } while (0) so that the macro behaves as one statement
 * and cannot capture an `else` that follows the call. */
#define PmmClearPSVI(node) do { \
    if ((node) && (node)->doc && (node)->doc->_private && \
        ((DocProxyNodePtr)((node)->doc->_private))->psvi_status == Pmm_PSVI_TAINTED) \
        domClearPSVI((xmlNodePtr) (node)); \
} while (0)
/* Thread support is compiled in when perl was built with USE_ITHREADS,
 * unless NO_XML_LIBXML_THREADS is defined to opt out. */
#ifndef NO_XML_LIBXML_THREADS
#ifdef USE_ITHREADS
#define XML_LIBXML_THREADS
#endif
#endif
#ifdef XML_LIBXML_THREADS
/* structure for storing thread-local refcount */
struct _LocalProxyNode {
    ProxyNodePtr proxy; /* the proxy this entry counts references for */
    int count;          /* thread-local reference count */
};
typedef struct _LocalProxyNode LocalProxyNode;
typedef LocalProxyNode* LocalProxyNodePtr;
/* true when PROXY_NODE_REGISTRY_MUTEX (defined outside this header) is set */
#define PmmUSEREGISTRY (PROXY_NODE_REGISTRY_MUTEX != NULL)
/* hash table whose address is stored in the perl variable
 * $XML::LibXML::__PROXY_NODE_REGISTRY */
#define PmmREGISTRY (INT2PTR(xmlHashTablePtr,SvIV(SvRV(get_sv("XML::LibXML::__PROXY_NODE_REGISTRY",0)))))
/* #define PmmREGISTRY (INT2PTR(xmlHashTablePtr,SvIV(SvRV(PROXY_NODE_REGISTRY)))) */
/* The two parameterless functions are declared with (void) so that they are
 * real prototypes: with an empty list the compiler would not check the
 * arguments of a call. */
void
PmmCloneProxyNodes(void);
int
PmmProxyNodeRegistrySize(void);
void
PmmDumpRegistry(xmlHashTablePtr r);
void
PmmRegistryREFCNT_dec(ProxyNodePtr proxy);
#endif
/* Proxy creation and reference counting. Only the declarations are visible
 * in this header; the notes below are inferred from names and signatures and
 * should be confirmed against the implementation. */
/* presumably releases a hash table and its entries -- TODO confirm */
void
PmmFreeHashTable(xmlHashTablePtr table);
/* presumably returns the proxy for a libxml2 node, creating it if needed */
ProxyNodePtr
PmmNewNode(xmlNodePtr node);
/* presumably creates a document fragment in `document` and returns its proxy */
ProxyNodePtr
PmmNewFragment(xmlDocPtr document);
/* variadic; the meaning of `type` and of the extra arguments is not visible
 * here -- TODO confirm */
SV*
PmmCreateDocNode( unsigned int type, ProxyNodePtr pdoc, ...);
/* counterpart of the PmmREFCNT_inc macro; NOTE(review): the meaning of the
 * int result is not visible here */
int
PmmREFCNT_dec( ProxyNodePtr node );
/* wraps a libxml2 node in a perl scalar; `owner` is the proxy recorded as
 * the node's owner (presumably) */
SV*
PmmNodeToSv( xmlNodePtr node, ProxyNodePtr owner );
/* PmmFixProxyEncoding
 * TYPE
 * Method
 * PARAMETER
 * @dfProxy: The proxy structure to fix.
 *
 * DESCRIPTION
 *
 * This little helper allows fixing the proxied encoding information
 * after a non-standard operation was done. This is required for
 * XML::LibXSLT.
 */
void
PmmFixProxyEncoding( ProxyNodePtr dfProxy );
/* PmmSvNodeExt
 * TYPE
 * Function
 * PARAMETER
 * @perlnode: the perl reference that holds the scalar.
 * @copy : copy flag
 *
 * DESCRIPTION
 *
 * The function recognizes XML::LibXML and XML::GDOME
 * nodes as valid input data. The second parameter 'copy'
 * indicates whether, in case of GDOME nodes, the libxml2 node
 * should be copied. In some cases, where the node is
 * cloned anyway, this flag has to be set to '0', while
 * the default value should always be '1'.
 */
xmlNodePtr
PmmSvNodeExt( SV * perlnode, int copy );
/* PmmSvNode
 * TYPE
 * Macro
 * PARAMETER
 * @perlnode: a perl reference that holds a libxml node
 *
 * DESCRIPTION
 *
 * PmmSvNode fetches the libxml node just as PmmSvNodeExt does. It is
 * a wrapper that always sets the copy flag to 1, which is good for all
 * cases XML::LibXML uses.
 */
#define PmmSvNode(n) PmmSvNodeExt(n,1)
/* Owner handling and parser-context wrappers. Only the declarations are
 * visible in this header; the notes below are inferred from names and
 * signatures and should be confirmed against the implementation. */
/* presumably returns the owner node of the node held by `perlnode` */
xmlNodePtr
PmmSvOwner( SV * perlnode );
/* presumably records `owner` as the owner of the node held by `perlnode` */
SV*
PmmSetSvOwner(SV * perlnode, SV * owner );
/* re-owns a proxy; NOTE(review): the meaning of the int result is not
 * visible here */
int
PmmFixOwner(ProxyNodePtr node, ProxyNodePtr newOwner );
/* same operation, addressed through the libxml2 node instead of its proxy
 * (presumably) */
void
PmmFixOwnerNode(xmlNodePtr node, ProxyNodePtr newOwner );
/* reference-count decrement for proxies of parser contexts (presumably) */
int
PmmContextREFCNT_dec( ProxyNodePtr node );
/* conversion between a libxml2 parser context and the perl scalar holding it */
SV*
PmmContextSv( xmlParserCtxtPtr ctxt );
xmlParserCtxtPtr
PmmSvContext( SV * perlctxt );
/**
 * NAME PmmCloneNode
 * TYPE function
 *
 * returns libxml2 node
 *
 * DESCRIPTION
 * This function implements a nodetype independent node cloning.
 * NOTE(review): `deep` presumably selects a recursive copy -- confirm
 * against the implementation.
 *
 * Note that this function has to stay in this module, since
 * XML::LibXSLT reuses it.
 */
xmlNodePtr
PmmCloneNode( xmlNodePtr node , int deep );
/**
 * NAME PmmNodeToGdomeSv
 * TYPE function
 *
 * returns XML::GDOME node
 *
 * DESCRIPTION
 * Creates a Gdome node from our XML::LibXML node.
 * This function is very useful for the parser.
 *
 * The function will only work if XML::LibXML is compiled with
 * XML::GDOME support.
 *
 */
SV *
PmmNodeToGdomeSv( xmlNodePtr node );
/**
 * NAME PmmNodeTypeName
 * TYPE function
 *
 * Returns the perl class name for the given node. The result is a
 * const char*, so callers must not modify it.
 *
 * SYNOPSIS
 * CLASS = PmmNodeTypeName( node );
 */
const char*
PmmNodeTypeName( xmlNodePtr elem );
/* Conversion of `string` between the named encoding and libxml2's internal
 * representation. NOTE(review): ownership of the returned buffers and the
 * in/out use of `len` in PmmDecodeString are not visible in this header --
 * confirm against the implementation. */
xmlChar*
PmmEncodeString( const char *encoding, const xmlChar *string, STRLEN len );
char*
PmmDecodeString( const char *encoding, const xmlChar *string, STRLEN* len);
/* string manipulation will go elsewhere! */
/*
 * NAME C2Sv (this comment originally called it c_string_to_sv)
 * TYPE function
 * SYNOPSIS
 * SV *my_sv = C2Sv( "my string", encoding );
 *
 * This function converts a libxml2 string to an SV*. Although the
 * string is copied, the function does not free the C string for you!
 *
 * encoding is either NULL or an encoding string such as provided by
 * the document's encoding. If encoding is NULL, UTF8 is assumed.
 *
 */
SV*
C2Sv( const xmlChar *string, const xmlChar *encoding );
/*
 * NAME Sv2C (this comment originally called it sv_to_c_string)
 * TYPE function
 * SYNOPSIS
 * xmlChar *my_string = Sv2C( my_sv, encoding );
 *
 * This function converts an SV* to a libxml string. The SV value will
 * be copied into a *newly* allocated string. (Don't forget to free it!)
 *
 * encoding is either NULL or an encoding string such as provided by
 * the document's encoding. If encoding is NULL, UTF8 is assumed.
 *
 */
xmlChar *
Sv2C( SV* scalar, const xmlChar *encoding );
/* Node-relative variants of C2Sv / Sv2C: they take a reference node instead
 * of an explicit encoding. NOTE(review): presumably the encoding is derived
 * from the document of `refnode` -- confirm against the implementation. */
SV*
nodeC2Sv( const xmlChar * string, xmlNodePtr refnode );
xmlChar *
nodeSv2C( SV * scalar, xmlNodePtr refnode );
#endif

1685
perl-libxml-sax.c Normal file

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More