Import Upstream version 1.7
This commit is contained in:
commit
c6ae72187b
|
@ -0,0 +1,57 @@
|
|||
Revision history for Perl extension Text::Iconv.
|
||||
|
||||
0.01 Sat Aug 23 16:14:12 1997
|
||||
- original version; created by h2xs 1.18
|
||||
|
||||
1.0 Sun Feb 27 16:50:11 MET 2000
|
||||
- renamed to Text::Iconv.
|
||||
|
||||
1.1 Mon Dec 18 00:50:45 MET 2000
|
||||
- works with Perl 5.6
|
||||
- improved and expanded test script
|
||||
- fixes core dumps with aborted conversions
|
||||
- adds experimental class attribute/method raise_error
|
||||
|
||||
1.2 Fri Jul 27 10:46:56 METDST 2001
|
||||
- converting undef now always returns undef
|
||||
- since HP-UX seems to be the only platform where the second
|
||||
argument to iconv() is *not* const char**, put an #ifdef
|
||||
into Iconv.xs
|
||||
|
||||
1.3 Mon Jun 28 19:25:43 CEST 2004
|
||||
- Makefile.PL now tries to detect the need for -liconv
|
||||
- added retval() method to Text::Iconv objects, which returns the
|
||||
iconv() return value (according to the Single UNIX Specification,
|
||||
"the number of non-identical conversions performed")
|
||||
- to make this possible, Text::Iconv objects are--on the C level--no
|
||||
longer just the iconv handle, but a struct (which can contain other
|
||||
information besides the handle)
|
||||
|
||||
1.4 Sun Jul 18 00:09:21 CEST 2004
|
||||
- Added instance attribute raise_error and corresponding
|
||||
method to control exception raising on a per-object basis.
|
||||
- Reset shift state for state-dependent conversions.
|
||||
- Makefile.PL now passes all options besides LIBS and INC to
|
||||
MakeMaker, so options like INSTALLDIRS=vendor work again.
|
||||
|
||||
1.5 Thu Aug 30 14:42:08 CEST 2007
|
||||
- Various small changes to make Text::Iconv compile on OpenVMS
|
||||
(based on a patch by Peter (Stig) Edwards)
|
||||
- Output buffer was always initialized to 5 bytes--this was
|
||||
just for testing, but I'd forgotten to revert back to the
|
||||
original code (detected via a bug report by Aldo LeTellier).
|
||||
|
||||
1.6 Sat Oct 13 00:16:30 CEST 2007
|
||||
- Makefile.PL now uses File::Spec to portably specify the null
|
||||
device. This should make it possible to build Text::Iconv
|
||||
on Windows "out of the box".
|
||||
- Added get_attr() and set_attr() methods. They provide an
|
||||
interface to the iconvctl() function of GNU libiconv.
|
||||
|
||||
1.7 Wed Oct 17 15:49:56 CEST 2007
|
||||
- Makefile.PL now explicitly declares "use 5.006;" since it
|
||||
uses functionality like delete() for array elements.
|
||||
- In Iconv.xs, moved the variable declarations in
|
||||
ti_set_attr() and ti_get_attr() into the PREINIT: section.
|
||||
This makes it possible to compile it using gcc 2.96.
|
||||
|
|
@ -0,0 +1,160 @@
|
|||
package Text::Iconv;
|
||||
# @(#) $Id: Iconv.pm,v 1.10 2007/10/17 14:14:22 mxp Exp $
|
||||
# Copyright (c) 2007 Michael Piotrowski
|
||||
|
||||
use strict;

require Exporter;
require DynaLoader;
require AutoLoader;

# Package globals: the version string, the inheritance chain, and the two
# export lists consulted by Exporter.
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);

@ISA = ('Exporter', 'AutoLoader', 'DynaLoader');

# Nothing is exported by default; "convert" is offered on request only.
@EXPORT_OK = ('convert');

$VERSION = '1.7';

# Load the compiled half of the module (see Iconv.xs) via the bootstrap
# method inherited from DynaLoader.
Text::Iconv->bootstrap($VERSION);
|
||||
|
||||
# Preloaded methods go here.
|
||||
|
||||
# Autoload methods go after =cut, and are processed by the autosplit program.
|
||||
|
||||
1;
|
||||
__END__
|
||||
# Below is the documentation for the module.
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Text::Iconv - Perl interface to iconv() codeset conversion function
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Text::Iconv;
|
||||
$converter = Text::Iconv->new("fromcode", "tocode");
|
||||
$converted = $converter->convert("Text to convert");
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The B<Text::Iconv> module provides a Perl interface to the iconv()
|
||||
function as defined by the Single UNIX Specification.
|
||||
|
||||
The convert() method converts the encoding of characters in the input
|
||||
string from the I<fromcode> codeset to the I<tocode> codeset, and
|
||||
returns the result.
|
||||
|
||||
Settings of I<fromcode> and I<tocode> and their permitted combinations
|
||||
are implementation-dependent. Valid values are specified in the
|
||||
system documentation; the iconv(1) utility should also provide a B<-l>
|
||||
option that lists all supported codesets.
|
||||
|
||||
=head2 Utility methods
|
||||
|
||||
B<Text::Iconv> objects also provide the following methods:
|
||||
|
||||
retval() returns the return value of the underlying iconv() function
|
||||
for the last conversion; according to the Single UNIX Specification,
|
||||
this value indicates "the number of non-identical conversions
|
||||
performed." Note, however, that iconv implementations vary widely in
|
||||
the interpretation of this specification.
|
||||
|
||||
This method can be called after calling convert(), e.g.:
|
||||
|
||||
$result = $converter->convert("lorem ipsum dolor sit amet");
|
||||
$retval = $converter->retval;
|
||||
|
||||
When called before the first call to convert(), or if an error occurred
|
||||
during the conversion, retval() returns B<undef>.
|
||||
|
||||
get_attr(): This method is only available with GNU libiconv, otherwise
|
||||
it throws an exception. The get_attr() method allows you to query
|
||||
various attributes which influence the behavior of convert(). The
|
||||
currently supported attributes are I<trivialp>, I<transliterate>, and
|
||||
I<discard_ilseq>, e.g.:
|
||||
|
||||
$state = $converter->get_attr("transliterate");
|
||||
|
||||
See iconvctl(3) for details. To ensure portability to other iconv
|
||||
implementations you should first check for the availability of this
|
||||
method using B<eval {}>, e.g.:
|
||||
|
||||
eval { $conv->get_attr("trivialp") };
|
||||
if ($@)
|
||||
{
|
||||
# get_attr() is not available
|
||||
}
|
||||
else
|
||||
{
|
||||
# get_attr() is available
|
||||
}
|
||||
|
||||
This method should be considered experimental.
|
||||
|
||||
set_attr(): This method is only available with GNU libiconv, otherwise
|
||||
it throws an exception. The set_attr() method allows you to set
|
||||
various attributes which influence the behavior of convert(). The
|
||||
currently supported attributes are I<transliterate> and
|
||||
I<discard_ilseq>, e.g.:
|
||||
|
||||
$state = $converter->set_attr("transliterate", 1);
|
||||
|
||||
See iconvctl(3) for details. To ensure portability to other iconv
|
||||
implementations you should first check for the availability of this
|
||||
method using B<eval {}>, cf. the description of get_attr() above.
|
||||
|
||||
This method should be considered experimental.
|
||||
|
||||
=head1 ERRORS
|
||||
|
||||
If the conversion can't be initialized an exception is raised (using
|
||||
croak()).
|
||||
|
||||
=head2 Handling of conversion errors
|
||||
|
||||
I<Text::Iconv> provides a class attribute B<raise_error> and a
|
||||
corresponding class method for setting and getting its value. The
|
||||
handling of errors during conversion depends on the setting of this
|
||||
attribute. If B<raise_error> is set to a true value, an exception is
|
||||
raised; otherwise, the convert() method only returns B<undef>. By
|
||||
default B<raise_error> is false. Example usage:
|
||||
|
||||
Text::Iconv->raise_error(1); # Conversion errors raise exceptions
|
||||
Text::Iconv->raise_error(0); # Conversion errors return undef
|
||||
$a = Text::Iconv->raise_error(); # Get current setting
|
||||
|
||||
=head2 Per-object handling of conversion errors
|
||||
|
||||
As an experimental feature, I<Text::Iconv> also provides an instance
|
||||
attribute B<raise_error> and a corresponding method for setting and
|
||||
getting its value. If B<raise_error> is B<undef>, the class-wide
|
||||
settings apply. If B<raise_error> is 1 or 0 (true or false), the
|
||||
object settings override the class-wide settings.
|
||||
|
||||
Consult L<iconv(3)> for details on errors that might occur.
|
||||
|
||||
=head2 Conversion of B<undef>
|
||||
|
||||
Converting B<undef>, e.g.,
|
||||
|
||||
$converted = $converter->convert(undef);
|
||||
|
||||
always returns B<undef>. This is not considered an error.
|
||||
|
||||
=head1 NOTES
|
||||
|
||||
The supported codesets, their names, the supported conversions, and
|
||||
the quality of the conversions are all system-dependent.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael Piotrowski <mxp@dynalabs.de>
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
iconv(1), iconv(3)
|
||||
|
||||
=cut
|
|
@ -0,0 +1,400 @@
|
|||
/* $Id: Iconv.xs,v 1.15 2007/10/17 14:06:22 mxp Exp $ */
|
||||
/* XSUB for Perl module Text::Iconv */
|
||||
/* Copyright (c) 2007 Michael Piotrowski */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "EXTERN.h"
|
||||
#include "perl.h"
|
||||
#include "XSUB.h"
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <iconv.h>
|
||||
|
||||
/*****************************************************************************/
|
||||
/* This struct represents a Text::Iconv object */
|
||||
|
||||
struct tiobj
|
||||
{
|
||||
iconv_t handle; /* iconv handle (returned by iconv_open()) */
|
||||
SV *retval; /* iconv() return value (according to the Single UNIX
|
||||
Specification, "the number of non-identical
|
||||
conversions performed") */
|
||||
SV *raise_error; /* Per-object flag controlling whether exceptions
|
||||
are to be thrown */
|
||||
};
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Throw a Perl exception saying that the feature named by `s' is not
   available in this build.  croak() does not return; the trailing
   `return -1' only satisfies the declared return type.

   Written with an ANSI prototype (the old K&R-style parameter list is
   rejected by C++ and by C23 compilers) and a const parameter, since
   every caller passes a string literal. */
static int
not_here(const char *s)
{
   croak("%s not implemented on this architecture", s);
   return -1;
}
|
||||
|
||||
/* Class-wide default: non-zero means conversion errors raise exceptions. */
static int raise_error = 0;

/* Decides whether do_conv() should throw an exception.  It expects a
   variable `obj' (struct tiobj *) in scope at the point of use.  The
   logic is: throw IF obj->raise_error is undef AND the class-wide
   raise_error is true, OR IF obj->raise_error is itself true.

   The whole expansion is parenthesised so that the macro behaves as a
   single operand, e.g. under `!' or next to `&&'. */
#define RAISE_ERROR_P ((!SvOK(obj->raise_error) && raise_error) \
                       || SvTRUE(obj->raise_error))
|
||||
|
||||
/* Convert the bytes of `string' using the conversion descriptor stored
   in `obj'.

   Returns:
     - &PL_sv_undef if `string' is undefined (not considered an error);
     - &PL_sv_undef if the conversion fails and exceptions are disabled
       (see RAISE_ERROR_P);
     - otherwise a newly created SV holding the converted bytes.

   Throws (via croak()):
     - on EILSEQ, EINVAL or any other iconv() error except E2BIG, when
       RAISE_ERROR_P is true;
     - unconditionally if resetting the shift state fails.

   Side effect: obj->retval is set to &PL_sv_undef whenever iconv()
   reports an error (including the recoverable E2BIG), and to a new SV
   holding iconv()'s return value when it succeeds.

   All temporary storage is released before returning or croaking;
   croak() does not return, so nothing placed after it would run.

   NOTE(review): the SV previously stored in obj->retval is not released
   when it is replaced.  Whether it can safely be released here depends
   on how the retval() accessor hands that SV to Perl -- confirm before
   adding a SvREFCNT_dec(). */
SV *do_conv(struct tiobj *obj, SV *string)
{
   char   *ibuf;         /* char* to the content of SV *string */
   char   *obuf;         /* temporary output buffer */
   size_t inbytesleft;   /* no. of bytes left to convert; initially
                            this is the length of the input string,
                            and 0 when the conversion has finished */
   size_t outbytesleft;  /* no. of free bytes in the output buffer */
   size_t l_obuf;        /* total length of the output buffer */
   char   *icursor;      /* current position in the input buffer */
   /* The Single UNIX Specification (version 1 and version 2), as well
      as the HP-UX documentation from which the XPG iconv specs are
      derived, are unclear about the type of the second argument to
      iconv() (here called icursor): The manpages say const char **,
      while the header files say char **. */
   char   *ocursor;      /* current position in the output buffer */
   size_t ret;           /* iconv() return value */
   SV     *perl_str;     /* Perl return string */
   int    saved_errno;   /* errno of the failed iconv() call, kept safe
                            across the cleanup calls */

   /* Check if the input string is actually `defined'; otherwise
      simply return undef.  This is not considered an error. */

   if (! SvOK(string))
   {
      return(&PL_sv_undef);
   }

   perl_str = newSVpv("", 0);

   /* Get pointer and length of the input string.  That's why we take
      an SV* instead of a char*: This way we can convert UCS-2 strings
      because we know their length.  SvPV() stores the length itself, so
      no separate SvCUR() call is made (SvCUR() is only meaningful once
      the SV is known to hold a string). */

   ibuf = SvPV(string, inbytesleft);

   /* Calculate approximate amount of memory needed for the temporary
      output buffer and reserve the memory.  The idea is to choose it
      large enough from the beginning to reduce the number of copy
      operations when converting from a single-byte to a multibyte
      encoding. */

   if (inbytesleft <= MB_LEN_MAX)
   {
      outbytesleft = MB_LEN_MAX + 1;
   }
   else
   {
      outbytesleft = 2 * inbytesleft;
   }

   l_obuf = outbytesleft;

   New(0, obuf, outbytesleft, char); /* Perl malloc */
   if (obuf == NULL)
   {
      saved_errno = errno;
      SvREFCNT_dec(perl_str);
      croak("New: %s", strerror(saved_errno));
   }

   /**************************************************************************/

   icursor = ibuf;
   ocursor = obuf;

   /**************************************************************************/

   while (inbytesleft != 0)
   {
#if (defined(__hpux) || defined(__linux) || defined(VMS)) && ! defined(_LIBICONV_VERSION)
      /* Even in HP-UX 11.00, documentation and header files do not agree */
      /* glibc doesn't seem to care too much about standards */
      ret = iconv(obj->handle, &icursor, &inbytesleft,
                  &ocursor, &outbytesleft);
#else
      ret = iconv(obj->handle, (const char **)&icursor, &inbytesleft,
                  &ocursor, &outbytesleft);
#endif

      if (ret != (size_t) -1)
      {
         obj->retval = newSViv(ret);
         continue;
      }

      saved_errno = errno;
      obj->retval = &PL_sv_undef;

      if (saved_errno == E2BIG)
      {
         /* If the output buffer is not large enough, copy the
            converted bytes to the return string, reset the
            output buffer and continue */
         sv_catpvn(perl_str, obuf, l_obuf - outbytesleft);
         ocursor = obuf;
         outbytesleft = l_obuf;
         continue;
      }

      /* Every other error ends the conversion.  Release both buffers
         before reporting it, so that neither the croak() path nor the
         return-undef path leaks them. */
      Safefree(obuf);
      SvREFCNT_dec(perl_str);

      if (RAISE_ERROR_P)
      {
         switch (saved_errno)
         {
            case EILSEQ:
               /* Input character encountered which does not belong to
                  the input char set */
               croak("Character not from source char set: %s",
                     strerror(saved_errno));
            case EINVAL:
               /* Incomplete character or shift sequence at the end of
                  the input */
               croak("Incomplete character or shift sequence: %s",
                     strerror(saved_errno));
            default:
               croak("iconv error: %s", strerror(saved_errno));
         }
      }

      return(&PL_sv_undef);
   }

   /* For state-dependent encodings, place conversion descriptor into
      initial shift state and place the byte sequence to change the
      output buffer to its initial shift state.

      The only (documented) error for this use of iconv() is E2BIG;
      here it could happen only if the output buffer has no more room
      for the reset sequence.  We can simply prevent this case by
      copying its content to the return string before calling iconv()
      (just like when E2BIG happens during the "normal" use of
      iconv(), see above).  This adds the (slight, I'd guess) overhead
      of an additional call to sv_catpvn(), but it makes the code much
      cleaner.

      Note: Since we currently don't return incomplete conversion
      results in case of EINVAL and EILSEQ, we don't have to care
      about the shift state there.  If we did return the results in
      these cases, we'd also have to reset the shift state there.
   */

   sv_catpvn(perl_str, obuf, l_obuf - outbytesleft);
   ocursor = obuf;
   outbytesleft = l_obuf;

   if (iconv(obj->handle, NULL, NULL, &ocursor, &outbytesleft)
       == (size_t) -1)
   {
      saved_errno = errno;
      Safefree(obuf);
      SvREFCNT_dec(perl_str);
      croak("iconv error (while trying to reset shift state): %s",
            strerror(saved_errno));
   }

   /* Copy the reset sequence (if any) to the return string, and free
      the output buffer */

   sv_catpvn(perl_str, obuf, l_obuf - outbytesleft);
   Safefree(obuf); /* Perl malloc */

   return perl_str;
}
|
||||
|
||||
/* */
|
||||
|
||||
#if defined(_LIBICONV_VERSION) && _LIBICONV_VERSION >= 0x0109
/* Forward an iconvctl() request to the conversion descriptor stored in
   `obj' and hand back iconvctl()'s status unchanged.  Only compiled
   when building against GNU libiconv 1.9 or later. */
int do_iconvctl(struct tiobj *obj, int request, void *arg)
{
   int status = iconvctl(obj->handle, request, arg);

   return status;
}
#endif

/* C-level spelling of the Perl type name Text::Iconv */
typedef struct tiobj Text__Iconv;
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Perl interface */
|
||||
|
||||
MODULE = Text::Iconv PACKAGE = Text::Iconv
|
||||
|
||||
PROTOTYPES: ENABLE
|
||||
|
||||
int
raise_error(...)
   PREINIT:
      int i;
   CODE:
      /* Accept the new value in either of the first two argument
         slots: slot 0 when called as a plain function, slot 1 when
         called as a class method (slot 0 is then the class name).
         Only arguments carrying an integer value are considered, and
         a later slot overrides an earlier one. */
      for (i = 0; i < 2 && i < items; i++)
      {
         if (SvIOK(ST(i)))
            raise_error = SvIV(ST(i));
      }
      RETVAL = raise_error;
   OUTPUT:
      RETVAL
|
||||
|
||||
Text::Iconv *
new(self, fromcode, tocode)
   char *fromcode
   char *tocode
   PREINIT:
      iconv_t handle;
      Text__Iconv *obj;
      int saved_errno;
   CODE:
      /* Constructor: opens a conversion descriptor for fromcode ->
         tocode and wraps it in a freshly allocated object struct.
         Croaks if the descriptor cannot be opened.

         The local variables are declared in PREINIT: (as in
         ti_get_attr() and ti_set_attr()) so that they precede all
         statements, which older C compilers require. */

      /* Note the argument order: iconv_open() takes the target
         codeset first. */
      if ((handle = iconv_open(tocode, fromcode)) == (iconv_t)-1)
      {
         switch(errno)
         {
            case ENOMEM:
               croak("Insufficient memory to initialize conversion: %s",
                     strerror(errno));
            case EINVAL:
               croak("Unsupported conversion from %s to %s: %s",
                     fromcode, tocode, strerror(errno));
            default:
               croak("Couldn't initialize conversion: %s", strerror(errno));
         }
      }

      Newz(0, obj, 1, Text__Iconv);
      if (obj == NULL)
      {
         /* Don't leave the descriptor open if the object that should
            own it could not be allocated. */
         saved_errno = errno;
         (void) iconv_close(handle);
         croak("Newz: %s", strerror(saved_errno));
      }

      obj->handle = handle;
      obj->retval = &PL_sv_undef;

      /* Create a modifiable SV and set it to undef, meaning "no
         per-object setting; use the class-wide raise_error". */
      obj->raise_error = newSViv(0);
      sv_setsv(obj->raise_error, &PL_sv_undef);
      RETVAL = obj;
   OUTPUT:
      RETVAL
|
||||
|
||||
MODULE = Text::Iconv PACKAGE = Text::IconvPtr PREFIX = ti_
|
||||
|
||||
SV *
ti_convert(self, string)
   Text::Iconv *self
   SV *string
   PREINIT:
      SV *converted;
   CODE:
      /* All the work happens in do_conv(); this XSUB only passes its
         result back to Perl. */
      converted = do_conv(self, string);
      RETVAL = converted;
   OUTPUT:
      RETVAL
|
||||
|
||||
SV *
ti_retval(self)
   Text::Iconv *self
   CODE:
      /* Return a copy of the stored value, never the stored SV
         itself: an SV * returned through RETVAL is made mortal by the
         XS glue, so returning self->retval directly would free the
         object's own SV once the caller's statement ends and leave
         self->retval dangling for the next call.  Copying
         &PL_sv_undef simply yields a fresh undef. */
      RETVAL = newSVsv(self->retval);
   OUTPUT:
      RETVAL
|
||||
|
||||
SV *
ti_raise_error(self, ...)
   Text::Iconv *self
   PPCODE:
      /* Optional setter: an integer-valued first argument replaces the
         per-object flag.  Either way the (possibly updated) flag is
         returned as a mortal copy. */
      if (items > 1)
      {
         SV *flag = ST(1);

         if (SvIOK(flag))
            sv_setiv(self->raise_error, SvIV(flag));
      }
      XPUSHs(sv_mortalcopy(self->raise_error));
|
||||
|
||||
#if _LIBICONV_VERSION >= 0x0109

int
ti_get_attr(self, request)
   Text::Iconv *self
   char *request;
   PREINIT:
      int reqno;
      int arg;
      int err;
   CODE:
      /* Translate the attribute name into the matching iconvctl()
         request code; unknown names are passed on as -1. */
      reqno = strEQ(request, "trivialp")      ? ICONV_TRIVIALP
            : strEQ(request, "transliterate") ? ICONV_GET_TRANSLITERATE
            : strEQ(request, "discard_ilseq") ? ICONV_GET_DISCARD_ILSEQ
            :                                   -1;

      err = do_iconvctl(self, reqno, &arg);

      /* A negative status is returned as is; otherwise the value
         that iconvctl() stored in arg is returned. */
      RETVAL = (err < 0) ? err : arg;
   OUTPUT:
      RETVAL

#else

int
ti_get_attr(self, request)
   Text::Iconv *self
   char *request;
   CODE:
      /* No iconvctl() in this build: not_here() throws. */
      not_here("iconvctl (needed for get_attr())");
      RETVAL = -1;
   OUTPUT:
      RETVAL

#endif
|
||||
|
||||
#if _LIBICONV_VERSION >= 0x0109

int
ti_set_attr(self, request, arg)
   Text::Iconv *self
   char *request;
   int arg;
   PREINIT:
      int reqno;
      int err;
   CODE:
      /* Translate the attribute name into the matching iconvctl()
         request code; unknown names are passed on as -1. */
      reqno = strEQ(request, "transliterate") ? ICONV_SET_TRANSLITERATE
            : strEQ(request, "discard_ilseq") ? ICONV_SET_DISCARD_ILSEQ
            :                                   -1;

      err = do_iconvctl(self, reqno, &arg);

      /* A negative status is returned as is; otherwise arg is
         returned. */
      RETVAL = (err < 0) ? err : arg;
   OUTPUT:
      RETVAL

#else

int
ti_set_attr(self, request, arg)
   Text::Iconv *self
   char *request;
   int arg;
   CODE:
      /* No iconvctl() in this build: not_here() throws. */
      not_here("iconvctl (needed for set_attr())");
      RETVAL = -1;
   OUTPUT:
      RETVAL

#endif
|
||||
|
||||
void
ti_DESTROY(self)
   Text::Iconv * self
   CODE:
      /* Destructor: close the conversion descriptor and release
         everything the object owns. */
      (void) iconv_close(self->handle);

      /* The per-object flag is created by new() and only ever handed
         out as a copy, so the object holds the sole reference. */
      SvREFCNT_dec(self->raise_error);

      /* NOTE(review): self->retval is deliberately not released here.
         It may be the shared &PL_sv_undef, and whether the object
         still holds a reference to a real SV depends on how retval()
         returns it -- confirm before adding a SvREFCNT_dec(). */
      Safefree(self);
|
|
@ -0,0 +1,10 @@
|
|||
Changes
|
||||
Iconv.pm
|
||||
Iconv.xs
|
||||
MANIFEST
|
||||
Makefile.PL
|
||||
README
|
||||
typemap
|
||||
t/00_load.t
|
||||
t/01_charsets.t
|
||||
META.yml Module meta-data (added by MakeMaker)
|
|
@ -0,0 +1,10 @@
|
|||
# http://module-build.sourceforge.net/META-spec.html
|
||||
#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
|
||||
name: Text-Iconv
|
||||
version: 1.7
|
||||
version_from: Iconv.pm
|
||||
installdirs: site
|
||||
requires:
|
||||
|
||||
distribution_type: module
|
||||
generated_by: ExtUtils::MakeMaker version 6.30
|
|
@ -0,0 +1,162 @@
|
|||
# @(#) $Id: Makefile.PL,v 1.9 2007/10/17 14:06:22 mxp Exp $
|
||||
|
||||
use 5.006;
|
||||
use ExtUtils::MakeMaker;
|
||||
use Config;
|
||||
use File::Spec;
|
||||
|
||||
my %config;
|
||||
my $ok;
|
||||
my $devnull = File::Spec->devnull();
|
||||
|
||||
###############################################################################
|
||||
# Read settings from the commandline
|
||||
# We must delete the options we're handling ourselves to keep
|
||||
# MakeMaker from processing them, but the rest should be preserved so
|
||||
# that we get the default MakeMaker behavior.
|
||||
|
||||
# Record every KEY=VALUE argument (or bare KEY, with an undefined value)
# in %config.  LIBS and INC are handled by this script, so they are
# removed from @ARGV; everything else stays, in order, for MakeMaker.
#
# @ARGV is rebuilt rather than modified with delete(): deleting array
# elements leaves undefined holes in the middle of the array, which
# MakeMaker would then be asked to parse.
my @makemaker_args;

foreach my $arg (@ARGV)
{
    my ($key, $val) = split(/=/, $arg, 2);

    # An empty argument yields no key at all; pass it through untouched.
    if (defined $key)
    {
        $config{$key} = $val;

        next if $key eq 'LIBS' || $key eq 'INC';
    }

    push @makemaker_args, $arg;
}

@ARGV = @makemaker_args;
|
||||
|
||||
###############################################################################
|
||||
# Check for iconv.
|
||||
|
||||
# Echo user-supplied paths so that they show up in the configure output.
if ($config{LIBS} || $config{INC})
{
    print "Your settings:\n";
    print " LIBS: ", $config{LIBS}, "\n";
    print " INC: ", $config{INC}, "\n";
}

print 'Checking for iconv ... ';

# First attempt: link with the flags exactly as given.  If that fails and
# -liconv was not requested already, retry with -liconv appended.
if (linktest($config{LIBS}, $config{INC}))
{
    print "ok (iconv apparently in libc)\n";
    $ok = 1;
}
elsif (not $config{LIBS} =~ /-liconv/)
{
    $config{LIBS} .= ' -liconv';

    if (linktest($config{LIBS}, $config{INC}))
    {
        print "ok (added -liconv)\n";
        $ok = 1;
    }
}

# Without a usable iconv there is no point in writing a makefile.
if (not $ok)
{
    print "fail\n",
          "Failed to find iconv, please check your settings and re-run as:\n",
          "$^X Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include'\n";
    exit 1;
}

print <<EOT;

NOTE: If you have multiple iconv implementations installed, you might
want to make sure that I've found the one you want to use.
If necessary, you can explicitly specify paths like this:

$^X Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include'

EOT
|
||||
|
||||
###############################################################################
|
||||
# Write the makefile
|
||||
|
||||
# ABSTRACT_FROM and AUTHOR are passed unconditionally: this script
# already requires Perl 5.006, so a run-time check for 5.005 can never
# fail.  DEFINE is the empty string; no array named @DEFINE is declared
# anywhere in this script, so interpolating it produced exactly that.
WriteMakefile(
    'NAME'          => 'Text::Iconv',
    'VERSION_FROM'  => 'Iconv.pm',   # finds $VERSION
    'PREREQ_PM'     => {},           # e.g., Module::Name => 1.1
    'ABSTRACT_FROM' => 'Iconv.pm',   # retrieve abstract from module
    'AUTHOR'        => 'Michael Piotrowski <mxp@dynalabs.de>',
    'LIBS'          => $config{LIBS},
    'DEFINE'        => '',
    'INC'           => $config{INC},
    'dist'          => {COMPRESS => 'gzip', SUFFIX => 'gz'},
);
|
||||
|
||||
###############################################################################
|
||||
|
||||
# linktest($libs, $incs)
#
# Checks whether a minimal C program calling iconv_open() can be compiled
# and linked with the given linker flags ($libs) and include flags
# ($incs); either may be undefined.  Returns 1 if the compiler command
# exits with status 0, and 0 otherwise.
#
# The test program is written to "linktest.c" in the current directory;
# it and the build products are removed again afterwards.  Dies if the
# test program cannot be written.  Reads the file-level $devnull and
# %config (for the "verbose" option) as well as %Config.
sub linktest
{
    my ($libs, $incs) = @_;

    # Treat missing flags as empty so that they vanish from the command
    # line without "uninitialized" warnings.
    $libs = '' unless defined $libs;
    $incs = '' unless defined $incs;

    my $file    = 'linktest';
    my $obj_ext = $Config{_o};
    my $exe_ext = defined $Config{_exe} ? $Config{_exe} : '';

    my $prog = <<EOT;
#include <iconv.h>

int main(void)
{
(void) iconv_open("", "");
}
EOT

    my $compile;

    unless ($^O eq 'VMS')
    {
        # It is admittedly a bit simplistic to simply concatenate all
        # flags, but it seems to work in most cases.
        $compile = join ' ', $Config{cc}, $incs, $Config{ccflags},
            $Config{ldflags}, $libs;
    }
    else
    {
        $compile = join ' ', $Config{cc}, $incs, $Config{ccflags}, $libs;
    }

    if (exists $config{verbose})
    {
        print "\nCompiler: '$compile'\n";
    }

    # Lexical filehandle, and every step checked: a silently truncated
    # test program would look exactly like "iconv not found".
    open my $src, '>', "$file.c"
        or die "Can't create test file '$file.c': $!";
    print {$src} $prog
        or die "Can't write test file '$file.c': $!";
    close $src
        or die "Can't close test file '$file.c': $!";

    my $compile_line = "$compile -o $file $file.c $libs 2> $devnull";

    if ($^O eq 'VMS')
    {
        $compile_line = "pipe $compile $file.c $libs 2> NL:";
    }

    if (exists $config{verbose})
    {
        print "\nCompiler command line: '$compile_line'\n";
    }

    # Any non-zero wait status (non-zero exit code, death by signal, or
    # failure to start the command at all) counts as failure.
    my $status = system($compile_line);

    # Also remove the executable under its platform-specific name
    # (e.g. "linktest.exe").
    unlink $file, "$file$exe_ext", "$file.c", "$file$obj_ext";

    return $status == 0 ? 1 : 0;
}
|
|
@ -0,0 +1,98 @@
|
|||
Text::Iconv Version 1.7
|
||||
|
||||
Copyright © 2007 Michael Piotrowski. All Rights Reserved.
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
This module provides a Perl interface to the iconv() codeset
|
||||
conversion function, as defined by the Single UNIX Specification. For
|
||||
more details see the POD documentation embedded in the file Iconv.pm,
|
||||
which will also be installed as Text::Iconv(3) man page.
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
Your system should be compliant with the Single UNIX Specification or
|
||||
at least provide iconv_open(), iconv() and iconv_close() functions
|
||||
compliant with it.
|
||||
|
||||
Note that the possible conversions and the quality of the conversions
|
||||
depend on the available iconv conversion tables and algorithms, which
|
||||
are in most cases supplied by the operating system vendor. Some
|
||||
systems also allow you to build your own tables (e.g., HP-UX, Tru64
|
||||
UNIX, and AIX provide the genxlt(1) command).
|
||||
|
||||
It is also possible to use a separate iconv library such as GNU
|
||||
libiconv package, just make sure that the desired library is found
|
||||
(see <http://www.gnu.org/directory/localization/libiconv.html>).
|
||||
|
||||
Building the module
|
||||
-------------------
|
||||
|
||||
Since the Single UNIX Specification only specifies an interface,
|
||||
several issues are left to the implementation to define, namely:
|
||||
|
||||
1. The supported codesets
|
||||
2. The supported conversions
|
||||
3. The names for the supported codesets
|
||||
|
||||
Please check your system documentation for the above points.
|
||||
|
||||
The module can be built using this sequence of commands:
|
||||
|
||||
perl Makefile.PL
|
||||
make
|
||||
make test
|
||||
|
||||
On some systems, the iconv functions are in libc; on others (and when
|
||||
you want to use a separate iconv library, such as GNU libiconv), you
|
||||
have to link with -liconv. Makefile.PL tries to automatically detect
|
||||
this. However, if your iconv library is in a non-standard path, you
|
||||
might have to help it. You can specify library and include paths like
|
||||
this:
|
||||
|
||||
perl Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include'
|
||||
|
||||
You might also have to do this if you have multiple iconv
|
||||
implementations installed and want to use a specific one.
|
||||
|
||||
The purpose of the test scripts (run by "make test") is to check if an
|
||||
iconv library can be found, and if the iconv functions can be called.
|
||||
It is neither intended to find out which conversions are supported,
|
||||
nor to test the quality of the conversions provided by your iconv
|
||||
library. However, since the iconv functions can only be tested by
|
||||
calling them, which in turn requires the specification of codesets to
|
||||
be converted, the test scripts try some conversions which seem to be
|
||||
widely supported. Conversions not supported by your iconv
|
||||
implementation are skipped, but for the purpose outlined above this is
|
||||
no problem, as it shows that the iconv functions are working. There
|
||||
is no standard way to find out which conversions are supported by an
|
||||
implementation. This is also the reason why Text::Iconv can't provide
|
||||
platform-independent support for codeset identifiers (even if it
|
||||
would be nice to have).
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
make install
|
||||
|
||||
Feedback
|
||||
--------
|
||||
|
||||
Text::Iconv was tested on various platforms and with different iconv
|
||||
implementations.
|
||||
|
||||
If you built Text::Iconv on a platform where you needed to specify
|
||||
additional libraries, or where none of the codeset identifiers in the
|
||||
test scripts are supported, please drop me a note.
|
||||
|
||||
Thanks
|
||||
------
|
||||
|
||||
Thanks go to all those who reported bugs and suggested features.
|
||||
|
||||
Michael Piotrowski <mxp@dynalabs.de>
|
|
@ -0,0 +1,9 @@
|
|||
# Smoke test: the module must load.  Output is unbuffered and the plan
# (a single test) is announced before anything else is compiled.
BEGIN { $| = 1; print "1..1\n"; }

# If loading the module dies, $loaded is never set and the END block
# reports the failure.
END { print "not ok 1\n" if not $loaded; }

use Text::Iconv;

$loaded = 1;
print "ok 1\n";
|
||||
|
||||
### Local variables:
|
||||
### mode: perl
|
||||
### End:
|
|
@ -0,0 +1,120 @@
|
|||
# @(#) $Id: 01_charsets.t,v 1.4 2007/10/12 21:38:01 mxp Exp $
|
||||
# -*- encoding: iso-8859-1 -*-
|
||||
|
||||
BEGIN { $| = 1; print "1..13\n"; }
|
||||
END {print "not ok 1\n" unless $loaded;}
|
||||
use Text::Iconv;
|
||||
$loaded = 1;
|
||||
print "ok 1\n";
|
||||
|
||||
Text::Iconv->raise_error(1);
|
||||
|
||||
# Note: On VMS codepages are found in SYS$I18N_ICONV
|
||||
|
||||
# For each codeset, the alternative names under which different iconv
# implementations may know it; try_codesets() tries them in turn.
%codesets = ('iso88591' => [qw(iso88591 iso8859-1 iso-8859-1 ISO88591
                               ISO8859-1 ISO-8859-1 88591 8859-1)],
             'cp037'    => [qw(cp037 CP037 ibm037 IBM037 ibm-037 IBM-037)],
             'cp850'    => [qw(cp850 CP850 ibm850 IBM850 ibm-850 IBM-850)],
             'utf8'     => [qw(utf8 utf-8 UTF8 UTF-8)]);

# One sample byte string per codeset, each already encoded in that
# codeset.  All non-ASCII bytes are written as \x escapes so that the
# test data does not depend on the encoding this file is stored in.  The
# iso88591, cp850 and utf8 samples spell the same German phrase
# ("Schoene Gruesse", written with o-umlaut, u-umlaut and sharp s).
%strings = ('iso88591' => "Sch\xf6ne Gr\xfc\xdfe",
            'cp037'    => "\xa2\xa4\xa4\x94\x40\x83\xa4\x89\x98\xa4\x85",
            'cp850'    => "Sch\x94ne Gr\x81\xe1e",
            'utf8'     => "Sch\xc3\xb6ne Gr\xc3\xbc\xc3\x9fe");
|
||||
|
||||
# Test 1 was the module load above; each ordered pair of distinct
# codesets below is one further test.
$test_no = 1;

foreach $source (keys %strings)
{
    foreach $target (keys %codesets)
    {
        next if $source eq $target;

        $test_no++;

        # One converter for each direction of the round trip.
        $c1 = try_codesets($codesets{$source}, $codesets{$target});
        $c2 = try_codesets($codesets{$target}, $codesets{$source});

        if (not defined $c1 or not defined $c2)
        {
            # try_codesets() returns undef when the constructor failed
            # for a reason other than an unsupported conversion.
            print "not ok $test_no \t # (call to iconv_open() failed)\n";
        }
        elsif ($c1 == 0 or $c2 == 0)
        {
            # 0 means that none of the codeset names is supported here.
            print "ok $test_no \t ",
                "# skip ($source <-> $target conversion not supported)\n";
        }
        else
        {
            # raise_error is enabled above, so a failed conversion dies
            # and is reported through $@.
            eval
            {
                $r1 = $c1->convert($strings{$source});
                $r2 = $c2->convert($r1);
            };

            if ($@)
            {
                print "not ok $test_no \t ",
                    "# ($source <-> $target conversion failed: $@)\n";
            }
            elsif ($r2 eq $strings{$source})
            {
                print "ok $test_no \t # ($source <-> $target) ",
                    "[", $c1->retval, "/", $c2->retval, "]\n";
            }
            else
            {
                print "not ok $test_no \t ",
                    "# ($source <-> $target roundtrip failed)",
                    "[", $c1->retval, "/", $c2->retval, "]\n";
            }
        }
    }
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
# This function expects two array references, each listing all the
|
||||
# alternative names to try for the source and target codesets. If the
|
||||
# codeset is not supported (at least not under any of the names that
|
||||
# were given), it returns 0. If the call to iconv_open() fails due to
|
||||
# other reasons, it returns undef. Otherwise a Text::Iconv object for
|
||||
# the requested conversion is returned.
|
||||
|
||||
# try_codesets(\@from_names, \@to_names)
#
# Tries every combination of source and target codeset name until a
# Text::Iconv converter can be created.  Returns
#   - the converter object on success,
#   - 0 if the last attempt failed with "Unsupported conversion ...",
#   - undef if the last attempt failed for any other reason (or if there
#     was nothing to try).
#
# The error of each attempt is copied into a lexical right after the
# eval, so the result never depends on a stale value of the global $@.
sub try_codesets
{
    my ($from, $to) = @_;
    my $converter;
    my $error = '';

  TRY:
    foreach my $f (@$from)
    {
        foreach my $t (@$to)
        {
            $converter = eval { Text::Iconv->new($f, $t) };
            $error     = $@;

            last TRY if not $error;
        }
    }

    return 0 if $error =~ /^Unsupported conversion/;

    # Callers test the result with defined() in scalar context, so an
    # explicit undef is returned here.
    return undef if $error;

    return $converter;
}
|
||||
|
||||
### Local variables:
|
||||
### mode: perl
|
||||
### End:
|
Loading…
Reference in New Issue