Import Upstream version 2.500

This commit is contained in:
zhangyichun 2022-09-14 16:56:36 +08:00
commit 193fb07d34
33 changed files with 24545 additions and 0 deletions

51
Build.PL Normal file
View File

@ -0,0 +1,51 @@
# Build.PL -- Module::Build configuration for the Net-IDN-Encode distribution.
# Running this script writes the ./Build script used for the remaining steps.
require 5.008005;
use strict;
use utf8;    # the dist_author string below contains a non-ASCII character
use Module::Build;

my $builder = Module::Build->new(
  # Distribution identity.
  module_name   => 'Net::IDN::Encode',
  license       => 'perl',
  dist_author   => 'Claus Färber <CFAERBER@cpan.org>',
  dist_abstract => 'Internationalizing Domain Names in Applications (UTS #46)',

  # Files produced alongside the distribution.
  sign               => 1,
  create_license     => 1,
  create_makefile_pl => 'traditional',

  # Prerequisites.
  requires => {
    'Unicode::Normalize' => 0,
    perl                 => 5.008005,
  },
  test_requires => {
    'Test::More'       => 0,
    'Test::NoWarnings' => 0,
  },
  needs_compiler => undef,

  # Generator scripts and the files each one is listed as producing.
  # NOTE(review): Module::Build documents this property as "PL_files", and
  # the script paths sit under lib/Unicode/ while the outputs sit under
  # lib/Net/IDN/ -- confirm both are intended before relying on this entry.
  PL_Files => {
    'lib/Unicode/UTS46/_Mapping.PL' => [
      'lib/Net/IDN/UTS46/_Mapping.pm',
    ],
    'lib/Unicode/UTS46/GenTests.PL' => [
      't/uts46_to_ascii.t',
      't/uts46_to_ascii-trans.t',
      't/uts46_to_unicode.t',
    ],
  },

  # Metadata for indexers.
  no_index => {
    directory => ['eg', 'data']
  },
  meta_add => {
    resources => {
      homepage   => 'https://metacpan.org/release/Net-IDN-Encode',
      bugtracker => 'https://rt.cpan.org/Public/Dist/Display.html?Name=Net-IDN-Encode',
      repository => 'http://github.com/cfaerber/Net-IDN-Encode',
    },
  },
);

$builder->create_build_script;

206
Changes Normal file
View File

@ -0,0 +1,206 @@
Revision history for Perl extension Net::IDN::Encode
2.500 2018-10-06
- update to Unicode 10.0.0
- Net::IDN::UTS46: remove workarounds for pre-9.0.0 test vectors; the
module now more closely follows the written spec
- Net::IDN::UTS46: fix validation for some non-valid characters
- Net::IDN::UTS46: for perl 5.8.x/5.10.x, include workaround for bidi
validation where some labels would incorrectly be marked as invalid
[B1] because of a bug in perl's Unicode implementation
- Net::IDN::Punycode: fix for warnings under perl ≤ 5.8.7 (EXPERIMENTAL)
2.401 2018-09-20
- FIXES: #127056: [PATCH 1/2] Fix domain_to_ascii AllowUnassigned param
(report and patch by SKJM)
- FIXES: #127057: [PATCH 2/2] Fix domain_to_unicode AllowUnassigned
param (report and patch by SKJM)
2.400 2017-01-01
- update to Unicode 9.0.0
- FIXES #119468: [PATCH] spelling fixes (reported by GREGOA)
- possible fix for utf8 warnings under perl 5.8.x
- changed generation of tests so that TODO is no longer required when
the module author's perl doesn't support the newest Unicode version
- remove author tests
- more spelling fixes
2.303 2016-12-10
- FIXES: warnings when compiling lib/Net/IDN/Punycode.xs
(reported/patch provided by paul@c***-***.org)
2.302 2016-12-07
- Fixes memory bug introduced by fix for #118924
2.301 2016-12-03
- FIXES: #118924: encode_punycode heap overflow
(reported by Alexander Bluhm)
2.300 2015-06-17
- update to Unicode 8.0.0
2.202 2015-04-18
- use updated IdnaTest.txt from Unicode 7.0.0 database
FIXES: #96749: Fails with bleadperl
- documentation updates, point to perl Unicode tutorials
- tests for domain xn--zcaa.de;
REJECTED: #103205 for Net-IDN-Encode: conversion of domain name
2.201 2014-08-30
- correct handling of uppercase a-labels in
domain_to_{ascii,unicode}
- FIXES: #98354: Capitalized ACE prefix does not work (reported
by victor@*****.ru)
2.200 2014-06-21
- Net::IDN::UTS46: update to Unicode® 7.0.0 and UTS #46 r13
- typo and metadata fixes from dstreinbrunner
2.100 2013-12-30
- Net::IDN::Encode: preserve case in pure-ASCII labels (bypass
en-/decoding)
FIXES: #91059: case not preserved (reported by DMUEY)
- Net::IDN::Encode: simplify scalar-via-blob syntax (pull req.
by DMUEY)
- Net::IDN::Encode: add SMALL COMMERCIAL AT to list of possible
@ signs (pull req. by DMUEY)
- Net::IDN::UTS46: update to Unicode® 6.3.0 and UTS #46 r11:
* new UTS #46 test vectors in data/IdnaTest.txt
* built on perl-blead (5.19.7) for support of Unicode® 6.3.0
in tests generated from data/IdnaTest.txt
* tweaks and fixes regarding edge cases not clearly described
in UTS #46
- Net::IDN::UTS46: test vectors supposed to fail due to
Unassigned characters are no longer skipped.
- Net::IDN::Punycode: use utf8_to_uvchr_buf instead of deprecated
utf8_to_uvuni (perl 5.15.9 and higher; utf8_to_uvuni_buf is
deprecated from perl 5.19.5)
2.005 2013-11-03
- better documentation for unassigned characters,
FIXES: #89750: Can't create IDN for a special domain
(reported by felix.*****@*****.de)
- FIXES: #89270: [PATCH] fix spelling errors in the docs
(reported by cstamas@*****.hu)
2.004 2013-08-12
- FIXES: #85552 3 uts46 tests FAIL under perl-5.18.0
(reported by d.thomas@*****.au)
2.003 2012-01-22
- FIXES required version of Unicode::Normalize in UTS46.pm
(reported by CPAN testers)
2.002 2012-01-18
- FIXES dependencies/required perl version
- FIXES: #74021 Makefile.PL bad value for
version-requirement
2.001 2012-01-12
- FIXES XS_VERSION mismatch
- FIXES dependency on Unicode::Normalize (was 1.000 or higher,
but this is not needed).
2.000 2012-01-08
- switch to Unicode Technical Standard #46 (previously,
IDNA2003 has been used, which is now available as
Net::IDN::IDNA2003):
- add Net::IDN::UTS46 + test vectors from UTS #46
- remove Net::IDN::Nameprep (only required for IDNA2003)
- add documentation about IDNA Standards and IDNA module
Overview/Roadmap
- allow NON-LDH labels (e.g. for SRV records), even if
UseSTD3Rules=true (parameter now only applies to
U-labels and A-labels, i.e. labels that are converted by
IDNA).
- FIXES potential portability problems in
Net::IDN::Punycode XS 1.999_20120108
- FIXES 'wide character' warning with tests if tests
fail/if TB2 is used on modern perl installations.
- FIXES decoding bug in Net::IDN::Punycode::PP (discovered
through UTS #46 test vectors)
1.101 2011-12-08
- FIXES: #72615 faulty data in Build.PL causes a lack of
meta files, which breaks carton.
1.100 2010-06-08
[patch by Loïc Etienne]
- new parameters AllowUnassigned/UseSTD3ASCIIRules for
to_ascii, to_unicode (RFC 3490)
domain_to_ascii, domain_to_unicode
- case insensitive ACE prefix (RFC 3490)
- new length 255 check in domain_to_ascii (RFC 1034)
- length 63 check moved to to_ascii
- dots replacement only in domain_to_ascii
(domain_to_unicode does not require it)
- o-modifier in regexs
- _domain replaced by domain_to_unicode and domain_to_ascii
- _nameprep replaced by Net::IDN::Nameprep
1.000 2010-01-13
- clean-up
- release
0.999_20090112 2010-01-10
- add XS for decode_punycode
0.999_20090110 2010-01-10
- add XS for encode_punycode
- include Net::IDN::Nameprep into Net::IDN::Encode *sigh*
- drop IDNA::Punycode
0.99_20091231 2009-12-31
- depend on perl 5.8.3
- optimise Net::IDN::Punycode
0.99_20091226 2009-12-26
- some clean-ups
0.99_20091216 2009-12-16
- switch to Module::Build
- switch to Github, remove svn:keywords, add .gitignore
- use ASCII in POD, fixes FAILs with perl 5.6.x
- add examples in eg/
0.99_20080913 2009-09-13
- fixed perl 5.6.x (no warnings 'utf8')
0.99_20080913 2009-09-13
- require perl version 5.6.0 instead of 5.6.6
- skip more tests in lower perl versions
- removed Encode::Punycode; Encode is only available from
perl 5.7.3
- renamed back to Net::IDN::Encode; without
Encode::Punycode, the new name does not make sense.
0.99_20071012 2007-10-12
- renamed Net-IDN-Encode distribution to Net-IDN-tools
- includes Net::IDN::Punycode (from IDNA::Punycode v0.02)
- includes Net::IDN::Nameprep (complete rewrite, uses
Unicode::Stringprep)
- includes IDNA::Punycode (deprecated, new version based
on Net::IDN::Punycode/::Encode)
- includes Encode::Punycode (new version based on
Net::IDN::Punycode)
- uses Unicode::Stringprep
- more tests, including test vectors from Internet Draft
draft-josefsson-idn-test-vectors-00.
- FIXES: #16150: Net::IDN::Encode depends on non-modulelist module IDNA::Punycode
- FIXES: #16145: IDNA::Punycode 0.03
- FIXES: #28123: Undeclared dependency on Unicode::Stringprep (reported by ANDK)
- FIXES WARNING: v-string in use/require non-portable (Net::IDN::Nameprep[::*])
0.02 2004-06-20
- fixed handling of incomplete/empty email addresses
0.01 2004-05-30
- first release

379
LICENSE Normal file
View File

@ -0,0 +1,379 @@
This software is copyright (c) 2018 by Claus Färber <CFAERBER@cpan.org>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
Terms of the Perl programming language system itself
a) the GNU General Public License as published by the Free
Software Foundation; either version 1, or (at your option) any
later version, or
b) the "Artistic License"
--- The GNU General Public License, Version 1, February 1989 ---
This software is Copyright (c) 2018 by Claus Färber <CFAERBER@cpan.org>.
This is free software, licensed under:
The GNU General Public License, Version 1, February 1989
GNU GENERAL PUBLIC LICENSE
Version 1, February 1989
Copyright (C) 1989 Free Software Foundation, Inc.
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The license agreements of most software companies try to keep users
at the mercy of those companies. By contrast, our General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. The
General Public License applies to the Free Software Foundation's
software and to any other program whose authors commit to using it.
You can use it for your programs, too.
When we speak of free software, we are referring to freedom, not
price. Specifically, the General Public License is designed to make
sure that you have the freedom to give away or sell copies of free
software, that you receive source code or can get it if you want it,
that you can change the software or use pieces of it in new free
programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of a such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must tell them their rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any program or other work which
contains a notice placed by the copyright holder saying it may be
distributed under the terms of this General Public License. The
"Program", below, refers to any such program or work, and a "work based
on the Program" means either the Program or any work containing the
Program or a portion of it, either verbatim or with modifications. Each
licensee is addressed as "you".
1. You may copy and distribute verbatim copies of the Program's source
code as you receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice and
disclaimer of warranty; keep intact all the notices that refer to this
General Public License and to the absence of any warranty; and give any
other recipients of the Program a copy of this General Public License
along with the Program. You may charge a fee for the physical act of
transferring a copy.
2. You may modify your copy or copies of the Program or any portion of
it, and copy and distribute such modifications under the terms of Paragraph
1 above, provided that you also do the following:
a) cause the modified files to carry prominent notices stating that
you changed the files and the date of any change; and
b) cause the whole of any work that you distribute or publish, that
in whole or in part contains the Program or any part thereof, either
with or without modifications, to be licensed at no charge to all
third parties under the terms of this General Public License (except
that you may choose to grant warranty protection to some or all
third parties, at your option).
c) If the modified program normally reads commands interactively when
run, you must cause it, when started running for such interactive use
in the simplest and most usual way, to print or display an
announcement including an appropriate copyright notice and a notice
that there is no warranty (or else, saying that you provide a
warranty) and that users may redistribute the program under these
conditions, and telling the user how to view a copy of this General
Public License.
d) You may charge a fee for the physical act of transferring a
copy, and you may at your option offer warranty protection in
exchange for a fee.
Mere aggregation of another independent work with the Program (or its
derivative) on a volume of a storage or distribution medium does not bring
the other work under the scope of these terms.
3. You may copy and distribute the Program (or a portion or derivative of
it, under Paragraph 2) in object code or executable form under the terms of
Paragraphs 1 and 2 above provided that you also do one of the following:
a) accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of
Paragraphs 1 and 2 above; or,
b) accompany it with a written offer, valid for at least three
years, to give any third party free (except for a nominal charge
for the cost of distribution) a complete machine-readable copy of the
corresponding source code, to be distributed under the terms of
Paragraphs 1 and 2 above; or,
c) accompany it with the information you received as to where the
corresponding source code may be obtained. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form alone.)
Source code for a work means the preferred form of the work for making
modifications to it. For an executable file, complete source code means
all the source code for all modules it contains; but, as a special
exception, it need not include source code for modules which are standard
libraries that accompany the operating system on which the executable
file runs, or for standard header files or definitions files that
accompany that operating system.
4. You may not copy, modify, sublicense, distribute or transfer the
Program except as expressly provided under this General Public License.
Any attempt otherwise to copy, modify, sublicense, distribute or transfer
the Program is void, and will automatically terminate your rights to use
the Program under this License. However, parties who have received
copies, or rights to use copies, from you under this General Public
License will not have their licenses terminated so long as such parties
remain in full compliance.
5. By copying, distributing or modifying the Program (or any work based
on the Program) you indicate your acceptance of this license to do so,
and all its terms and conditions.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the original
licensor to copy, distribute or modify the Program subject to these
terms and conditions. You may not impose any further restrictions on the
recipients' exercise of the rights granted herein.
7. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of the license which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
the license, you may choose any version ever published by the Free Software
Foundation.
8. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to humanity, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) 19yy <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19xx name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the
appropriate parts of the General Public License. Of course, the
commands you use may be called something other than `show w' and `show
c'; they could even be mouse-clicks or menu items--whatever suits your
program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
program `Gnomovision' (a program to direct compilers to make passes
at assemblers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
That's all there is to it!
--- The Artistic License 1.0 ---
This software is Copyright (c) 2018 by Claus Färber <CFAERBER@cpan.org>.
This is free software, licensed under:
The Artistic License 1.0
The Artistic License
Preamble
The intent of this document is to state the conditions under which a Package
may be copied, such that the Copyright Holder maintains some semblance of
artistic control over the development of the package, while giving the users of
the package the right to use and distribute the Package in a more-or-less
customary fashion, plus the right to make reasonable modifications.
Definitions:
- "Package" refers to the collection of files distributed by the Copyright
Holder, and derivatives of that collection of files created through
textual modification.
- "Standard Version" refers to such a Package if it has not been modified,
or has been modified in accordance with the wishes of the Copyright
Holder.
- "Copyright Holder" is whoever is named in the copyright or copyrights for
the package.
- "You" is you, if you're thinking about copying or distributing this Package.
- "Reasonable copying fee" is whatever you can justify on the basis of media
cost, duplication charges, time of people involved, and so on. (You will
not be required to justify it to the Copyright Holder, but only to the
computing community at large as a market that must bear the fee.)
- "Freely Available" means that no fee is charged for the item itself, though
there may be fees involved in handling the item. It also means that
recipients of the item may redistribute it under the same conditions they
received it.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications derived
from the Public Domain or from the Copyright Holder. A Package modified in such
a way shall still be considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided that
you insert a prominent notice in each changed file stating how and when you
changed that file, and provided that you do at least ONE of the following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or an
equivalent medium, or placing the modifications on a major archive site
such as ftp.uu.net, or by allowing the Copyright Holder to include your
modifications in the Standard Version of the Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict with
standard executables, which must also be provided, and provide a separate
manual page for each non-standard executable that clearly documents how it
differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or executable
form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where to
get the Standard Version.
b) accompany the distribution with the machine-readable source of the Package
with your modifications.
c) accompany any non-standard executables with their corresponding Standard
Version executables, giving the non-standard executables non-standard
names, and clearly documenting the differences in manual pages (or
equivalent), together with instructions on where to get the Standard
Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of this
Package. You may charge any fee you choose for support of this Package. You
may not charge a fee for this Package itself. However, you may distribute this
Package in aggregate with other (possibly commercial) programs as part of a
larger (possibly commercial) software distribution provided that you do not
advertise this Package as a product of your own.
6. The scripts and library files supplied as input to or produced as output
from the programs of this Package do not automatically fall under the copyright
of this Package, but belong to whomever generated them, and may be sold
commercially, and may be aggregated with this Package.
7. C or perl subroutines supplied by you and linked into this Package shall not
be considered part of this Package.
8. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End

33
MANIFEST Normal file
View File

@ -0,0 +1,33 @@
Build.PL
Changes
eg/hello_idn.pl
eg/hello_idn_email.pl
lib/Net/IDN/Encode.pm
lib/Net/IDN/Overview.pod
lib/Net/IDN/Punycode.pm
lib/Net/IDN/Punycode.xs
lib/Net/IDN/Punycode/PP.pm
lib/Net/IDN/Standards.pod
lib/Net/IDN/UTS46.pm
lib/Net/IDN/UTS46/_Mapping.pm
LICENSE
Makefile.PL
MANIFEST
META.json
META.yml
README
t/00use.t
t/domain_to_ascii.t
t/domain_to_unicode.t
t/encode_bytes.t
t/encode_utf8.t
t/punycode_vec-pp.t
t/punycode_vec-xs.t
t/uts46_api_call.t
t/uts46_encode_bytes.t
t/uts46_encode_utf8.t
t/uts46_to_ascii-trans.t
t/uts46_to_ascii.t
t/uts46_to_unicode.t
t/xtra_pp.t
SIGNATURE Added here by Module::Build

58
META.json Normal file
View File

@ -0,0 +1,58 @@
{
"abstract" : "Internationalizing Domain Names in Applications (UTS #46)",
"author" : [
"Claus Färber <CFAERBER@cpan.org>"
],
"dynamic_config" : 1,
"generated_by" : "Module::Build version 0.4224",
"license" : [
"perl_5"
],
"meta-spec" : {
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
"version" : "2"
},
"name" : "Net-IDN-Encode",
"no_index" : {
"directory" : [
"eg",
"data"
]
},
"prereqs" : {
"build" : {
"requires" : {
"ExtUtils::CBuilder" : "0"
}
},
"configure" : {
"requires" : {
"Module::Build" : "0.42"
}
},
"runtime" : {
"requires" : {
"Unicode::Normalize" : "0",
"perl" : "5.008005"
}
},
"test" : {
"requires" : {
"Test::More" : "0",
"Test::NoWarnings" : "0"
}
}
},
"release_status" : "stable",
"resources" : {
"bugtracker" : {
"web" : "https://rt.cpan.org/Public/Dist/Display.html?Name=Net-IDN-Encode"
},
"homepage" : "https://metacpan.org/release/Net-IDN-Encode",
"repository" : {
"url" : "http://github.com/cfaerber/Net-IDN-Encode"
}
},
"version" : "2.500",
"x_serialization_backend" : "JSON::PP version 2.27400_02"
}

30
META.yml Normal file
View File

@ -0,0 +1,30 @@
---
abstract: 'Internationalizing Domain Names in Applications (UTS #46)'
author:
- 'Claus Färber <CFAERBER@cpan.org>'
build_requires:
ExtUtils::CBuilder: '0'
Test::More: '0'
Test::NoWarnings: '0'
configure_requires:
Module::Build: '0.42'
dynamic_config: 1
generated_by: 'Module::Build version 0.4224, CPAN::Meta::Converter version 2.150010'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: Net-IDN-Encode
no_index:
directory:
- eg
- data
requires:
Unicode::Normalize: '0'
perl: '5.008005'
resources:
bugtracker: https://rt.cpan.org/Public/Dist/Display.html?Name=Net-IDN-Encode
homepage: https://metacpan.org/release/Net-IDN-Encode
repository: http://github.com/cfaerber/Net-IDN-Encode
version: '2.500'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'

16
Makefile.PL Normal file
View File

@ -0,0 +1,16 @@
# Note: this file was auto-generated by Module::Build::Compat version 0.4224
require 5.008005;
use ExtUtils::MakeMaker;

# Hand the distribution settings to ExtUtils::MakeMaker.
WriteMakefile(
  NAME         => 'Net::IDN::Encode',
  VERSION_FROM => 'lib/Net/IDN/Encode.pm',
  INSTALLDIRS  => 'site',
  EXE_FILES    => [],
  PL_FILES     => {},
  PREREQ_PM    => {
    'ExtUtils::CBuilder' => 0,
    'Unicode::Normalize' => 0,
  },
);

34
README Normal file
View File

@ -0,0 +1,34 @@
OVERVIEW
Net::IDN::Encode -- Internationalizing Domain Names in
Applications (IDNA)
Net::IDN::UTS46 -- Unicode IDNA Compatibility Processing
(UTS #46)
Net::IDN::Punycode -- ASCII-compatible encoding of Unicode
(Punycode, RFC 3492)
INSTALLATION
To install this module type the following:
perl Build.PL
./Build
./Build test
./Build install
DEPENDENCIES
This module requires these other modules and libraries:
Unicode::Normalize
AUTHOR
Claus Färber <CFAERBER@cpan.org>
ACKNOWLEDGMENTS
Tatsuhiko Miyagawa <miyagawa@bulknews.net>
Robert Urban <urban@UNIX-Beratung.de>

59
SIGNATURE Normal file
View File

@ -0,0 +1,59 @@
This file contains message digests of all files listed in MANIFEST,
signed via the Module::Signature module, version 0.83.
To verify the content in this distribution, first make sure you have
Module::Signature installed, then type:
% cpansign -v
It will check each file's integrity, as well as the signature's
validity. If "==> Signature verified OK! <==" is not displayed,
the distribution may already have been compromised, and you should
not run its Makefile.PL or Build.PL.
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256
SHA256 4693fdce53a610d9aa8759433e8d9e1b881b886cfa0d0d4dc3696d1d0b45c7a6 Build.PL
SHA256 61751a3382cd64ae05f5fb7e256259ad60f4671bf9c91a1e6a432102e2dd5581 Changes
SHA256 4bbb4460302739506858a117c0419f5323b6bd33a5da277ccf5b7fe327aa1147 LICENSE
SHA256 423c1d1953556eaacc7dde323b31a9e9b2beb009b5368d5778442401e52d22dd MANIFEST
SHA256 94c7a0317780509b6e0708b7477f9f0d7119dbecf70e85c756949954df6e8254 META.json
SHA256 10dd10188276a0b8833a2a402f75595961f9aa9b60820deda3e12405374a5f4e META.yml
SHA256 f319d964112761fb8122ae02bdd52e154a7e932daa8436a35ddce12a833a4132 Makefile.PL
SHA256 cc56c62a1f4ec596550474677be680e59357e6bcc85c47a3980c969c6b30140a README
SHA256 17766e80f51841d87fde559de98074c5f54a9b5c58aea9209916b0253cf44238 eg/hello_idn.pl
SHA256 04bbc370a82d2a83cd7f6c8b05997fd503e615b8db97d3e6aa90278db82875c8 eg/hello_idn_email.pl
SHA256 f3286492fcae83495a5f72a06de94ea1e2227855f6e70e919cac5b931fda62d7 lib/Net/IDN/Encode.pm
SHA256 73187c982aab7aecd5affa041640970f20afa93194399274920ce07d61cf5ca8 lib/Net/IDN/Overview.pod
SHA256 c461e5cd16a13cf54836839863ec0c300b9526a5d2079e4891c991d283f469c9 lib/Net/IDN/Punycode.pm
SHA256 86237c6f390dc10f79f97769250bff5c4be7f72352d602c0af34fa71cc7d6d7d lib/Net/IDN/Punycode.xs
SHA256 eb3a9e4cc58845a310d88d4ed6ab64f9ac7a1c605857e0c31d6683582b8bc90e lib/Net/IDN/Punycode/PP.pm
SHA256 1220a53f28ad7934425a4e7d1aa64dd9d04338220f3cd0463b5ce08fd2b12d9a lib/Net/IDN/Standards.pod
SHA256 eb22e6af3552da94e467fa8a028d6f80d3a18e09da2570e4518ef12248302cd7 lib/Net/IDN/UTS46.pm
SHA256 39d83fe9f8a53bbca49e41c5e5acefa53cd6bd8b0f53a35739fd31998782db8c lib/Net/IDN/UTS46/_Mapping.pm
SHA256 86bc523e0e50ab0aa8ee82f269251b962bc33a8240202d3d9fe60921c829ce14 t/00use.t
SHA256 23e70f56f7cd4d6be661d0a2adb75864c79c6da67a2cdc05bd44b3a8f94ab33d t/domain_to_ascii.t
SHA256 3cbe0ec5076c6e312b071772adeb05bd852d01084883071cd5640f7741fc2059 t/domain_to_unicode.t
SHA256 8bc1406f117c71d42e8d32b045a6848ee6bd8c8795a2083d8776df9660ac5dae t/encode_bytes.t
SHA256 9e7b8f0f75afc0f0eba2d9336db3c6258ca7a176be1dc5b6e1e8d4e639e1d361 t/encode_utf8.t
SHA256 7b43ef649f6ed8d190112fda703dca176431bf64d0527128a7f2b4cb1bed8b58 t/punycode_vec-pp.t
SHA256 16c6d0b535307b666d79280ee239ab27e1d559e95febfef4c2b483c2b2455986 t/punycode_vec-xs.t
SHA256 de38cf9a35faf24d3ce081caaff55690502d51608c782d26667890d385a70eb7 t/uts46_api_call.t
SHA256 231ae5a43577ada176e21b92984f4d1b5ba8234349d049f5fbbf2abb392f568c t/uts46_encode_bytes.t
SHA256 6fa4c3d49561bfb59815d8bc4a1f21c44fa10a15f94e97f5bdba3ee1efbb63bf t/uts46_encode_utf8.t
SHA256 16a3373aac4a377aa19ef6a703444d1eca3d52fa56075802a23a77da571bd138 t/uts46_to_ascii-trans.t
SHA256 bf5effc197d5b7e641d303ab116a24c060d4e9a3a575d5c1ee12d42a6cd8deee t/uts46_to_ascii.t
SHA256 c232df2fea5fefa7e5299a8430ef83aa909cd6820cefc55187dc375a59b04d0e t/uts46_to_unicode.t
SHA256 038fb1201517afa4bba421f4d031395225e51fb900ce447a4985d7c594893d03 t/xtra_pp.t
-----BEGIN PGP SIGNATURE-----
iQEzBAEBCAAdFiEEMNwyPmkVmFwPfD8AsRl4Qp8hpEYFAlu4AZcACgkQsRl4Qp8h
pEbd3gf8DCr86hNujt20VyRb/EuvULx/fFlNwyp2KVEjtGpJcmRXTAm4Jn8pB4c6
psa5kiuLqwSXrUIEw9JCj5h9AfKTeH/FD4SBYi+vRVgG7BS1b20IrJi9utJhzZGZ
WvKN14rvaWUPdifj2t0KZ2jRuf2ZPPRjzcP62Rq9jEq/XUbZAjVI7v3LBHQIYLVE
vnnZP3RsuglS6GdpIzGJ0CVUAsVBtfi+5asenMncX6HoQXqdS5G+1CvcYn8yLnJh
7m/F6xweklxoke+Rv1nhDXNpp8MDYq7qa+dawnNHrSY+q43eGl0+cIraHmY40PTK
0i93JopUhn7qSb5OSlmuI+bGVCNqow==
=4+qi
-----END PGP SIGNATURE-----

18
eg/hello_idn.pl Normal file
View File

@ -0,0 +1,18 @@
#!/usr/bin/perl
# Example: print the ASCII (toASCII) and Unicode (toUnicode) form of a few
# internationalized domain names.
use strict;
use warnings;
use utf8;                       # the domain literals below are UTF-8 source text
binmode STDOUT, ":utf8";

# Net::IDN::Encode exports nothing by default; the conversion functions
# have to be requested explicitly via the ':all' tag.
use Net::IDN::Encode ':all';

my @domain = (
    '例.テスト',
    'müller.example.net',
);

foreach (@domain) {
    printf "%s: toASCII=<%s>, toUnicode=<%s>\n",
        $_, domain_to_ascii($_), domain_to_unicode($_);
}

18
eg/hello_idn_email.pl Normal file
View File

@ -0,0 +1,18 @@
#!/usr/bin/perl
# Example: print the ASCII (toASCII) and Unicode (toUnicode) form of a few
# email addresses with internationalized domain parts.
use strict;
use warnings;
use utf8;                       # the address literals below are UTF-8 source text
binmode STDOUT, ":utf8";

# Net::IDN::Encode exports nothing by default; the conversion functions
# have to be requested explicitly via the ':all' tag.
use Net::IDN::Encode ':all';

# Every address needs an at sign: without one the whole string is taken as
# the local-part, which must be pure ASCII, and the conversion croaks.
my @email = (
    'postmaster@例.テスト',
    'info@müller.example.net',
);

foreach (@email) {
    printf "%s: toASCII=<%s>, toUnicode=<%s>\n",
        $_, email_to_ascii($_), email_to_unicode($_);
}

347
lib/Net/IDN/Encode.pm Executable file
View File

@ -0,0 +1,347 @@
# High-level conversion of domain labels, domain names and email addresses
# between their Unicode and ASCII forms.
package Net::IDN::Encode;
require 5.006;
use strict;
use utf8;
use warnings;
our $VERSION = "2.500";
# Turn the version string into a plain number.
$VERSION = eval $VERSION;
use Carp;
use Exporter;
our @ISA = ('Exporter');
# Nothing is exported by default; callers pick a tag or individual names.
our @EXPORT = ();
our %EXPORT_TAGS = (
# ':all' - the public conversion functions
'all' => [
'to_ascii',
'to_unicode',
'domain_to_ascii',
'domain_to_unicode',
'email_to_ascii',
'email_to_unicode',
],
# ':_var' - the ACE prefix and the character-class helpers defined below
'_var' => [
'$IDNA_PREFIX',
'IsIDNADot',
'IsIDNAAtsign',
]
);
# Make every name listed in any tag importable on request (@EXPORT_OK).
Exporter::export_ok_tags(keys %EXPORT_TAGS);
# Loaded with an empty import list: nothing from Punycode is imported here.
use Net::IDN::Punycode 1.102 ();
# Prefix that marks an ASCII-encoded (ACE) label.
our $IDNA_PREFIX = 'xn--';
# User-defined Unicode properties, usable as \p{IsIDNADot} and
# \p{IsIDNAAtsign}: each returns a newline-separated list of hex code points.
# Dots: U+002E, U+3002, U+FF0E, U+FF61.
sub IsIDNADot { "002E\n3002\nFF0E\nFF61" }
# At signs: U+0040, U+FE6B, U+FF20.
sub IsIDNAAtsign{ "0040\nFE6B\nFF20" }
# Net::IDN::UTS46 imports the ':_var' names from this package, so it is
# loaded at run time, only once they have been declared above.
require Net::IDN::UTS46; # after declaration of vars!
# to_ascii( $label, %param )
#
# Converts a single label to its ASCII form and returns it.
#   - croaks with 'Invalid label' if the label contains any dot character
#     (a label is never a whole domain name);
#   - a label containing non-ASCII characters is handed, together with all
#     original arguments, to Net::IDN::UTS46::to_ascii;
#   - a pure-ASCII label is only length-checked (1..63 characters) and then
#     returned unchanged.
sub to_ascii {
    my ($label, %param) = @_;

    croak 'Invalid label' if $label =~ m/\p{IsIDNADot}/o;

    # Pure ASCII: no conversion, just the length limits for a label.
    unless ($label =~ m/\P{ASCII}/o) {
        my $size = length($label);
        croak 'label empty'    if $size < 1;
        croak 'label too long' if $size > 63;
        return $label;
    }

    # Non-ASCII: delegate; the result is taken in scalar context.
    my $ascii_label = Net::IDN::UTS46::to_ascii(@_);
    return $ascii_label;
}
# to_unicode( $label, %param )
#
# Converts a single label to its Unicode form and returns it.
#   - croaks with 'Invalid label' if the label contains any dot character;
#   - a label that is pure ASCII and does not start with the ACE prefix
#     (compared case-insensitively) is returned unchanged;
#   - every other label is handed, together with all original arguments,
#     to Net::IDN::UTS46::to_unicode.
sub to_unicode {
    my ($label, %param) = @_;

    croak 'Invalid label' if $label =~ m/\p{IsIDNADot}/o;

    # Nothing to decode: plain ASCII without the ACE prefix.
    return $label
        unless $label =~ m/\P{ASCII}|^(?:(?i)$IDNA_PREFIX)/o;

    my $unicode_label = Net::IDN::UTS46::to_unicode(@_);
    return $unicode_label;
}
# _domain( $domain, $to_function, $ascii, %param )
#
# Shared worker behind domain_to_ascii / domain_to_unicode.
#   $domain       the domain name to convert
#   $to_function  code ref applied to every label (called as
#                 $to_function->($label, %param))
#   $ascii        true: every dot variant is replaced by '.';
#                 false: the original dot characters are kept
#   %param        passed through to $to_function; UseSTD3ASCIIRules
#                 defaults to 1 when the caller did not supply it
# Returns the converted pieces joined back into one string.
sub _domain {
    my ($domain, $to_function, $ascii, %param) = @_;
    $param{'UseSTD3ASCIIRules'} = 1 unless exists $param{'UseSTD3ASCIIRules'};

    # The capturing split yields labels and separators alternately,
    # always starting with a label.
    my @converted;
    my $is_label = 1;
    for my $piece (split /(\p{IsIDNADot})/o, $domain) {
        if ($is_label) {
            push @converted, $to_function->($piece, %param);
        }
        elsif ($ascii) {
            push @converted, '.';
        }
        else {
            push @converted, $piece;
        }
        $is_label = !$is_label;
    }

    return join '', @converted;
}
# _email( $email, $to_function, $ascii, %param )
#
# Shared worker behind email_to_ascii / email_to_unicode.
#   $email        the address to convert; undef and '' are returned as-is
#   $to_function  code ref used to convert the domain part (called as
#                 $to_function->($domain, %param))
#   $ascii        true: the at sign is normalised to '@';
#                 false: the original at-sign character is kept
# The address is split into a local-part (plain or double-quoted), an at
# sign, and either a domain name or a bracketed domain-literal.  Only the
# domain name is converted; local-part and domain-literal must be ASCII.
# Croaks on addresses that do not parse or that carry non-ASCII characters
# in the local-part or domain-literal.
sub _email {
    my ($email, $to_function, $ascii, %param) = @_;
    return $email if !defined($email) || $email eq '';

    $email =~ m/^(
        (?(?!\p{IsIDNAAtsign}|").|(?!))+
      |
        "(?:(?:[^"]|\\.)*[^\\])?"
      )
      (?:
        (\p{IsIDNAAtsign})
        (?:([^\[\]]*)|(\[.*\]))?
      )?$/xo || croak "Invalid email address";

    # The pattern has four capture groups; the domain-literal is the fourth.
    # Leaving $4 out would silently drop "[...]" domain parts from the result.
    my ($local_part, $at, $domain, $domain_literal) = ($1, $2, $3, $4);

    $local_part =~ m/\P{ASCII}/ && croak "Non-ASCII characters in local-part";
    $domain_literal =~ m/\P{ASCII}/ && croak "Non-ASCII characters in domain-literal" if $domain_literal;

    $domain = $to_function->($domain, %param) if $domain;
    $at = '@' if $ascii;

    # Without any domain part only the local-part is returned.
    return ($domain || $domain_literal)
        ? ($local_part . $at . ($domain || $domain_literal))
        : ($local_part);
}
# Public entry points.  Each takes the string to convert followed by the
# optional %param list and forwards to the shared worker with the matching
# per-label (or per-domain) converter and the "normalise separators to
# ASCII" flag: 1 for the *_to_ascii variants, 0 for *_to_unicode.

sub domain_to_ascii {
    my ($domain, @param) = @_;
    return _domain($domain, \&to_ascii, 1, @param);
}

sub domain_to_unicode {
    my ($domain, @param) = @_;
    return _domain($domain, \&to_unicode, 0, @param);
}

sub email_to_ascii {
    my ($email, @param) = @_;
    return _email($email, \&domain_to_ascii, 1, @param);
}

sub email_to_unicode {
    my ($email, @param) = @_;
    return _email($email, \&domain_to_unicode, 0, @param);
}
1;
__END__
=encoding utf8
=head1 NAME
Net::IDN::Encode - Internationalizing Domain Names in Applications (IDNA)
=head1 SYNOPSIS
use Net::IDN::Encode ':all';
my $a = domain_to_ascii("müller.example.org");
my $e = email_to_ascii("POSTMASTER@例。テスト");
my $u = domain_to_unicode('EXAMPLE.XN--11B5BS3A9AJ6G');
=head1 DESCRIPTION
This module provides an easy-to-use interface for encoding and
decoding Internationalized Domain Names (IDNs).
IDNs use characters drawn from a large repertoire (Unicode), but
IDNA allows the non-ASCII characters to be represented using only
the ASCII characters already allowed in so-called host names today
(letter-digit-hyphen, C</[A-Z0-9-]/i>).
Use this module if you just want to convert domain names (or email addresses),
using whatever IDNA standard is the best choice at the moment.
You should be familiar with Unicode support in perl, as this module expects
correctly encoded input. See L<perlunitut>, L<perluniintro> and L<perlunicode>
for details.
=head1 UNICODE VERSION
To convert labels correctly between Unicode and ASCII, each character in the
label must be present in the Unicode version supported by your perl.
Consequently, this module will refuse to convert labels with new Unicode
characters on older perl versions (see below).
=head1 FUNCTIONS
By default, this module does not export any subroutines. You may
use the C<:all> tag to import everything. You can also use regular
expressions such as C</^to_/> or C</^email_/> to select some of
the functions, see L<Exporter> for details.
The following functions are available:
=over
=item to_ascii( $label, %param )
Converts a single label C<$label> to ASCII. Will throw an exception on invalid
input. If C<$label> is already a valid ASCII domain label (including most
NON-LDH labels such as those used for SRV records and fake A-labels), this
function will never fail but return C<$label> as-is if conversion would fail.
This function takes the following optional parameters (C<%param>):
=over
=item AllowUnassigned
(boolean) If set to a true value, code points that are unassigned in the
Unicode version supported by your perl are allowed. This is an extension over
UTS #46.
While this increases the number of labels that can be converted successfully
(especially on older perls) and may thus maximize compatibility with
domain names created under future versions of Unicode, it also introduces the
risk of incorrect conversions. Characters added in later versions of Unicode
might have properties that affect the conversion; if these properties are not
known on your version of perl, you might therefore end up with an incorrect
conversion.
The default is false.
=item UseSTD3ASCIIRules
(boolean) If set to a true value, checks the label for compliance with S<STD 3>
(S<RFC 1123>) syntax for host name parts. The exact checks done depend on the
IDNA standard used. Usually, you will want to set this to true.
Please note that UseSTD3ASCIIRules only affects the conversion between ASCII
labels (A-labels) and Unicode labels (U-labels). Labels that are in ASCII may
still be passed-through as-is.
For historical reasons, the default is false (unlike C<domain_to_ascii>).
=item TransitionalProcessing
(boolean) If set to true, the conversion will be compatible with IDNA2003. This
only affects four characters: C<'ß'> (U+00DF), 'ς' (U+03C2), ZWJ (U+200D) and
ZWNJ (U+200C). Usually, you will want to set this to false.
The default is false.
=back
This function does not handle strings that consist of multiple labels (such as
domain names). Use C<domain_to_ascii> instead.
=item to_unicode( $label, %param )
Converts a single label C<$label> to Unicode. Will throw an exception on
invalid input. If C<$label> is an ASCII label (including most NON-LDH labels
such as those used for SRV records), this function will not fail but return
C<$label> as-is if conversion would fail.
This function takes the same optional parameters as C<to_ascii>,
with the same defaults.
If C<$label> is already in ASCII, this function will never fail but return
C<$label> as is as a last resort (i.e. pass-through).
This function takes the following optional parameters (C<%param>):
=over
=item AllowUnassigned
=item UseSTD3ASCIIRules
See C<to_ascii> above. Please note that there is no need for
C<TransitionalProcessing> for C<to_unicode>.
=back
This function does not handle strings that consist of multiple labels (such as
domain names). Use C<domain_to_unicode> instead.
=item domain_to_ascii( $domain, %param )
Converts all labels of the hostname C<$domain> (with labels separated by dots)
to ASCII (using C<to_ascii>). Will throw an exception on invalid input.
This function takes the following optional parameters (C<%param>):
=over
=item AllowUnassigned
=item TransitionalProcessing
See C<to_ascii> above.
=item UseSTD3ASCIIRules
(boolean) If set to a true value, checks the label for compliance with S<STD 3>
(S<RFC 1123>) syntax for host name parts.
The default is true (unlike C<to_ascii>).
=back
This function will convert all dots to ASCII, i.e. to U+002E (full stop). The
following characters are recognized as dots: U+002E (full stop), U+3002
(ideographic full stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth
ideographic full stop).
=item domain_to_unicode( $domain, %param )
Converts all labels of the hostname C<$domain> (with labels separated by dots)
to Unicode. Will throw an exception on invalid input.
This function takes the same optional parameters as C<domain_to_ascii>,
with the same defaults.
This function takes the following optional parameters (C<%param>):
=over
=item AllowUnassigned
=item UseSTD3ASCIIRules
See C<domain_to_ascii> above. Please note that there is no C<TransitionalProcessing>
for C<domain_to_unicode>.
=back
This function will preserve the original version of dots. The following
characters are recognized as dots: U+002E (full stop), U+3002 (ideographic full
stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full stop).
=item email_to_ascii( $email, %param )
Converts the domain part (right hand side, separated by an at sign) of an S<RFC
2821>/2822 email address to ASCII, using C<domain_to_ascii>. May throw an
exception on invalid input.
It takes the same parameters as C<domain_to_ascii>.
This function currently does not handle internationalization of the local-part
(left hand side). Future versions of this module might implement an ASCII
conversion for the local-part, should one be standardized.
This function will convert the at sign to ASCII, i.e. to U+0040 (commercial
at), as well as label separators. The following characters are recognized as at
signs: U+0040 (commercial at), U+FE6B (small commercial at) and U+FF20
(fullwidth commercial at).
=item email_to_unicode( $email, %param )
Converts the domain part (right hand side, separated by an at sign) of an S<RFC
2821>/2822 email address to Unicode, using C<domain_to_unicode>. May throw an
exception on invalid input.
It takes the same parameters as C<domain_to_unicode>.
This function currently does not handle internationalization of the local-part
(left hand side). Future versions of this module might implement a conversion
from ASCII for the local-part, should one be standardized.
This function will preserve the original version of at signs (and label
separators). The following characters are recognized as at signs: U+0040
(commercial at), U+FE6B (small commercial at) and U+FF20 (fullwidth commercial
at).
=back
=head1 AUTHOR
Claus FE<auml>rber <CFAERBER@cpan.org>
=head1 LICENSE
Copyright 2007-2014 Claus FE<auml>rber.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
L<Net::IDN::Punycode>, L<Net::IDN::UTS46>, L<Net::IDN::IDNA2003>,
L<Net::IDN::IDNA2008>, S<UTS #46> (L<http://www.unicode.org/reports/tr46/>),
S<RFC 5890> (L<http://tools.ietf.org/html/rfc5890>).
=cut

160
lib/Net/IDN/Overview.pod Normal file
View File

@ -0,0 +1,160 @@
=encoding utf8
=head1 NAME
Net::IDN::Overview - Internationalized Domain Names for Applications (IDNA)
=head1 DESCRIPTION
The C<Net::IDN::*> modules provide a framework for the handling of
Internationalized Domain Names for Applications (IDNA) in perl programmes.
This document provides an overview of the available modules in order to
allow you to choose the best module for the task at hand.
=head2 AVAILABLE MODULES
=head3 HIGH-LEVEL (USE THIS)
=over
=item L<Net::IDN::Encode>
provides a high-level interface for converting domain names (and
for convenience, email addresses).
Use this module if you just want to convert domain names and don't
care about how this is done internally.
Currently, this module uses L<Net::IDN::UTS46>. However, this
might change in the future if another specification (e.g. a
revision of IDNA2008) becomes more appropriate.
The author aims for Net::IDN::Encode to always use the specification that will
provide the "least surprising" results.
=back
=head3 STANDARD-SPECIFIC
These modules implement different versions of the IDNA
specifications. Use one of these modules only if you require
compatibility with a specific incarnation of IDNA.
=over
=item L<Net::IDN::IDNA2003>
implements the original IDNA specification, released in 2003
(IDNA2003), which is now obsolete.
IDNA2003 is defined in RFC 3490 L<http://tools.ietf.org/rfc/3490>
and related documents.
=begin comment
=item L<Net::IDN::IDNA2008>
implements the current IDNA specification, released in early 2010
(IDNA2008 or IDNAbis).
Please note that this module will not allow you to convert some
domain names, such as C<√.com> or C<I♥NY.com>, which were allowed
in IDNA2003 but are disallowed in IDNA2008.
IDNA2008 is defined in RFC 5890 L<http://tools.ietf.org/rfc/5890>
and related documents.
=end comment
=item L<Net::IDN::UTS46>
implements Unicode Technical Standard #46 (UTS #46
L<http://unicode.org/reports/tr46/>), Unicode IDNA Compatibility
Processing. This specification supports all domain names allowed
under either IDNA2003 or IDNA2008.
=back
=head3 ENCODING
=over
=item L<Net::IDN::Punycode>
performs the actual conversion between the ASCII and Unicode form
of strings. Punycode is defined in RFC 3492
L<http://tools.ietf.org/rfc/3492> and related documents.
Usually, it is not a good idea to use this module directly. If you
convert domain labels (or other strings) without proper
preparation, you may end up with an ASCII encoding that is not
interoperable or poses security issues due to spoofing.
Even if you think that your domain names are valid and in
already-mapped format, you might be fooled by different Unicode
normalization forms (for example, some environments might
automatically convert your data to NFD, which breaks IDNA).
=back
=head3 DEPRECATED/COMPATIBILITY
These modules are only maintained in order to not break
applications that might rely on them.
=over
=item L<Encode::Punycode>
provides an L<Encode> plugin for Punycode. As Punycode is not a
general-purpose encoding, there are limited applications.
=item L<IDNA::Punycode>
has an API depending on global variables. Don't use this module.
=back
=head2 DISTRIBUTIONS
=over
=item Net-IDN-Encode
is the main distribution covering the most common cases for
converting domain names between ASCII and Unicode.
The author tries to keep the dependency chain as small as possible; currently
this distribution only depends on perl 5.8.5 (including the core module
L<Unicode::Normalize> ).
=item Net-IDN-IDNA2003
provides the L<Net::IDN::IDNA2003> module. This is separate
because it has a dependency on L<Unicode::Stringprep> (through
L<Net::IDN::Nameprep>).
=begin comment
=item Net-IDN-IDNA2008
provides the L<Net::IDN::IDNA2008> module. This is separate because it has a
dependency on perl 5.10 or higher (through L<Unicode::Precis>).
=end comment
=item Encode-Punycode
=item IDNA-Punycode
are separate because they are of limited use to the average
user/perl programmer.
=back
=head1 AUTHOR
Claus FE<auml>rber <CFAERBER@cpan.org>
=cut

117
lib/Net/IDN/Punycode.pm Normal file
View File

@ -0,0 +1,117 @@
# Front-end for the Punycode codec: provides encode_punycode and
# decode_punycode from the compiled (XS) implementation when it can be
# loaded, and from the pure-Perl implementation otherwise.
package Net::IDN::Punycode;
use 5.006;
use strict;
use utf8;
use warnings;
use Exporter;
our $VERSION = "2.500";
# Turn the version string into a plain number.
$VERSION = eval $VERSION;
our @ISA = qw(Exporter);
# Nothing is exported by default.
our @EXPORT = ();
our @EXPORT_OK = ();
our %EXPORT_TAGS = ( 'all' => [ qw(encode_punycode decode_punycode) ], );
# Adds the names of every tag to @EXPORT_OK so they can be imported on request.
Exporter::export_ok_tags(keys %EXPORT_TAGS);
# When set to a true value before this module is loaded, the XS
# implementation is not attempted.
our $_NO_XS;
# Try the XS implementation; any failure (including the deliberate die
# below) is swallowed by the eval and simply leaves the functions undefined.
eval {
die if $_NO_XS;
require XSLoader;
XSLoader::load('Net::IDN::Punycode');
};
# No XS function available: fall back to the pure-Perl implementation and
# import its functions into this package.
if (!defined(&encode_punycode)) {
require Net::IDN::Punycode::PP;
Net::IDN::Punycode::PP->import(qw(:all));
}
1;
__END__
=head1 NAME
Net::IDN::Punycode - A Bootstring encoding of Unicode for IDNA (S<RFC 3492>)
=head1 SYNOPSIS
use Net::IDN::Punycode qw(:all);
$punycode = encode_punycode($unicode);
$unicode = decode_punycode($punycode);
=head1 DESCRIPTION
This module implements the Punycode encoding, and only the Punycode encoding.
This module does not implement any other steps required for converting
internationalized domain names (IDNs) to and from ASCII. In particular, it does
not do any string preparation as specified by I<Nameprep>/I<IDNA2008>/I<PRECIS>
and does not add nor remove the ACE prefix (C<xn-->). Thus, use
L<Net::IDN::Encode> if you want to convert domain names.
Punycode is an instance of a more general algorithm called Bootstring, which
allows strings composed from a small set of "basic" code points to uniquely
represent any string of code points drawn from a larger set. Punycode is
Bootstring with particular parameter values appropriate for IDNA.
=head1 WARNING
You may be tempted to use this module directly and add/remove the ACE prefix
(C<xn-->) in your code for performance reasons. Usually, this is not a good
idea. If you convert domain labels (or other strings) without proper
preparation, you may end up with an ASCII encoding that is not interoperable or
even poses security issues due to spoofing.
Even if you think that your domain names are valid and already mapped to the
correct form, this may not be true. For example, some environments might
automatically convert your perfectly valid domain names to a different but
equivalent Unicode normalization form (e.g., NFD instead of NFC), which already
breaks IDNA.
=head1 FUNCTIONS
No functions are exported by default. You can use the tag C<:all>
or import them individually.
The following functions are available:
=over
=item encode_punycode($input)
Encodes C<$input> with Punycode and returns the result.
This function will throw an exception on invalid/unencodable input.
=item decode_punycode($input)
Decodes C<$input> with Punycode and returns the result.
This function will throw an exception on invalid input.
=back
=head1 AUTHORS
Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> (versions 0.01 to 0.02)
Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt> (versions 1.000 and higher)
=head1 LICENSE
Copyright 2002-2004 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
Copyright 2007-2014 Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt>
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
S<RFC 3492> (L<http://www.ietf.org/rfc/rfc3492.txt>),
L<IETF::ACE>, L<Convert::RACE>
=cut

264
lib/Net/IDN/Punycode.xs Normal file
View File

@ -0,0 +1,264 @@
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#ifdef XS_VERSION
#undef XS_VERSION
#endif
#define XS_VERSION "2.500"
/* Bootstring parameter values used by Punycode. */
#define BASE 36
#define TMIN 1
#define TMAX 26
#define SKEW 38
#define DAMP 700
#define INITIAL_BIAS 72
#define INITIAL_N 128

/* True for a "basic" code point; the argument is parenthesised so that
   an arbitrary expression can be passed safely. */
#define isBASE(x) UTF8_IS_INVARIANT((unsigned char)(x))

/* Separates the literal basic code points from the encoded part. */
#define DELIM '-'

/* Clamp t to the range TMIN..TMAX. */
#define TMIN_MAX(t) (((t) < TMIN) ? (TMIN) : ((t) > TMAX) ? (TMAX) : (t))

/* Fallback for perls that do not provide utf8_to_uvchr_buf(): the end
   pointer is ignored.  No trailing semicolon, so the macro behaves like
   the function call it replaces and can be used inside expressions. */
#ifndef utf8_to_uvchr_buf
#define utf8_to_uvchr_buf(in_p,in_e,u8) utf8_to_uvchr(in_p,u8)
#endif
/* Encoding table: digit value 0..35 -> output character.
   Values 0..25 map to 'a'..'z', values 26..35 map to '0'..'9'. */
static char enc_digit[BASE] = {
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
};
/* Decoding table, indexed by a character code 0x00..0x7F: yields the digit
   value, or -1 if the character is not a digit.  Upper-case and lower-case
   letters decode to the same values (0..25); '0'..'9' decode to 26..35. */
static IV dec_digit[0x80] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00..0F */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10..1F */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20..2F */
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 30..3F */
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 40..4F */
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50..5F */
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 60..6F */
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 70..7F */
};
/*
 * Bias adaptation: computes the new bias after a code point has been
 * encoded or decoded.
 *
 *   delta      the delta that was just processed
 *   numpoints  number of code points handled so far, including the
 *              current one (callers in this file always pass >= 1)
 *   first      non-zero for the very first delta, which is damped by
 *              DAMP instead of being halved
 *
 * Returns the new bias.
 */
static int adapt(int delta, int numpoints, int first) {
  int k;

  delta /= first ? DAMP : 2;
  delta += delta/numpoints;

  /* scale delta down until it fits the digit threshold range */
  for(k=0; delta > ((BASE-TMIN) * TMAX)/2; k += BASE)
    delta /= BASE-TMIN;

  return k + (((BASE-TMIN+1) * delta) / (delta+SKEW));
}
/*
 * Makes sure that `add` more bytes can be written at *current.
 *
 *   sv       the SV that owns the buffer
 *   start    in/out: first byte of the buffer
 *   current  in/out: current write position
 *   end      in/out: start + allocated length (SvLEN) of the buffer
 *   add      number of bytes about to be written
 *
 * When the buffer is too small it is enlarged (to a size rounded up to a
 * multiple of 16) and all three pointers are re-based on the buffer
 * returned by SvGROW; the write offset is preserved.
 */
static void
grow_string(SV *const sv, char **start, char **current, char **end, STRLEN add)
{
  if(*current + add > *end) {
    const STRLEN used = (*current - *start);

    *start   = SvGROW(sv, (used + add + 15) & ~15);
    *current = *start + used;
    *end     = *start + SvLEN(sv);
  }
}
MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode
SV*
encode_punycode(input)
SV * input
PREINIT:
/* c: current code point, m: smallest code point >= n still to be
   encoded, n: next code point value to look for */
UV c, m, n = INITIAL_N;
int k, q, t;
int bias = INITIAL_BIAS;
int delta = 0, skip_delta;
/* in_*: input bytes (start, cursor, end); skip_p: first occurrence of m */
const char *in_s, *in_p, *in_e, *skip_p;
/* re_*: result buffer (start, write cursor, end of allocation) */
char *re_s, *re_p, *re_e;
int first = 1;
/* h: number of code points already written to the result */
STRLEN length_guess, len, h, u8;
CODE:
/* Encodes `input` with Punycode and returns the result as a new SV.
   The input is read in its UTF-8 form; croaks with "input exceeds
   punycode limit" if the final digit value is out of range. */
in_s = in_p = SvPVutf8(input, len);
in_e = in_s + len;
length_guess = len;
if(length_guess < 64) length_guess = 64; /* optimise for maximum length of domain names */
length_guess += 2; /* plus DELIM + '\0' */
RETVAL = NEWSV('P',length_guess);
SvPOK_only(RETVAL);
re_s = re_p = SvPV_nolen(RETVAL);
re_e = re_s + SvLEN(RETVAL);
h = 0;
/* copy basic code points */
while(in_p < in_e) {
if( isBASE(*in_p) ) {
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
*re_p++ = *in_p;
h++;
}
in_p++;
}
/* add DELIM if needed */
if(h) {
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
*re_p++ = DELIM;
}
/* one iteration per distinct non-basic code point value, in
   ascending order; ends when no unhandled code point is left */
for(;;) {
/* find smallest code point not yet handled */
m = UV_MAX;
q = skip_delta = 0;
for(in_p = skip_p = in_s; in_p < in_e;) {
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8);
c = NATIVE_TO_UNI(c);
if(c >= n && c < m) {
m = c;
/* remember where m first occurs and how many already-handled
   code points (c < n) precede it */
skip_p = in_p;
skip_delta = q;
}
if(c < n)
++q;
in_p += u8;
}
if(m == UV_MAX)
break;
/* increase delta to the state corresponding to
the m code point at the beginning of the string */
delta += (m-n) * (h+1);
n = m;
/* now find the chars to be encoded in this round */
/* the scan starts at the first occurrence of m, so the handled code
   points in front of it are accounted for in one step */
delta += skip_delta;
for(in_p = skip_p; in_p < in_e;) {
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8);
c = NATIVE_TO_UNI(c);
if(c < n) {
++delta;
} else if( c == n ) {
/* emit delta as a variable-length integer: one digit per
   iteration until the remainder is below the threshold t */
q = delta;
for(k = BASE;; k += BASE) {
t = TMIN_MAX(k - bias);
if(q < t) break;
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
*re_p++ = enc_digit[t + ((q-t) % (BASE-t))];
q = (q-t) / (BASE-t);
}
/* the loop above only exits with q < t, and t never exceeds TMAX */
if(q > BASE) croak("input exceeds punycode limit");
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
*re_p++ = enc_digit[q];
bias = adapt(delta, h+1, first);
delta = first = 0;
++h;
}
in_p += u8;
}
++delta;
++n;
}
/* NUL-terminate and set the string length (terminator not counted) */
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
*re_p = 0;
SvCUR_set(RETVAL, re_p - re_s);
OUTPUT:
RETVAL
SV*
decode_punycode(input)
        SV * input
    PREINIT:
        /* c: current input character / digit value, n: current code point */
        UV c, n = INITIAL_N;
        IV dc;
        int i = 0, oldi, j, k, t, w;
        int bias = INITIAL_BIAS;
        /* in_*: input bytes (start, cursor, end); skip_p: first the last
           DELIM in the input, later the insertion position in the result */
        const char *in_s, *in_p, *in_e, *skip_p;
        /* re_*: result buffer (start, end of data, end of allocation) */
        char *re_s, *re_p, *re_e;
        int first = 1;
        /* h: number of code points in the result so far */
        STRLEN length_guess, h, u8;
    CODE:
        /* Decodes the Punycode string `input` and returns the result as a
           new SV (flagged as UTF-8 once a code point has been inserted).
           Croaks on non-basic characters, on invalid digits and on a
           truncated encoded code point. */
        in_s = in_p = SvPV_nolen(input);
        in_e = SvEND(input);

        length_guess = SvCUR(input) * 2;
        if(length_guess < 256) length_guess = 256;

        RETVAL = NEWSV('D',length_guess);
        SvPOK_only(RETVAL);
        re_s = re_p = SvPV_nolen(RETVAL);
        re_e = re_s + SvLEN(RETVAL);

        /* Copy the whole input while validating it and remembering the
           position of the last DELIM; the encoded tail that gets copied
           as well is discarded again below by resetting re_p. */
        skip_p = NULL;
        for(in_p = in_s; in_p < in_e; in_p++) {
          c = *in_p;                      /* we don't care whether it's UTF-8 */
          if(!isBASE(c)) croak("non-base character in input for decode_punycode");
          if(c == DELIM) skip_p = in_p;
          grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
          *re_p++ = c;                    /* copy it */
        }

        if(skip_p) {
          h = skip_p - in_s;              /* base chars handled */
          re_p = re_s + h;                /* points to end of base chars */
          skip_p++;                       /* skip over DELIM */
        } else {
          h = 0;                          /* no base chars */
          re_p = re_s;
          skip_p = in_s;                  /* read everything */
        }

        for(in_p = skip_p; in_p < in_e; i++) {
          oldi = i;
          w = 1;

          /* read one variable-length integer into i */
          for(k = BASE;; k+= BASE) {
            if(!(in_p < in_e)) croak("incomplete encoded code point in decode_punycode");
            dc = dec_digit[*in_p++];      /* we already know it's in 0..127 */
            if(dc < 0) croak("invalid digit in input for decode_punycode");
            c = (UV)dc;
            i += c * w;
            t = TMIN_MAX(k - bias);
            if(c < t) break;
            w *= BASE-t;
          }
          h++;
          bias = adapt(i-oldi, h, first);
          first = 0;

          n += i / h;                     /* code point n to insert */
          i = i % h;                      /* at position i */

          u8 = UNISKIP(n);                /* how many bytes we need */

          /* Make room BEFORE locating the insertion point: grow_string may
             enlarge the buffer and re-base re_s/re_p/re_e, which would
             leave a pointer computed earlier pointing into the old buffer. */
          grow_string(RETVAL, &re_s, &re_p, &re_e, u8);

          j = i;
          for(skip_p = re_s; j > 0; j--)  /* find position in UTF-8 */
            skip_p+=UTF8SKIP(skip_p);

          if(skip_p < re_p)               /* move succeeding chars */
            Move(skip_p, skip_p + u8, re_p - skip_p, char);
          re_p += u8;
          uvuni_to_utf8_flags((U8*)skip_p, n, UNICODE_ALLOW_ANY);
        }

        if(!first) SvUTF8_on(RETVAL);     /* UTF-8 chars have been inserted */
        grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
        *re_p = 0;
        SvCUR_set(RETVAL, re_p - re_s);
    OUTPUT:
        RETVAL

195
lib/Net/IDN/Punycode/PP.pm Normal file
View File

@ -0,0 +1,195 @@
# Pure-Perl implementation of the Punycode encoder and decoder.
package Net::IDN::Punycode::PP;

use 5.008;
use strict;
use utf8;
use warnings;

use Carp;
use Exporter;

our $VERSION = "2.500";

our @ISA         = qw(Exporter);
our @EXPORT      = ();
our @EXPORT_OK   = qw(encode_punycode decode_punycode);
our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK );

# Everything below is computed with integer arithmetic.
use integer;

# Bootstring parameter values used by Punycode.
use constant BASE         => 36;
use constant TMIN         => 1;
use constant TMAX         => 26;
use constant SKEW         => 38;
use constant DAMP         => 700;
use constant INITIAL_BIAS => 72;
use constant INITIAL_N    => 128;

# Range of code points accepted as decoder output.
use constant UNICODE_MIN => 0;
use constant UNICODE_MAX => 0x10FFFF;

my $Delimiter = chr 0x2D;        # '-' between literal and encoded part
my $BasicRE   = "\x00-\x7f";     # character-class body: basic code points
my $PunyRE    = "A-Za-z0-9";     # character-class body: Punycode digits

# _adapt($delta, $numpoints, $firsttime)
#
# Bias adaptation: returns the new bias after a delta has been encoded or
# decoded.  $numpoints is the number of code points handled so far
# (including the current one); $firsttime is true for the first delta,
# which is damped by DAMP instead of being halved.
sub _adapt {
    my ($delta, $numpoints, $firsttime) = @_;

    $delta = int($firsttime ? $delta / DAMP : $delta / 2);
    $delta += int($delta / $numpoints);

    my $k = 0;
    while ($delta > int(((BASE - TMIN) * TMAX) / 2)) {
        $delta /= BASE - TMIN;
        $k += BASE;
    }
    return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
}

# decode_punycode($input)
#
# Decodes the Punycode string $input and returns the decoded string.
# Returns undef for undef input and '' for empty input.
# Croaks on:
#   - non-basic characters in the literal part before the last delimiter,
#   - characters other than A-Z, a-z, 0-9 in the encoded part,
#   - an encoded part that ends in the middle of a code point,
#   - a decoded code point outside UNICODE_MIN..UNICODE_MAX.
sub decode_punycode {
    die("Usage: Net::IDN::Punycode::decode_punycode(input)") unless @_;
    no warnings 'utf8';

    my $input = shift;

    my $n    = INITIAL_N;
    my $i    = 0;
    my $bias = INITIAL_BIAS;
    my @output;

    return undef unless defined $input;
    return '' unless length $input;

    # Everything up to the last delimiter is copied literally.
    if ($input =~ s/(.*)$Delimiter//os) {
        my $base_chars = $1;
        croak("non-base character in input for decode_punycode")
            if $base_chars =~ m/[^$BasicRE]/os;
        push @output, split //, $base_chars;
    }
    my $code = $input;

    croak('invalid digit in input for decode_punycode') if $code =~ m/[^$PunyRE]/os;

    # $code now holds only ASCII letters and digits, so the downgrade cannot
    # fail; it keeps the substr/ord calls below free of utf8 semantics.
    utf8::downgrade($code);

    while (length $code) {
        my $oldi = $i;
        my $w    = 1;

      LOOP:
        for (my $k = BASE; 1; $k += BASE) {
            # substr() at the end of a string returns '' (not undef), so an
            # exhausted input has to be detected before taking the digit.
            croak("incomplete encoded code point in decode_punycode")
                unless length $code;
            my $cp    = substr($code, 0, 1, '');
            my $digit = ord $cp;

            ## NB: this depends on the PunyRE catching invalid digit characters
            ## before they turn up here
            ##
            # '0'..'9' -> 26..35; letters of either case -> 0..25
            $digit = $digit < 0x40 ? $digit + (26 - 0x30) : ($digit & 0x1f) - 1;

            $i += $digit * $w;
            my $t = $k - $bias;
            $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;

            last LOOP if $digit < $t;
            $w *= (BASE - $t);
        }

        $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
        $n += $i / (@output + 1);       # code point to insert
        $i = $i % (@output + 1);        # position to insert it at
        croak('invalid code point') if $n < UNICODE_MIN or $n > UNICODE_MAX;
        splice(@output, $i, 0, chr($n));
        $i++;
    }
    return join '', @output;
}

# encode_punycode($input)
#
# Encodes the character string $input with Punycode and returns the result:
# the basic (ASCII) characters of the input, a delimiter if there were any,
# and the encoded positions of all other code points.
# Croaks with "input exceeds punycode limit" if a final digit is out of
# range.
sub encode_punycode {
    die("Usage: Net::IDN::Punycode::encode_punycode(input)") unless @_;
    no warnings 'utf8';

    my $input        = shift;
    my $input_length = length $input;

    ## my $output = join '', $input =~ m/([$BasicRE]+)/og; ## slower
    my $output = $input;
    $output =~ s/[^$BasicRE]+//ogs;

    # $h: code points written so far, $bb: number of basic code points
    my $h = my $bb = length $output;
    $output .= $Delimiter if $bb > 0;
    utf8::downgrade($output);    ## no unnecessary use of utf8 semantics

    my @input = map ord, split //, $input;
    my @chars = sort { $a <=> $b } grep { $_ >= INITIAL_N } @input;

    my $n     = INITIAL_N;
    my $delta = 0;
    my $bias  = INITIAL_BIAS;

    # Non-basic code points are handled in ascending order of value.
    foreach my $m (@chars) {
        next if $m < $n;                # duplicate, already handled
        $delta += ($m - $n) * ($h + 1);
        $n = $m;

        for (my $i = 0; $i < $input_length; $i++) {
            my $c = $input[$i];
            $delta++ if $c < $n;
            if ($c == $n) {
                # emit $delta as a variable-length integer
                my $q = $delta;

              LOOP:
                for (my $k = BASE; 1; $k += BASE) {
                    my $t = $k - $bias;
                    $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;

                    last LOOP if $q < $t;

                    my $o = $t + (($q - $t) % (BASE - $t));
                    $output .= chr $o + ($o < 26 ? 0x61 : 0x30 - 26);

                    $q = int(($q - $t) / (BASE - $t));
                }
                croak("input exceeds punycode limit") if $q > BASE;
                $output .= chr $q + ($q < 26 ? 0x61 : 0x30 - 26);

                $bias  = _adapt($delta, $h + 1, $h == $bb);
                $delta = 0;
                $h++;
            }
        }
        $delta++;
        $n++;
    }
    return $output;
}

1;
__END__
=head1 NAME
Net::IDN::Punycode::PP - pure-perl implementation of Net::IDN::Punycode
=head1 DESCRIPTION
See L<Net::IDN::Punycode>.
=head1 AUTHORS
Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> (versions 0.01 to 0.02)
Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt> (from version 1.00)
=head1 LICENSE
Copyright 2002-2004 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
Copyright 2007-2018 Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt>
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
S<RFC 3492> (L<http://www.ietf.org/rfc/rfc3492.txt>),
L<IETF::ACE>, L<Convert::RACE>
=cut

96
lib/Net/IDN/Standards.pod Executable file
View File

@ -0,0 +1,96 @@
=encoding utf8
=head1 NAME
Net::IDN::Standards -- Internationalized Domain Names for Applications (IDNA)
=head1 INTRODUCTION
Historically, domain names and host names were restricted to a
limited repertoire of ASCII characters, i.e. letters, digits and
the hyphen (i.e. C</[A-Z0-9-]/i>). Words and names from languages
that require additional characters (such as diacritics or special
characters) or other scripts could not be used.
Internationalized Domain Names (IDNs) extend the character
repertoire for domain names from ASCII to Unicode while
maintaining backwards compatibility with software that only
expects and handles ASCII characters.
In order to do so, Unicode domain names are converted to ASCII
using an ASCII-compatible encoding (ACE) called Punycode. On the
wire, converted domain names start with C<xn-->, followed by the
ASCII encoding of the Unicode string. The Unicode version is
typically only shown in applications presenting the domain to the
user (hence Internationalized Domain Names for Applications,
IDNA). Internationalized Resource Identifiers (IRIs), the
Unicode version of URLs, may also include domain names in their
Unicode form.
The IDNA specifications, however, do not only cover the actual
Punycode conversion but also include extensive rules for
preparation (mapping and/or validation) of input strings. They
typically define two functions, C<ToASCII> and C<ToUnicode>, which
prepare and convert a domain name to the ACE version or the
Unicode version.
=head1 DIFFERENT STANDARDS
"The nice thing about standards is that you have so many to
choose from."
-- Andrew S. Tanenbaum
While the actual Punycode conversion is stable, there are different
specifications regarding mapping and/or validation (preparation):
=head2 IDNA2003
IDNA2003, which is defined in S<RFC 3490>
(L<http://tools.ietf.org/html/rfc3490>) and related documents, was
the original specification for the internationalization of domain
names.
However, some issues were subsequently identified with IDNA2003:
The specification was tied to Unicode 3.2 and therefore did not
allow characters added in newer versions of Unicode (without
updating the specifications).
Furthermore, a few characters were mapped to other characters or
deleted although they would carry meaning in some languages (i.e.
'ß' and 'ς' were mapped to 'ss' and 'σ'; ZWJ and ZWNJ were always
mapped to nothing, although some scripts like Arabic require them
for correct display).
=head2 IDNA2008
IDNA2008, which is defined in S<RFC 5890>
(L<http://tools.ietf.org/html/rfc5890>) and related documents, resolves the
issues found in IDNA2003.
This was done by allowing some characters that would either be
mapped to other characters, mapped to zero and/or cause the
preparation to fail. The new domain names would not be accessible
by IDNA2003 implementations, of course.
However, IDNA2008 also disallowed a large number of characters
that had been allowed in IDNA2003 (mostly symbols). An
implementation of IDNA2008 would therefore no longer be able to
access domain names such as C<√.com>, which had been registered
under IDNA2003.
=head2 UTS #46
Unicode Technical Standard #46 (UTS #46,
L<http://unicode.org/reports/tr46/>) solves this problem by
allowing domain names that are valid in either IDNA2003 or
IDNA2008.
This makes UTS #46 the perfect fit for domain lookup (be liberal
in what you accept) but unsuitable for validating domain names
prior to registration (be conservative in what you send).
=head1 AUTHOR
Claus FE<auml>rber <CFAERBER@cpan.org>
=cut

449
lib/Net/IDN/UTS46.pm Normal file
View File

@ -0,0 +1,449 @@
package Net::IDN::UTS46;
require 5.008005; # Unicode BiDi classes
use strict;
use utf8;
use warnings;
use Carp;
our $VERSION = "2.500";
$VERSION = eval $VERSION;
# Load Exporter explicitly: inheriting from it via @ISA does not load it, and
# relying on another module to have pulled it in first is fragile.
require Exporter;
our @ISA = ('Exporter');
# Nothing is exported by default; both functions are available on request,
# individually or through the ':all' tag.
our @EXPORT = ();
our @EXPORT_OK = ('uts46_to_ascii', 'uts46_to_unicode');
our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK );
use Unicode::Normalize ();
use Net::IDN::Punycode 1.1 (':all');
use Net::IDN::Encode 2.100 (':_var');
use Net::IDN::UTS46::_Mapping 5.002 ('/^(Is|Map).*/'); # UTS #46 is only defined from Unicode 5.2.0
# ToUnicode entry point.
#
# Takes a domain name followed by optional key/value parameters and hands
# them to _process without a per-label encoder, so labels stay in Unicode.
# Croaks if a true 'TransitionalProcessing' parameter is passed.
sub uts46_to_unicode {
  my ($domain, %options) = @_;

  if ($options{'TransitionalProcessing'}) {
    croak "Transitional processing is not defined for ToUnicode";
  }

  # _process expects (domain, encoder, params...); slot in undef as encoder.
  splice(@_, 1, 0, undef);
  goto &_process;
}
# ToASCII entry point.
#
# Takes a domain name followed by optional key/value parameters and hands
# them to _process together with a per-label encoder.  The encoder returns
# pure-ASCII labels unchanged and turns any other label into
# $IDNA_PREFIX . Punycode; an encoding failure is re-thrown with an [A3] tag.
sub uts46_to_ascii {
  my ($domain, %options) = @_;

  my $encode_label = sub {
    my $text = shift;
    return $text unless $text =~ m/\P{ASCII}/;

    eval { $text = $IDNA_PREFIX . encode_punycode($text) };
    croak "$@ [A3]" if $@;
    return $text;
  };

  # _process expects (domain, encoder, params...).
  splice(@_, 1, 0, $encode_label);
  goto &_process;
}
# Short aliases, so that the functions can also be called without the
# 'uts46_' prefix as Net::IDN::UTS46::to_unicode / Net::IDN::UTS46::to_ascii.
*to_unicode = \&uts46_to_unicode;
*to_ascii = \&uts46_to_ascii;
# Shared implementation behind uts46_to_ascii and uts46_to_unicode.
#
# Arguments:
#   $label    - the domain name to process (it is split into labels on "."
#               in step 3 below)
#   $to_ascii - code reference called as $to_ascii->($label, %param) on each
#               validated label, or undef to leave the labels as they are
#   %param    - TransitionalProcessing (default 0), UseSTD3ASCIIRules
#               (default 1), AllowUnassigned (default 0); any other key
#               makes the function croak
#
# Returns the processed domain name; a trailing dot in the input (root label)
# is preserved in the output.  Croaks on disallowed characters, invalid
# Punycode, failed label/bidi/ContextJ validation and on length violations.
sub _process {
my ($label, $to_ascii, %param) = @_;
no warnings 'utf8';
# reject unknown parameter names (croaks on the first one found)
croak "The following parameter is invalid: $_"
foreach(grep { !m/^(?:TransitionalProcessing|UseSTD3ASCIIRules|AllowUnassigned)$/ } keys %param);
# fill in defaults for parameters the caller did not pass
$param{'TransitionalProcessing'} = 0 unless exists $param{'TransitionalProcessing'};
$param{'UseSTD3ASCIIRules'} = 1 unless exists $param{'UseSTD3ASCIIRules'};
$param{'AllowUnassigned'} = 0 unless exists $param{'AllowUnassigned'};
# 1. Map
# - disallowed
#
if($param{'AllowUnassigned'}) {
$label =~ m/(\p{Is_DisallowedAssigned})/ and croak sprintf('disallowed character U+%04X', ord($1));
} else {
$label =~ m/(\p{IsDisallowed})/ and croak sprintf('disallowed character U+%04X', ord($1));
}
# with STD3 rules, both disallowed_STD3_* classes are errors
if($param{'UseSTD3ASCIIRules'}) {
$label =~ m/(\p{IsDisallowedSTD3Valid})/ and croak sprintf('disallowed_STD3_valid character U+%04X', ord($1));
$label =~ m/(\p{IsDisallowedSTD3Mapped})/ and croak sprintf('disallowed_STD3_mapped character U+%04X', ord($1));
};
# - ignored
#
$label = MapIgnored($label);
## $label = MapDisallowedSTD3Ignored($label) if(!$param{'UseSTD3ASCIIRules'});
# - mapped
#
$label = MapMapped($label);
$label = MapDisallowedSTD3Mapped($label) if(!$param{'UseSTD3ASCIIRules'});
# - deviation
$label = MapDeviation($label) if($param{'TransitionalProcessing'});
# 2. Normalize
#
$label = Unicode::Normalize::NFC($label);
# 3. Break
#
# limit -1 keeps trailing empty fields, so a final "." yields an empty last label
my @ll = split /\./, $label, -1;
## IDNA test vectors: an empty label at the end (separating the root domain
## "", if present) must be preserved. It is not checked for
## the minimum length criteria and the dot separating it is
## not included in the maximum length of the domain.
##
my $rooted = @ll && length($ll[$#ll]) < 1; pop @ll if $rooted;
my $is_bidi = 0;
# 4. Convert/Validate
#
foreach my $l (@ll) {
# labels carrying the IDNA prefix (matched case-insensitively) are decoded
# from Punycode in place and then validated as non-transitional
if($l =~ m/^(?:(?i)$IDNA_PREFIX)(\p{ASCII}+)$/o) {
eval { $l = decode_punycode($1); };
croak 'Invalid Punycode sequence [P4]' if $@;
# NOTE(review): the trailing 'unless $@' is always true at this point,
# because the croak above has already fired whenever $@ was set.
_validate_label($l, %param,
'TransitionalProcessing' => 0,
) unless $@;
} else {
# the whole string was normalized in step 2, so the NFC check is skipped
_validate_label($l,%param,'_AssumeNFC' => 1);
}
# a single label containing an R, AL or AN character makes this a bidi domain
$is_bidi = 1 if !$is_bidi && $l =~ m/[\p{Bc:R}\p{Bc:AL}\p{Bc:AN}]/;
}
# second pass: bidi checks can only run once $is_bidi is known for all labels
foreach my $l (@ll) {
_validate_bidi($l,%param) if $is_bidi;
_validate_contextj($l,%param);
if(defined $to_ascii) {
$l = $to_ascii->($l, %param);
}
## IDNA test vectors: labels have to be checked for the minimum length of 1 (but not for the
## maximum length of 63) even in to_unicode.
##
croak "empty label [A4_2]" if length($l) < 1;
croak "label too long [A4_2]" if length($l) > 63 and defined $to_ascii;
}
my $domain = join('.', @ll);
## IDNA test vectors: domains have to be checked for the minimum length of 1 (but not for the
## maximum length of 253 excluding a final dot) even in to_unicode.
##
croak "empty domain name [A4_1]" if length($domain) < 1;
croak "domain name too long [A4_1]" if length($domain) > 253 and defined $to_ascii;
# restore the root dot that was removed before validation
$domain .= '.' if $rooted;
return $domain;
}
# Checks a single label against the validity criteria used by this module.
#
# Arguments:
#   $l     - the label (no dots expected)
#   %param - '_AssumeNFC' (skip the normalization check), 'AllowUnassigned',
#            'UseSTD3ASCIIRules' and 'TransitionalProcessing'
#
# Returns 1 if the label passes.  Croaks with a message ending in the
# bracketed criterion code ([V1] .. [V6]) on the first failed check.
sub _validate_label {
  my($l,%param) = @_;
  no warnings 'utf8';

  # [V1] must be in NFC (callers that already normalized pass _AssumeNFC)
  $l eq Unicode::Normalize::NFC($l) or croak "not in Unicode Normalization Form NFC [V1]" unless $param{'_AssumeNFC'};

  # [V2]/[V3] hyphen restrictions
  $l =~ m/^..--/ and croak "contains U+002D HYPHEN-MINUS in both third and fourth position [V2]";
  $l =~ m/^-/ and croak "begins with U+002D HYPHEN-MINUS [V3]";
  $l =~ m/-$/ and croak "ends with U+002D HYPHEN-MINUS [V3]";

  # [V4] FULL STOP is U+002E (the message used to name U+0023, NUMBER SIGN)
  $l =~ m/\./ and croak "contains U+002E FULL STOP [V4]";

  # [V5] must not start with a combining mark
  $l =~ m/^\p{IsMark}/ and croak "begins with General_Category=Mark [V5]";

  # [V6] every character must have an acceptable status for the given options
  unless($param{'AllowUnassigned'}) {
    $l =~m/(\p{Unassigned})/ and croak sprintf "contains unassigned character U+%04X [V6]", ord $1;
  }
  if($param{'UseSTD3ASCIIRules'}) {
    $l =~m/(\p{IsDisallowedSTD3Valid})/ and croak sprintf "contains disallowed_STD3_valid character U+%04X [V6]", ord $1;
  }
  if($param{'TransitionalProcessing'}) {
    $l =~ m/(\p{IsDeviation})/ and croak sprintf "contains deviation character U+%04X [V6]", ord $1;
  }
  $l =~ m/(\p{IsIgnored})/ and croak sprintf "contains ignored character U+%04X [V6]", ord $1;
  $l =~ m/(\p{IsMapped}|\p{IsDisallowedSTD3Mapped})/ and croak sprintf "contains mapped character U+%04X [V6]", ord $1;
  $l =~ m/(\p{IsDisallowed})/ and croak sprintf "contains disallowed character U+%04X [V6]", ord $1;

  return 1;
}
# Character-class fragment for Bidi_Class=L.
#
# On perl older than 5.11, \p{Bc:L} misses the ranges that UnicodeData.txt
# only gives as First/Last pairs, so those ranges are listed explicitly:
#
#   3400..4DB5    CJK Ideograph Extension A
#   4E00..9FBB    CJK Ideograph
#   AC00..D7A3    Hangul Syllable
#   20000..2A6D6  CJK Ideograph Extension B
#
my $_RE_BidiClass_L = ($] < 5.011)
  ? '\p{Bc:L}\x{3400}-\x{4DB5}\x{4E00}-\x{9FBB}\x{AC00}-\x{D7A3}\x{20000}-\x{2A6D6}'
  : '\p{Bc:L}';

# Applies the bidi checks to one label of a bidi domain name.
#
# The first character decides the direction: class L selects the LTR checks
# ([B5], [B6]); class R or AL selects the RTL checks ([B2], [B3], [B4]);
# anything else fails with [B1].  Returns 1 on success (also for an empty
# label) and croaks on the first violated rule.
sub _validate_bidi {
  my ($label, %param) = @_;
  no warnings 'utf8';

  return 1 if !length($label);

  my $starts_ltr = $label =~ m/^[$_RE_BidiClass_L]/o;

  if (!$starts_ltr) {
    # not LTR: the label has to open with an RTL character
    croak 'starts with character of wrong bidi class [B1]'
      unless $label =~ m/^[\p{Bc:R}\p{Bc:AL}]/;

    croak 'contains characters with wrong bidi class for RTL [B2]'
      if $label =~ m/[^\p{Bc:R}\p{Bc:AL}\p{Bc:AN}\p{Bc:EN}\p{Bc:ES}\p{Bc:CS}\p{Bc:ET}\p{Bc:ON}\p{Bc:BN}\p{Bc:NSM}]/;

    croak 'ends with character of wrong bidi class for RTL [B3]'
      unless $label =~ m/[\p{Bc:R}\p{Bc:AL}\p{Bc:EN}\p{Bc:AN}][\p{Bc:NSM}\P{Assigned}]*$/;

    croak 'contains characters with both bidi class EN and AN [B4]'
      if $label =~ m/\p{Bc:EN}.*\p{Bc:AN}|\p{Bc:AN}.*\p{Bc:EN}/;

    return 1;
  }

  # LTR label
  croak 'contains characters with wrong bidi class for LTR [B5]'
    if $label =~ m/[^$_RE_BidiClass_L\p{Bc:EN}\p{Bc:ES}\p{Bc:CS}\p{Bc:ET}\p{Bc:BN}\p{Bc:ON}\p{Bc:NSM}]/o;

  croak 'ends with character of wrong bidi class for LTR [B6]'
    unless $label =~ m/[$_RE_BidiClass_L\p{Bc:EN}][\p{Bc:NSM}\P{Assigned}]*$/o;

  return 1;
}
# For perl versions < 5.11, some Unicode properties such as Ccc or Joining_Type
# are not supported. Instead, we use a concrete list of characters; this is safe
# because the Unicode version supported by these perl versions will not be
# updated. For newer perl versions, we use the Unicode property (which is
# supported from 5.11), so we will always be up-to-date with the Unicode
# version supported by our underlying perl.
#
my $_RE_Ccc_Virama = $] >= 5.011 ? qr/\p{Ccc:Virama}/ : qr/[\x{094D}\x{09CD}\x{0A4D}\x{0ACD}\x{0B4D}\x{0BCD}\x{0C4D}\x{0CCD}\x{0D4D}\x{0DCA}\x{0E3A}\x{0F84}\x{1039}\x{103A}\x{1714}\x{1734}\x{17D2}\x{1A60}\x{1B44}\x{1BAA}\x{1BF2}\x{1BF3}\x{2D7F}\x{A806}\x{A8C4}\x{A953}\x{A9C0}\x{ABED}\x{00010A3F}\x{00011046}\x{000110B9}]/;
my $_RE_JoiningType_L = $] >= 5.011 ? qr/\p{Joining_Type:L}/ : qr/(?!)/;
my $_RE_JoiningType_R = $] >= 5.011 ? qr/\p{Joining_Type:R}/ : qr/[\x{0622}-\x{0625}\x{0627}\x{0629}\x{062F}-\x{0632}\x{0648}\x{0671}-\x{0673}\x{0675}-\x{0677}\x{0688}-\x{0699}\x{06C0}\x{06C3}-\x{06CB}\x{06CD}\x{06CF}\x{06D2}\x{06D3}\x{06D5}\x{06EE}\x{06EF}\x{0710}\x{0715}-\x{0719}\x{071E}\x{0728}\x{072A}\x{072C}\x{072F}\x{074D}\x{0759}-\x{075B}\x{076B}\x{076C}\x{0771}\x{0773}\x{0774}\x{0778}\x{0779}]/;
my $_RE_JoiningType_D = $] >= 5.011 ? qr/\p{Joining_Type:D}/ : qr/[\x{0620}\x{0626}\x{0628}\x{062A}-\x{062E}\x{0633}-\x{063F}\x{0641}-\x{0647}\x{0649}\x{064A}\x{066E}\x{066F}\x{0678}-\x{0687}\x{069A}-\x{06BF}\x{06C1}\x{06C2}\x{06CC}\x{06CE}\x{06D0}\x{06D1}\x{06FA}-\x{06FC}\x{06FF}\x{0712}-\x{0714}\x{071A}-\x{071D}\x{071F}-\x{0727}\x{0729}\x{072B}\x{072D}\x{072E}\x{074E}-\x{0758}\x{075C}-\x{076A}\x{076D}-\x{0770}\x{0772}\x{0775}-\x{0777}\x{077A}-\x{077F}\x{07CA}-\x{07EA}]/;
# Joining_Type=T (transparent).  The fallback list must stay on a single line:
# a line break inside one of the \x{...} escapes corrupts the character class.
my $_RE_JoiningType_T = $] >= 5.011 ? qr/\p{Joining_Type:T}/ : qr/[\x{00AD}\x{0300}-\x{036F}\x{0483}-\x{0489}\x{0591}-\x{05BD}\x{05BF}\x{05C1}\x{05C2}\x{05C4}\x{05C5}\x{05C7}\x{0610}-\x{061A}\x{064B}-\x{065F}\x{0670}\x{06D6}-\x{06DC}\x{06DF}-\x{06E4}\x{06E7}\x{06E8}\x{06EA}-\x{06ED}\x{070F}\x{0711}\x{0730}-\x{074A}\x{07A6}-\x{07B0}\x{07EB}-\x{07F3}\x{0816}-\x{0819}\x{081B}-\x{0823}\x{0825}-\x{0827}\x{0829}-\x{082D}\x{0859}-\x{085B}\x{0900}-\x{0902}\x{093A}\x{093C}\x{0941}-\x{0948}\x{094D}\x{0951}-\x{0957}\x{0962}\x{0963}\x{0981}\x{09BC}\x{09C1}-\x{09C4}\x{09CD}\x{09E2}\x{09E3}\x{0A01}\x{0A02}\x{0A3C}\x{0A41}\x{0A42}\x{0A47}\x{0A48}\x{0A4B}-\x{0A4D}\x{0A51}\x{0A70}\x{0A71}\x{0A75}\x{0A81}\x{0A82}\x{0ABC}\x{0AC1}-\x{0AC5}\x{0AC7}\x{0AC8}\x{0ACD}\x{0AE2}\x{0AE3}\x{0B01}\x{0B3C}\x{0B3F}\x{0B41}-\x{0B44}\x{0B4D}\x{0B56}\x{0B62}\x{0B63}\x{0B82}\x{0BC0}\x{0BCD}\x{0C3E}-\x{0C40}\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}\x{0C56}\x{0C62}\x{0C63}\x{0CBC}\x{0CBF}\x{0CC6}\x{0CCC}\x{0CCD}\x{0CE2}\x{0CE3}\x{0D41}-\x{0D44}\x{0D4D}\x{0D62}\x{0D63}\x{0DCA}\x{0DD2}-\x{0DD4}\x{0DD6}\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}\x{0EB1}\x{0EB4}-\x{0EB9}\x{0EBB}\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}\x{0F19}\x{0F35}\x{0F37}\x{0F39}\x{0F71}-\x{0F7E}\x{0F80}-\x{0F84}\x{0F86}\x{0F87}\x{0F8D}-\x{0F97}\x{0F99}-\x{0FBC}\x{0FC6}\x{102D}-\x{1030}\x{1032}-\x{1037}\x{1039}\x{103A}\x{103D}\x{103E}\x{1058}\x{1059}\x{105E}-\x{1060}\x{1071}-\x{1074}\x{1082}\x{1085}\x{1086}\x{108D}\x{109D}\x{135D}-\x{135F}\x{1712}-\x{1714}\x{1732}-\x{1734}\x{1752}\x{1753}\x{1772}\x{1773}\x{17B4}\x{17B5}\x{17B7}-\x{17BD}\x{17C6}\x{17C9}-\x{17D3}\x{17DD}\x{180B}-\x{180D}\x{18A9}\x{1920}-\x{1922}\x{1927}\x{1928}\x{1932}\x{1939}-\x{193B}\x{1A17}\x{1A18}\x{1A56}\x{1A58}-\x{1A5E}\x{1A60}\x{1A62}\x{1A65}-\x{1A6C}\x{1A73}-\x{1A7C}\x{1A7F}\x{1B00}-\x{1B03}\x{1B34}\x{1B36}-\x{1B3A}\x{1B3C}\x{1B42}\x{1B6B}-\x{1B73}\x{1B80}\x{1B81}\x{1BA2}-\x{1BA5}\x{1BA8}\x{1BA9}\x{1BE6}\x{1BE8}\x{1BE9}\x{1BED}\x{1BEF}-\x{1BF1}\x{1C2C}-\x{1C33}\x{1C36}\x{1C37}\x{1CD0}-\x{1CD2}\x{1CD4}-\x{1CE0}\x{1CE2}-\x{1CE8}\x{1CED}\x{1DC0}-\x{1DE6}\x{1DFC}-\x{1DFF}\x{200B}\x{200E}\x{200F}\x{202A}-\x{202E}\x{2060}-\x{2064}\x{206A}-\x{206F}\x{20D0}-\x{20F0}\x{2CEF}-\x{2CF1}\x{2D7F}\x{2DE0}-\x{2DFF}\x{302A}-\x{302F}\x{3099}\x{309A}\x{A66F}-\x{A672}\x{A67C}\x{A67D}\x{A6F0}\x{A6F1}\x{A802}\x{A806}\x{A80B}\x{A825}\x{A826}\x{A8C4}\x{A8E0}-\x{A8F1}\x{A926}-\x{A92D}\x{A947}-\x{A951}\x{A980}-\x{A982}\x{A9B3}\x{A9B6}-\x{A9B9}\x{A9BC}\x{AA29}-\x{AA2E}\x{AA31}\x{AA32}\x{AA35}\x{AA36}\x{AA43}\x{AA4C}\x{AAB0}\x{AAB2}-\x{AAB4}\x{AAB7}\x{AAB8}\x{AABE}\x{AABF}\x{AAC1}\x{ABE5}\x{ABE8}\x{ABED}\x{FB1E}\x{FE00}-\x{FE0F}\x{FE20}-\x{FE26}\x{FEFF}\x{FFF9}-\x{FFFB}\x{101FD}\x{10A01}-\x{10A03}\x{10A05}\x{10A06}\x{10A0C}-\x{10A0F}\x{10A38}-\x{10A3A}\x{10A3F}\x{11001}\x{11038}-\x{11046}\x{11080}\x{11081}\x{110B3}-\x{110B6}\x{110B9}\x{110BA}\x{110BD}\x{1D167}-\x{1D169}\x{1D173}-\x{1D182}\x{1D185}-\x{1D18B}\x{1D1AA}-\x{1D1AD}\x{1D242}-\x{1D244}\x{E0001}\x{E0020}-\x{E007F}\x{E0100}-\x{E01EF}]/;
# Applies the CONTEXTJ rules of RFC 5892, Appendix A, to one label.
#
# Every occurrence of ZERO WIDTH NON-JOINER (U+200C) and ZERO WIDTH JOINER
# (U+200D) is checked on its own; a single unanchored match would only look
# at the leftmost joiner and let a later, invalid one slip through.
#
# Arguments:
#   $l     - the label
#   %param - accepted for symmetry with the other validators; not used here
#
# Returns 1 on success (also for an undefined or empty label).  Croaks with
# [C1] for a Join_Control character without a defined rule and with [C2] for
# a joiner whose rule is not satisfied.
sub _validate_contextj {
  my($l,%param) = @_;
  no warnings 'utf8';
  return 1 unless defined($l) && length($l);

  # catch ContextJ characters without defined rule (as of Unicode 6.0.0, this cannot match)
  #
  $l =~ m/([^\x{200C}\x{200D}\P{Join_Control}])/ and croak sprintf "contains CONTEXTJ character U+%04X without defined rule [C1]", ord($1);

  # RFC 5892, Appendix A.1. ZERO WIDTH NON-JOINER (U+200C)
  #
  # Rule Set:
  #   False;
  #   If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
  #   If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
  #     (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
  #
  while($l =~ m/\x{200C}/g) {
    # take the context first: the matches below overwrite @- and @+
    my $before = substr($l, 0, $-[0]);
    my $after  = substr($l, $+[0]);

    next if $before =~ m/$_RE_Ccc_Virama\z/o;
    next if $before =~ m/(?: $_RE_JoiningType_L | $_RE_JoiningType_D ) $_RE_JoiningType_T* \z/xo
        and $after  =~ m/\A $_RE_JoiningType_T* (?: $_RE_JoiningType_R | $_RE_JoiningType_D )/xo;

    croak sprintf "rule for CONTEXTJ character U+%04X not satisfied [C2]", 0x200C;
  }

  # RFC 5892, Appendix A.2. ZERO WIDTH JOINER (U+200D)
  #
  # Rule Set:
  #   False;
  #   If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
  #
  while($l =~ m/\x{200D}/g) {
    next if substr($l, 0, $-[0]) =~ m/$_RE_Ccc_Virama\z/o;

    croak sprintf "rule for CONTEXTJ character U+%04X not satisfied [C2]", 0x200D;
  }

  return 1;
}
1;
__END__
=encoding utf8
=head1 NAME
Net::IDN::UTS46 - Unicode IDNA Compatibility Processing (S<UTS #46>)
=head1 SYNOPSIS
use Net::IDN::UTS46 ':all';
my $a = uts46_to_ascii("müller.example.org");
my $b = Net::IDN::UTS46::to_unicode('EXAMPLE.XN--11B5BS3A9AJ6G');
$domain =~ m/\p{Net::IDN::UTS46::IsDisallowed}/ and die 'oops';
=head1 DESCRIPTION
This module implements the Unicode Technical Standard #46 (Unicode IDNA
Compatibility Processing). UTS #46 is one variant of Internationalized Domain
Names (IDN), which aims to be compatible with domain names registered under
either IDNA2003 or IDNA2008.
You should use this module if you want an exact implementation of the UTS #46
specification.
However, if you just want to convert domain names and don't care which standard
is used internally, you should use L<Net::IDN::Encode> instead.
=head1 FUNCTIONS
By default, this module does not export any subroutines. You may use the
C<:all> tag to import everything.
You can omit the C<'uts46_'> prefix when accessing the functions with a
fully qualified module name (e.g. you can access C<uts46_to_unicode> as
C<Net::IDN::UTS46::uts46_to_unicode> or C<Net::IDN::UTS46::to_unicode>).
The following functions are available:
=over
=item uts46_to_ascii( $domain, %param )
Implements the "ToASCII" function from UTS #46, section 4.2. It converts a domain name to
ASCII and throws an exception on invalid input.
This function takes the following optional parameters (C<%param>):
=over
=item AllowUnassigned
(boolean) If set to a true value, unassigned code points in the label are
allowed. This is an extension over UTS #46.
The default is false.
=item UseSTD3ASCIIRules
(boolean) If set to a true value, checks the label for compliance with S<STD 3>
(S<RFC 1123>) syntax for host name parts.
The default is true.
=item TransitionalProcessing
(boolean) If set to true, the conversion will be compatible with IDNA2003. This
only affects four characters: C<'ß'> (U+00DF), 'ς' (U+03C2), ZWJ (U+200D) and
ZWNJ (U+200C). Usually, you will want to set this to false.
The default is false.
=back
=item uts46_to_unicode( $label, %param )
Implements the "ToUnicode" function from UTS #46, section 4.3. It converts a domain name to
Unicode and throws an exception on invalid input.
This function takes the following optional parameters (C<%param>):
=over
=item AllowUnassigned
see above.
=item UseSTD3ASCIIRules
see above.
=item TransitionalProcessing
(boolean) If given, this parameter must be false. The UTS #46 specification
does not define transitional processing for ToUnicode.
=back
=back
=head1 UNICODE CHARACTER PROPERTIES
This module also defines the character properties listed below.
Each character has exactly one of the following properties:
=over
=item C<\p{Net::IDN::UTS46::IsValid}>
The code point is valid, and not modified (i.e. a deviation character) in UTS #46.
=item C<\p{Net::IDN::UTS46::IsIgnored}>
The code point is removed (i.e. mapped to an empty string) in UTS #46.
=item C<\p{Net::IDN::UTS46::IsMapped}>
The code point is replaced by another string in UTS #46.
=item C<\p{Net::IDN::UTS46::IsDeviation}>
The code point is either mapped or valid, depending on whether the processing is transitional or not.
=item C<\p{Net::IDN::UTS46::IsDisallowed}>
The code point is not allowed in UTS #46.
=item C<\p{Net::IDN::UTS46::IsDisallowedSTD3Ignored}>
The code point is not allowed in UTS #46 if C<UseSTD3ASCIIRules> are used but would be ignored otherwise.
=item C<\p{Net::IDN::UTS46::IsDisallowedSTD3Mapped}>
The code point is not allowed in UTS #46 if C<UseSTD3ASCIIRules> are used but would be mapped otherwise.
=back
=head1 AUTHOR
Claus FE<auml>rber <CFAERBER@cpan.org>
=head1 LICENSE
Copyright 2011-2018 Claus FE<auml>rber.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
L<Net::IDN::UTS46::_Mapping>, L<Net::IDN::Encode>, S<UTS #46> (L<http://www.unicode.org/reports/tr46/>)

File diff suppressed because one or more lines are too long

12
t/00use.t Normal file
View File

@ -0,0 +1,12 @@
use strict;
use Test::More tests => 1 + 5;
use Test::NoWarnings;

# Smoke test: every module of the distribution must load.  The plan is one
# use_ok per module plus the extra test added by Test::NoWarnings.
my @modules = (
  'Net::IDN::Encode',
  'Net::IDN::Punycode',
  'Net::IDN::Punycode::PP',
  'Net::IDN::UTS46',
  'Net::IDN::UTS46::_Mapping',
);

foreach my $module (@modules) {
  use_ok $module;
}

exit(0);

29
t/domain_to_ascii.t Executable file
View File

@ -0,0 +1,29 @@
use utf8;
use strict;
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
use Net::IDN::Encode qw(:all);
use Test::More tests => 1 + 13;
use Test::NoWarnings;
use Net::IDN::Encode qw(:all);

# basic conversions
is(eval{domain_to_ascii('müller')} || $@, 'xn--mller-kva', 'single label (to_ascii)');
is(eval{domain_to_ascii('XN--MLLER-KVA')} || $@, 'XN--MLLER-KVA', 'single uppercase label (to_ascii)');
is(eval{domain_to_ascii('www.jürg.xn--mller-kva.com', )} || $@, 'www.xn--jrg-hoa.xn--mller-kva.com', 'mixed utf8/ace/ascii');

# The input needs a dot-like separator between "c" and "d" (here U+FF0E
# FULLWIDTH FULL STOP); without one the expected "c.d" cannot be produced.
is(eval{domain_to_ascii('www.a.b。c．d。com', )} || $@, 'www.a.b.c.d.com', 'mixed dots');
is(eval{domain_to_ascii("www.\x{1F985}.example", AllowUnassigned => 1)} || $@, 'www.xn--4s9h.example', 'Unicode 9.0 emoji');

# blanks are only acceptable without STD3 rules; labels that already are
# ASCII are passed through unchanged
is(eval{domain_to_ascii('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 0)}, 'www.xn-- -7kav3ivb.example', 'blank (without STD3 rules) (to_ascii)') or diag $@;
is(eval{domain_to_ascii('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 1)}, undef, 'blank (with STD3 rules) (to_ascii)') or diag $@;
is(eval{domain_to_ascii('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 0)}, 'www.xn-- -7kav3ivb.example', 'blank (without STD3 rules) (to_ascii pass-through)') or diag $@;
is(eval{domain_to_ascii('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 1)}, 'www.xn-- -7kav3ivb.example', 'blank (with STD3 rules) (to_ascii pass-through)') or diag $@;

is(eval{domain_to_ascii("I.\x{2665}.Perl.invalid")}, 'I.xn--g6h.Perl.invalid', 'mixed case');
is(eval{domain_to_ascii("I.xn--g6h.Perl.invalid")}, 'I.xn--g6h.Perl.invalid', 'mixed case');
is(eval{domain_to_ascii('www.xn--garbage')}, 'www.xn--garbage', 'Invalid A-label');
is(eval{domain_to_ascii('_test._srv.müller.example.com')}, '_test._srv.xn--mller-kva.example.com', 'SRV record');

31
t/domain_to_unicode.t Executable file
View File

@ -0,0 +1,31 @@
use utf8;
use strict;
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
use Test::More tests => 1 + 15;
use Test::NoWarnings;
use Net::IDN::Encode qw(:all);

# basic conversions
is(eval{domain_to_unicode('xn--mller-kva')} || $@, 'müller', 'single label (to_unicode)');
is(eval{domain_to_unicode('XN--MLLER-KVA')} || $@, 'müller', 'single uppercase label (to_unicode)');
is(eval{domain_to_unicode('www.jürg.xn--mller-kva.com', )} || $@, 'www.jürg.müller.com', 'mixed utf8/ace/ascii (to_unicode)');
is(eval{domain_to_unicode('www.a.b。cd。com', )} || $@, 'www.a.b。cd。com', 'mixed dots (to_unicode)');
is(eval{domain_to_unicode("www.xn--4s9h.example", AllowUnassigned => 1)} || $@, "www.\x{1F985}.example", 'Unicode 9.0 emoji');

# blanks: the descriptions name the rule set actually passed; "pass-through"
# marks input that already is Unicode
is(eval{domain_to_unicode('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 0)}, 'www.ä ö ü ß.example', 'blank (without STD3 rules) (to_unicode pass-through)') or diag $@;
is(eval{domain_to_unicode('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 1)}, undef, 'blank (with STD3 rules) (to_unicode pass-through)') or diag $@;
is(eval{domain_to_unicode('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 0)}, 'www.ä ö ü ß.example', 'blank (without STD3 rules) (to_unicode)') or diag $@;
is(eval{domain_to_unicode('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 1)}, undef, 'blank (with STD3 rules) (to_unicode)') or diag $@;

# the IDNA prefix is matched case-insensitively
is(eval{domain_to_unicode("EXAMPLE.XN--11B5BS3A9AJ6G")}, 'EXAMPLE.परीक्षा', 'uppercase IDNA prefix') or diag $@;
is(eval{domain_to_unicode("EXAMPLE.xn--11B5BS3A9AJ6G")}, 'EXAMPLE.परीक्षा', 'lowercase IDNA prefix') or diag $@;

is(eval{domain_to_unicode("I.\x{2665}.Perl.invalid")}, "I.\x{2665}.Perl.invalid", 'mixed case');
is(eval{domain_to_unicode('I.xn--g6h.Perl.invalid')}, "I.\x{2665}.Perl.invalid", 'mixed case');
is(eval{domain_to_unicode('_test._srv.xn--mller-kva.example.com')}, '_test._srv.müller.example.com', 'SRV record');
is(eval{domain_to_unicode('xn--zcaa.de')}, 'ßß.de', 'bare ßß');

36
t/encode_bytes.t Normal file
View File

@ -0,0 +1,36 @@
use bytes;
use strict;
use Test::More tests => 24;
use Test::NoWarnings;
use Net::IDN::Encode qw(:all);

# Byte-string variant of the encode tests (no "use utf8" in this file).
# Each row is [ function under test, input, expected result ]; the rows are
# run in order, one is() per row.
my @cases = (
  [ \&to_ascii,          'mueller',                        'mueller' ],
  [ \&to_ascii,          'xn--mller-kva',                  'xn--mller-kva' ],
  [ \&to_ascii,          'müller',                         'xn--mller-kva' ],

  [ \&to_unicode,        'mueller',                        'mueller' ],
  [ \&to_unicode,        'xn--mller-kva',                  'müller' ],
  [ \&to_unicode,        'müller',                         'müller' ],

  [ \&domain_to_ascii,   'mueller.example.com',            'mueller.example.com' ],
  [ \&domain_to_ascii,   'xn--mller-kva.example.com',      'xn--mller-kva.example.com' ],
  [ \&domain_to_ascii,   'müller.example.com',             'xn--mller-kva.example.com' ],

  [ \&domain_to_unicode, 'mueller.example.com',            'mueller.example.com' ],
  [ \&domain_to_unicode, 'xn--mller-kva.example.com',      'müller.example.com' ],
  [ \&domain_to_unicode, 'müller.example.com',             'müller.example.com' ],

  [ \&email_to_ascii,    'hans@mueller.example.com',       'hans@mueller.example.com' ],
  [ \&email_to_ascii,    'hans@xn--mller-kva.example.com', 'hans@xn--mller-kva.example.com' ],
  [ \&email_to_ascii,    'hans@müller.example.com',        'hans@xn--mller-kva.example.com' ],
  [ \&email_to_ascii,    '',                               '' ],
  [ \&email_to_ascii,    undef,                            undef ],
  [ \&email_to_ascii,    'test',                           'test' ],

  [ \&email_to_unicode,  'hans@mueller.example.com',       'hans@mueller.example.com' ],
  [ \&email_to_unicode,  'hans@xn--mller-kva.example.com', 'hans@müller.example.com' ],
  [ \&email_to_unicode,  '',                               '' ],
  [ \&email_to_unicode,  undef,                            undef ],
  [ \&email_to_unicode,  'test',                           'test' ],
);

foreach my $case (@cases) {
  my ($code, $input, $expected) = @{$case};
  is($code->($input), $expected);
}

46
t/encode_utf8.t Normal file
View File

@ -0,0 +1,46 @@
use utf8;
use strict;
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
use Test::More tests => 32;
use Test::NoWarnings;
use Net::IDN::Encode qw(:all);

# single labels
is(to_ascii('mueller'),'mueller');
is(to_ascii('xn--mller-kva'),'xn--mller-kva');
is(to_ascii('müller'),'xn--mller-kva');
is(to_ascii('中央大学'),'xn--fiq80yua78t');
is(to_unicode('mueller'),'mueller');
is(to_unicode('xn--mller-kva'),'müller');
is(to_unicode('müller'),'müller');
is(to_unicode('xn--fiq80yua78t'),'中央大学');

# whole domain names
is(domain_to_ascii('mueller.example.com'),'mueller.example.com');
is(domain_to_ascii('xn--mller-kva.example.com'),'xn--mller-kva.example.com');
is(domain_to_ascii('müller.example.com'),'xn--mller-kva.example.com');
is(domain_to_ascii('中央大学.tw'),'xn--fiq80yua78t.tw');
is(domain_to_unicode('mueller.example.com'),'mueller.example.com');
is(domain_to_unicode('xn--mller-kva.example.com'),'müller.example.com');
is(domain_to_unicode('müller.example.com'),'müller.example.com');
is(domain_to_unicode('xn--fiq80yua78t.tw'),'中央大学.tw');

# email addresses
is(email_to_ascii('hans@mueller.example.com'),'hans@mueller.example.com');
is(email_to_ascii('hans@xn--mller-kva.example.com'),'hans@xn--mller-kva.example.com');
is(email_to_ascii('hans@müller.example.com'),'hans@xn--mller-kva.example.com');
# The address separator here is U+FF20 FULLWIDTH COMMERCIAL AT; without a
# separator the expected '@' in the result cannot be produced.
is(email_to_ascii('test＠中央大学.tw'),'test@xn--fiq80yua78t.tw');
is(email_to_ascii(''), '');
is(email_to_ascii(undef), undef);
is(email_to_ascii('test'), 'test');
is(email_to_unicode('hans@mueller.example.com'),'hans@mueller.example.com');
is(email_to_unicode('hansmueller.example.com'),'hansmueller.example.com');
is(email_to_unicode('hans@xn--mller-kva.example.com'),'hans@müller.example.com');
# Same here: a U+FF20 separator is needed for the domain part to be decoded.
# NOTE(review): this assumes email_to_unicode keeps the fullwidth separator in
# its result -- confirm against Net::IDN::Encode.
is(email_to_unicode('hans＠xn--mller-kva.example.com'),'hans＠müller.example.com');
is(email_to_unicode('test@xn--fiq80yua78t.tw'),'test@中央大学.tw');
is(email_to_unicode(''),'');
is(email_to_unicode(undef), undef);
is(email_to_unicode('test'),'test');

144
t/punycode_vec-pp.t Normal file
View File

@ -0,0 +1,144 @@
use strict;
use utf8;
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
use Test::More;
use Test::NoWarnings;
use Net::IDN::Punycode::PP ':all';
our @idna = (
["Arabic (Egyptian)",
"\x{0644}\x{064A}\x{0647}\x{0645}\x{0627}\x{0628}\x{062A}\x{0643}".
"\x{0644}\x{0645}\x{0648}\x{0634}\x{0639}\x{0631}\x{0628}\x{064A}\x{061F}",
"egbpdaj6bu4bxfgehfvwxn", 0, 0, 1, 1 ],
["Chinese (simplified)",
"\x{4ED6}\x{4EEC}\x{4E3A}\x{4EC0}\x{4E48}\x{4E0D}\x{8BF4}\x{4E2D}".
"\x{6587}",
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, 1, 1 ],
["Chinese (traditional)",
"\x{4ED6}\x{5011}\x{7232}\x{4EC0}\x{9EBD}\x{4E0D}\x{8AAA}\x{4E2D}".
"\x{6587}",
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, 1, 1 ],
["Czech",
"\x{0050}\x{0072}\x{006F}\x{010D}\x{0070}\x{0072}\x{006F}\x{0073}".
"\x{0074}\x{011B}\x{006E}\x{0065}\x{006D}\x{006C}\x{0075}\x{0076}\x{00ED}".
"\x{010D}\x{0065}\x{0073}\x{006B}\x{0079}",
"Proprostnemluvesky-uyb24dma41a", 0, 0, 1, 1 ],
["Hebrew",
"\x{05DC}\x{05DE}\x{05D4}\x{05D4}\x{05DD}\x{05E4}\x{05E9}\x{05D5}".
"\x{05D8}\x{05DC}\x{05D0}\x{05DE}\x{05D3}\x{05D1}\x{05E8}\x{05D9}\x{05DD}".
"\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}",
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, 1, 1 ],
["Hindi (Devanagari)",
"\x{092F}\x{0939}\x{0932}\x{094B}\x{0917}\x{0939}\x{093F}\x{0928}".
"\x{094D}\x{0926}\x{0940}\x{0915}\x{094D}\x{092F}\x{094B}\x{0902}\x{0928}".
"\x{0939}\x{0940}\x{0902}\x{092C}\x{094B}\x{0932}\x{0938}\x{0915}\x{0924}".
"\x{0947}\x{0939}\x{0948}\x{0902}",
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0, 1 ],
["Japanese (kanji and hiragana)",
"\x{306A}\x{305C}\x{307F}\x{3093}\x{306A}\x{65E5}\x{672C}\x{8A9E}".
"\x{3092}\x{8A71}\x{3057}\x{3066}\x{304F}\x{308C}\x{306A}\x{3044}\x{306E}".
"\x{304B}",
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0, 1 ],
["Russian (Cyrillic)",
"\x{043F}\x{043E}\x{0447}\x{0435}\x{043C}\x{0443}\x{0436}\x{0435}".
"\x{043E}\x{043D}\x{0438}\x{043D}\x{0435}\x{0433}\x{043E}\x{0432}\x{043E}".
"\x{0440}\x{044F}\x{0442}\x{043F}\x{043E}\x{0440}\x{0443}\x{0441}\x{0441}".
"\x{043A}\x{0438}",
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
["Spanish",
"\x{0050}\x{006F}\x{0072}\x{0071}\x{0075}\x{00E9}\x{006E}\x{006F}".
"\x{0070}\x{0075}\x{0065}\x{0064}\x{0065}\x{006E}\x{0073}\x{0069}\x{006D}".
"\x{0070}\x{006C}\x{0065}\x{006D}\x{0065}\x{006E}\x{0074}\x{0065}\x{0068}".
"\x{0061}\x{0062}\x{006C}\x{0061}\x{0072}\x{0065}\x{006E}\x{0045}\x{0073}".
"\x{0070}\x{0061}\x{00F1}\x{006F}\x{006C}",
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0, 1 ],
["Vietnamese",
"\x{0054}\x{1EA1}\x{0069}\x{0073}\x{0061}\x{006F}\x{0068}\x{1ECD}".
"\x{006B}\x{0068}\x{00F4}\x{006E}\x{0067}\x{0074}\x{0068}\x{1EC3}\x{0063}".
"\x{0068}\x{1EC9}\x{006E}\x{00F3}\x{0069}\x{0074}\x{0069}\x{1EBF}\x{006E}".
"\x{0067}\x{0056}\x{0069}\x{1EC7}\x{0074}",
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0, 1 ],
["Japanese",
"\x{0033}\x{5E74}\x{0042}\x{7D44}\x{91D1}\x{516B}\x{5148}\x{751F}",
"3B-ww4c5e180e575a65lsy2b", 0, 0, 1, 1 ],
["Japanese",
"\x{5B89}\x{5BA4}\x{5948}\x{7F8E}\x{6075}\x{002D}\x{0077}\x{0069}".
"\x{0074}\x{0068}\x{002D}\x{0053}\x{0055}\x{0050}\x{0045}\x{0052}\x{002D}".
"\x{004D}\x{004F}\x{004E}\x{004B}\x{0045}\x{0059}\x{0053}",
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0, 1 ],
["Japanese",
"\x{0048}\x{0065}\x{006C}\x{006C}\x{006F}\x{002D}\x{0041}\x{006E}".
"\x{006F}\x{0074}\x{0068}\x{0065}\x{0072}\x{002D}\x{0057}\x{0061}\x{0079}".
"\x{002D}\x{305D}\x{308C}\x{305E}\x{308C}\x{306E}\x{5834}\x{6240}",
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0, 1 ],
["Japanese",
"\x{3072}\x{3068}\x{3064}\x{5C4B}\x{6839}\x{306E}\x{4E0B}\x{0032}",
"2-u9tlzr9756bt3uc0v", 0, 0, 1, 1 ],
["Japanese",
"\x{004D}\x{0061}\x{006A}\x{0069}\x{3067}\x{004B}\x{006F}\x{0069}".
"\x{3059}\x{308B}\x{0035}\x{79D2}\x{524D}",
"MajiKoi5-783gue6qz075azm5e", 0, 0, 1, 1 ],
["Japanese",
"\x{30D1}\x{30D5}\x{30A3}\x{30FC}\x{0064}\x{0065}\x{30EB}\x{30F3}".
"\x{30D0}",
"de-jg4avhby1noc0d", 0, 0, 1, 1 ],
["Japanese",
"\x{305D}\x{306E}\x{30B9}\x{30D4}\x{30FC}\x{30C9}\x{3067}",
"d9juau41awczczp", 0, 0, 1, 1 ],
["Greek",
"\x{03b5}\x{03bb}\x{03bb}\x{03b7}\x{03bd}\x{03b9}\x{03ba}\x{03ac}",
"hxargifdar", 0, 0, 1, 1 ],
["Maltese (Malti)",
"\x{0062}\x{006f}\x{006e}\x{0121}\x{0075}\x{0073}\x{0061}\x{0127}".
"\x{0127}\x{0061}",
"bonusaa-5bb1da", 0, 0, 1, 1 ],
["Russian (Cyrillic)",
"\x{043f}\x{043e}\x{0447}\x{0435}\x{043c}\x{0443}\x{0436}\x{0435}".
"\x{043e}\x{043d}\x{0438}\x{043d}\x{0435}\x{0433}\x{043e}\x{0432}\x{043e}".
"\x{0440}\x{044f}\x{0442}\x{043f}\x{043e}\x{0440}\x{0443}\x{0441}\x{0441}".
"\x{043a}\x{0438}",
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
## Test vector from IdnaTest.txt
["Hebrew (combining)",
"\x{05D0}\x{0308}",
"ssa73l",
]
);
# Two tests (encode and decode) per vector, plus the one added by
# Test::NoWarnings.
plan tests => scalar(@idna)*2 + 1;

# Only the first three fields of a vector are used here: description,
# Unicode string and its Punycode form.  Any further fields are ignored.
foreach my $test (@idna)
{
  my ($comment,$in,$out) = @{$test};
  is(encode_punycode($in), $out, $comment.' (encode_punycode)');
  is(decode_punycode($out), $in, $comment.' (decode_punycode)');
}
# Test vectors extracted from:
#
# Nameprep and IDNA Test Vectors
# draft-josefsson-idn-test-vectors
#
# Copyright (C) The Internet Society (2003). All Rights Reserved.
#
# This document and translations of it may be copied and furnished
# to others, and derivative works that comment on or otherwise
# explain it or assist in its implementation may be prepared,
# copied, published and distributed, in whole or in part, without
# restriction of any kind, provided that the above copyright
# notice and this paragraph are included on all such copies and
# derivative works. However, this document itself may not be
# modified in any way, such as by removing the copyright notice or
# references to the Internet Society or other Internet
# organizations, except as needed for the purpose of developing
# Internet standards in which case the procedures for copyrights
# defined in the Internet Standards process must be followed, or
# as required to translate it into languages other than English.

153
t/punycode_vec-xs.t Normal file
View File

@ -0,0 +1,153 @@
use strict;
use utf8;
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
use Test::More;
use Net::IDN::Punycode ':all';
BEGIN {
plan skip_all => 'no XS version' if eval {
\&Net::IDN::Punycode::encode_punycode ==
\&Net::IDN::Punycode::PP::encode_punycode; }
}
use Test::NoWarnings;
# Punycode test vectors.  Each entry is an array reference laid out as
#
#   [ description, Unicode string, expected Punycode string, ...flags ]
#
# The Punycode strings are bare (no "xn--" prefix).  Only the first three
# fields are used by the encode/decode checks in this file; the trailing
# numeric fields are not consulted by them, and several entries carry three
# of them or none at all instead of four.
our @idna = (
["Arabic (Egyptian)",
"\x{0644}\x{064A}\x{0647}\x{0645}\x{0627}\x{0628}\x{062A}\x{0643}".
"\x{0644}\x{0645}\x{0648}\x{0634}\x{0639}\x{0631}\x{0628}\x{064A}\x{061F}",
"egbpdaj6bu4bxfgehfvwxn", 0, 0, 1, 1 ],
["Chinese (simplified)",
"\x{4ED6}\x{4EEC}\x{4E3A}\x{4EC0}\x{4E48}\x{4E0D}\x{8BF4}\x{4E2D}".
"\x{6587}",
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, 1, 1 ],
["Chinese (traditional)",
"\x{4ED6}\x{5011}\x{7232}\x{4EC0}\x{9EBD}\x{4E0D}\x{8AAA}\x{4E2D}".
"\x{6587}",
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, 1, 1 ],
["Czech",
"\x{0050}\x{0072}\x{006F}\x{010D}\x{0070}\x{0072}\x{006F}\x{0073}".
"\x{0074}\x{011B}\x{006E}\x{0065}\x{006D}\x{006C}\x{0075}\x{0076}\x{00ED}".
"\x{010D}\x{0065}\x{0073}\x{006B}\x{0079}",
"Proprostnemluvesky-uyb24dma41a", 0, 0, 1, 1 ],
["Hebrew",
"\x{05DC}\x{05DE}\x{05D4}\x{05D4}\x{05DD}\x{05E4}\x{05E9}\x{05D5}".
"\x{05D8}\x{05DC}\x{05D0}\x{05DE}\x{05D3}\x{05D1}\x{05E8}\x{05D9}\x{05DD}".
"\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}",
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, 1, 1 ],
["Hindi (Devanagari)",
"\x{092F}\x{0939}\x{0932}\x{094B}\x{0917}\x{0939}\x{093F}\x{0928}".
"\x{094D}\x{0926}\x{0940}\x{0915}\x{094D}\x{092F}\x{094B}\x{0902}\x{0928}".
"\x{0939}\x{0940}\x{0902}\x{092C}\x{094B}\x{0932}\x{0938}\x{0915}\x{0924}".
"\x{0947}\x{0939}\x{0948}\x{0902}",
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0, 1 ],
["Japanese (kanji and hiragana)",
"\x{306A}\x{305C}\x{307F}\x{3093}\x{306A}\x{65E5}\x{672C}\x{8A9E}".
"\x{3092}\x{8A71}\x{3057}\x{3066}\x{304F}\x{308C}\x{306A}\x{3044}\x{306E}".
"\x{304B}",
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0, 1 ],
["Russian (Cyrillic)",
"\x{043F}\x{043E}\x{0447}\x{0435}\x{043C}\x{0443}\x{0436}\x{0435}".
"\x{043E}\x{043D}\x{0438}\x{043D}\x{0435}\x{0433}\x{043E}\x{0432}\x{043E}".
"\x{0440}\x{044F}\x{0442}\x{043F}\x{043E}\x{0440}\x{0443}\x{0441}\x{0441}".
"\x{043A}\x{0438}",
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
["Spanish",
"\x{0050}\x{006F}\x{0072}\x{0071}\x{0075}\x{00E9}\x{006E}\x{006F}".
"\x{0070}\x{0075}\x{0065}\x{0064}\x{0065}\x{006E}\x{0073}\x{0069}\x{006D}".
"\x{0070}\x{006C}\x{0065}\x{006D}\x{0065}\x{006E}\x{0074}\x{0065}\x{0068}".
"\x{0061}\x{0062}\x{006C}\x{0061}\x{0072}\x{0065}\x{006E}\x{0045}\x{0073}".
"\x{0070}\x{0061}\x{00F1}\x{006F}\x{006C}",
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0, 1 ],
["Vietnamese",
"\x{0054}\x{1EA1}\x{0069}\x{0073}\x{0061}\x{006F}\x{0068}\x{1ECD}".
"\x{006B}\x{0068}\x{00F4}\x{006E}\x{0067}\x{0074}\x{0068}\x{1EC3}\x{0063}".
"\x{0068}\x{1EC9}\x{006E}\x{00F3}\x{0069}\x{0074}\x{0069}\x{1EBF}\x{006E}".
"\x{0067}\x{0056}\x{0069}\x{1EC7}\x{0074}",
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0, 1 ],
["Japanese",
"\x{0033}\x{5E74}\x{0042}\x{7D44}\x{91D1}\x{516B}\x{5148}\x{751F}",
"3B-ww4c5e180e575a65lsy2b", 0, 0, 1, 1 ],
["Japanese",
"\x{5B89}\x{5BA4}\x{5948}\x{7F8E}\x{6075}\x{002D}\x{0077}\x{0069}".
"\x{0074}\x{0068}\x{002D}\x{0053}\x{0055}\x{0050}\x{0045}\x{0052}\x{002D}".
"\x{004D}\x{004F}\x{004E}\x{004B}\x{0045}\x{0059}\x{0053}",
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0, 1 ],
["Japanese",
"\x{0048}\x{0065}\x{006C}\x{006C}\x{006F}\x{002D}\x{0041}\x{006E}".
"\x{006F}\x{0074}\x{0068}\x{0065}\x{0072}\x{002D}\x{0057}\x{0061}\x{0079}".
"\x{002D}\x{305D}\x{308C}\x{305E}\x{308C}\x{306E}\x{5834}\x{6240}",
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0, 1 ],
["Japanese",
"\x{3072}\x{3068}\x{3064}\x{5C4B}\x{6839}\x{306E}\x{4E0B}\x{0032}",
"2-u9tlzr9756bt3uc0v", 0, 0, 1, 1 ],
["Japanese",
"\x{004D}\x{0061}\x{006A}\x{0069}\x{3067}\x{004B}\x{006F}\x{0069}".
"\x{3059}\x{308B}\x{0035}\x{79D2}\x{524D}",
"MajiKoi5-783gue6qz075azm5e", 0, 0, 1, 1 ],
["Japanese",
"\x{30D1}\x{30D5}\x{30A3}\x{30FC}\x{0064}\x{0065}\x{30EB}\x{30F3}".
"\x{30D0}",
"de-jg4avhby1noc0d", 0, 0, 1, 1 ],
["Japanese",
"\x{305D}\x{306E}\x{30B9}\x{30D4}\x{30FC}\x{30C9}\x{3067}",
"d9juau41awczczp", 0, 0, 1, 1 ],
["Greek",
"\x{03b5}\x{03bb}\x{03bb}\x{03b7}\x{03bd}\x{03b9}\x{03ba}\x{03ac}",
"hxargifdar", 0, 0, 1, 1 ],
["Maltese (Malti)",
"\x{0062}\x{006f}\x{006e}\x{0121}\x{0075}\x{0073}\x{0061}\x{0127}".
"\x{0127}\x{0061}",
"bonusaa-5bb1da", 0, 0, 1, 1 ],
# Same code points and expected output as the earlier "Russian (Cyrillic)"
# entry; only the hex digits are written in lower case here.
["Russian (Cyrillic)",
"\x{043f}\x{043e}\x{0447}\x{0435}\x{043c}\x{0443}\x{0436}\x{0435}".
"\x{043e}\x{043d}\x{0438}\x{043d}\x{0435}\x{0433}\x{043e}\x{0432}\x{043e}".
"\x{0440}\x{044f}\x{0442}\x{043f}\x{043e}\x{0440}\x{0443}\x{0441}\x{0441}".
"\x{043a}\x{0438}",
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
## Test vector from IdnaTest.txt
["Hebrew (combining)",
"\x{05D0}\x{0308}",
"ssa73l",
],
# A single non-ASCII code point (U+094D) between two ASCII letters.
['U+094D',
"a\x{094D}b",
"ab-fsf",
],
);
# Every vector is checked in both directions (encode and decode); the extra
# test in the plan accounts for the check Test::NoWarnings appends.
my $checks_per_vector = 2;
plan tests => 1 + $checks_per_vector * @idna;

for my $vector (@idna) {
    # Only description, Unicode input and expected Punycode are needed here;
    # any trailing fields of a vector are not consulted.
    my ($label, $unicode, $punycode) = @{$vector}[0 .. 2];

    is(encode_punycode($unicode), $punycode, "$label (encode_punycode)");
    is(decode_punycode($punycode), $unicode, "$label (decode_punycode)");
}
# Test vectors extracted from:
#
# Nameprep and IDNA Test Vectors
# draft-josefsson-idn-test-vectors
#
# Copyright (C) The Internet Society (2003). All Rights Reserved.
#
# This document and translations of it may be copied and furnished
# to others, and derivative works that comment on or otherwise
# explain it or assist in its implementation may be prepared,
# copied, published and distributed, in whole or in part, without
# restriction of any kind, provided that the above copyright
# notice and this paragraph are included on all such copies and
# derivative works. However, this document itself may not be
# modified in any way, such as by removing the copyright notice or
# references to the Internet Society or other Internet
# organizations, except as needed for the purpose of developing
# Internet standards in which case the procedures for copyrights
# defined in the Internet Standards process must be followed, or
# as required to translate it into languages other than English.

15
t/uts46_api_call.t Normal file
View File

@ -0,0 +1,15 @@
use bytes;
use strict;
use Test::More tests => 1+6;
use Test::NoWarnings;
use Net::IDN::UTS46 qw(:all);
# Check that both conversions are reachable under each name used below:
# the imported uts46_* function, the fully qualified short name, and the
# fully qualified uts46_* name.  Each check carries a description so that a
# failure names the entry point instead of only a line number.
#
# NOTE(review): `use bytes` is in effect and there is no `use utf8`, so the
# non-ASCII literals are the raw bytes stored in this file; the expected
# values presumably depend on the file's on-disk encoding -- confirm before
# re-encoding this file.
is(uts46_to_ascii('müller'),'xn--mller-kva', 'to_ascii via imported uts46_to_ascii');
is(Net::IDN::UTS46::to_ascii('müller'),'xn--mller-kva', 'to_ascii via Net::IDN::UTS46::to_ascii');
is(Net::IDN::UTS46::uts46_to_ascii('müller'),'xn--mller-kva', 'to_ascii via Net::IDN::UTS46::uts46_to_ascii');
is(uts46_to_unicode('xn--mller-kva'),'müller', 'to_unicode via imported uts46_to_unicode');
is(Net::IDN::UTS46::to_unicode('xn--mller-kva'),'müller', 'to_unicode via Net::IDN::UTS46::to_unicode');
is(Net::IDN::UTS46::uts46_to_unicode('xn--mller-kva'),'müller', 'to_unicode via Net::IDN::UTS46::uts46_to_unicode');

16
t/uts46_encode_bytes.t Normal file
View File

@ -0,0 +1,16 @@
use bytes;
use strict;
use Test::More tests => 1 + 6;
use Test::NoWarnings;
use Net::IDN::UTS46 qw(:all);
# Round-trip checks with byte-string literals (`use bytes`, no `use utf8`).
# Each check carries a description so that a failure is identifiable from
# the test output alone.
#
# NOTE(review): the non-ASCII literals are the raw bytes stored in this file;
# the expected values presumably depend on the file's on-disk encoding --
# confirm before re-encoding this file.
is(uts46_to_ascii('mueller'),'mueller', 'to_ascii: ASCII-only label is unchanged');
is(uts46_to_ascii('xn--mller-kva'),'xn--mller-kva', 'to_ascii: already encoded label is unchanged');
is(uts46_to_ascii('müller'),'xn--mller-kva', 'to_ascii: non-ASCII label is encoded');
is(uts46_to_unicode('mueller'),'mueller', 'to_unicode: ASCII-only label is unchanged');
is(uts46_to_unicode('xn--mller-kva'),'müller', 'to_unicode: encoded label is decoded');
is(uts46_to_unicode('müller'),'müller', 'to_unicode: non-ASCII label is unchanged');

19
t/uts46_encode_utf8.t Normal file
View File

@ -0,0 +1,19 @@
use utf8;
use strict;
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
use Test::More tests => 1+8;
use Test::NoWarnings;
use Net::IDN::UTS46 qw(:all);
# Round-trip checks with character-string literals (`use utf8` is in effect
# at the top of this file).  Each check carries a description so that a
# failure is identifiable from the test output alone.
is(uts46_to_ascii('mueller'),'mueller', 'to_ascii: ASCII-only label is unchanged');
is(uts46_to_ascii('xn--mller-kva'),'xn--mller-kva', 'to_ascii: already encoded label is unchanged');
is(uts46_to_ascii('müller'),'xn--mller-kva', 'to_ascii: Latin label with umlaut is encoded');
is(uts46_to_ascii('中央大学'),'xn--fiq80yua78t', 'to_ascii: CJK label is encoded');
is(uts46_to_unicode('mueller'),'mueller', 'to_unicode: ASCII-only label is unchanged');
is(uts46_to_unicode('xn--mller-kva'),'müller', 'to_unicode: encoded Latin label is decoded');
is(uts46_to_unicode('müller'),'müller', 'to_unicode: non-ASCII label is unchanged');
is(uts46_to_unicode('xn--fiq80yua78t'),'中央大学', 'to_unicode: encoded CJK label is decoded');

6222
t/uts46_to_ascii-trans.t Normal file

File diff suppressed because it is too large Load Diff

6222
t/uts46_to_ascii.t Normal file

File diff suppressed because it is too large Load Diff

7787
t/uts46_to_unicode.t Normal file

File diff suppressed because it is too large Load Diff

29
t/xtra_pp.t Normal file
View File

@ -0,0 +1,29 @@
use strict;
use utf8;
use warnings;
BEGIN {
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';
}
use Test::More tests => 3 + 1;
use Test::NoWarnings;
# Request the non-XS Punycode code path before any Net::IDN module is loaded.
# This has to be a BEGIN block: `use` statements run at compile time, so an
# assignment in a plain run-time block would only happen after the
# `use Net::IDN::UTS46` that follows has already been processed.
# NOTE(review): presumably Net::IDN::Punycode consults $_NO_XS while it is
# being loaded -- confirm against that module.
BEGIN {
    $Net::IDN::Punycode::_NO_XS = 1;
}
use Net::IDN::UTS46 (':all');
no warnings 'utf8';

# Options handed to both UTS #46 conversions below.
my %opts = (TransitionalProcessing => '0');

# Each call is expected to die, so the eval yields undef; if a check fails
# and an exception message is available, it is printed as a diagnostic.
my $ascii = eval { uts46_to_ascii('xn--0.pt', %opts) };
is($ascii, undef, q{to_ascii('xn--0.pt') throws error A3 [data/IdnaTest.txt:256]})
    or ($@ and diag($@));

my $unicode = eval { uts46_to_unicode('xn--0.pt', %opts) };
is($unicode, undef, q{to_unicode('xn--0.pt') throws error A3 [data/IdnaTest.txt:256]})
    or ($@ and diag($@));

my $decoded = eval { Net::IDN::Punycode::decode_punycode(0) };
is($decoded, undef, 'decode_punycode(0) throws error')
    or ($@ and diag($@));

# Ignore warnings generated by perl core modules on old perl
if ($^V lt v5.8.7) {
    Test::NoWarnings->clear_warnings;
}

exit(0);