Import Upstream version 2.500
This commit is contained in:
commit
193fb07d34
|
@ -0,0 +1,51 @@
|
|||
# Build.PL -- Module::Build configuration for the Net::IDN::Encode
# distribution.
#
# Running `perl Build.PL` constructs the build object described below and
# then writes the `Build` script through create_build_script().

require 5.008005;

use strict;
use warnings;
use utf8;    # the dist_author string below contains a non-ASCII literal

# 'test_requires' (used below) is only understood by Module::Build 0.4004
# and later, so ask for at least that version.
use Module::Build 0.4004;

# Named $build rather than $b so that sort's special $a/$b package
# variables are never shadowed by a lexical.
my $build = Module::Build->new(
    'module_name' => 'Net::IDN::Encode',

    'license'       => 'perl',
    'dist_author'   => 'Claus Färber <CFAERBER@cpan.org>',
    'dist_abstract' => 'Internationalizing Domain Names in Applications (UTS #46)',

    # Release-time helpers: sign the distribution, generate a LICENSE file
    # and generate a "traditional" (ExtUtils::MakeMaker) Makefile.PL.
    'sign'               => 1,
    'create_license'     => 1,
    'create_makefile_pl' => 'traditional',

    # Runtime prerequisites.
    'requires' => {
        'Unicode::Normalize' => 0,
        'perl'               => 5.008005,
    },

    # Prerequisites needed only to run the test suite.
    'test_requires' => {
        'Test::More'       => 0,
        'Test::NoWarnings' => 0,
    },

    # NOTE(review): explicitly left undefined; presumably this leaves the
    # compiler decision to Module::Build -- confirm.
    'needs_compiler' => undef,

    # Generator scripts (keys) and the files each one produces (values).
    # NOTE(review): the generators are referenced under lib/Unicode/ while
    # the generated module lands under lib/Net/IDN/ -- confirm that these
    # paths are intended.
    'PL_Files' => {
        'lib/Unicode/UTS46/_Mapping.PL' => [
            'lib/Net/IDN/UTS46/_Mapping.pm',
        ],
        'lib/Unicode/UTS46/GenTests.PL' => [
            't/uts46_to_ascii.t',
            't/uts46_to_ascii-trans.t',
            't/uts46_to_unicode.t',
        ],
    },

    # Directories listed under no_index in the generated metadata.
    'no_index' => {
        'directory' => ['eg', 'data']
    },

    # Extra resource links merged into the generated metadata files.
    'meta_add' => {
        'resources' => {
            'homepage'   => 'https://metacpan.org/release/Net-IDN-Encode',
            'bugtracker' => 'https://rt.cpan.org/Public/Dist/Display.html?Name=Net-IDN-Encode',
            'repository' => 'http://github.com/cfaerber/Net-IDN-Encode',
        },
    },
);

$build->create_build_script;
|
|
@ -0,0 +1,206 @@
|
|||
Revision history for Perl extension Net::IDN::Encode
|
||||
|
||||
2.500 2018-10-06
|
||||
- update to Unicode 10.0.0
|
||||
- Net::IDN::UTS46: remove workarounds for pre-9.0.0 test vectors; the
|
||||
module now more closely follows the written spec
|
||||
- Net::IDN::UTS46: fix validation for some non-valid characters
|
||||
- Net::IDN::UTS46: for perl 5.8.x/5.10.x, include workaround for bidi
|
||||
validation where some labels would incorrectly be marked as invalid
|
||||
[B1] because of a bug in perl's Unicode implementation
|
||||
- Net::IDN::Punycode: fix for warnings under perl ≤ 5.8.7 (EXPERIMENTAL)
|
||||
|
||||
2.401 2018-09-20
|
||||
- FIXES: #127056: [PATCH 1/2] Fix domain_to_ascii AllowUnassigned param
|
||||
(report and patch by SKJM)
|
||||
- FIXES: #127057: [PATCH 2/2] Fix domain_to_unicode AllowUnassigned
|
||||
param (report and patch by SKJM)
|
||||
|
||||
2.400 2017-01-01
|
||||
- update to Unicode 9.0.0
|
||||
- FIXES #119468: [PATCH] spelling fixes (reported by GREGOA)
|
||||
- possible fix for utf8 warnings under perl 5.8.x
|
||||
- changed generation of tests so that TODO is no longer required when
|
||||
the module author's perl doesn't support the newest Unicode version
|
||||
- remove author tests
|
||||
- more spelling fixes
|
||||
|
||||
2.303 2016-12-10
|
||||
- FIXES: warnings when compiling lib/Net/IDN/Punycode.xs
|
||||
(reported/patch provided by paul@c***-***.org)
|
||||
|
||||
2.302 2016-12-07
|
||||
- Fixes memory bug introduced by fix for #118924
|
||||
|
||||
2.301 2016-12-03
|
||||
- FIXES: #118924: encode_punycode heap overflow
|
||||
(reported by Alexander Bluhm)
|
||||
|
||||
2.300 2015-06-17
|
||||
- update to Unicode 8.0.0
|
||||
|
||||
2.202 2015-04-18
|
||||
- use updated IdnaTest.txt from Unicode 7.0.0 database
|
||||
FIXES: #96749: Fails with bleadperl
|
||||
- documentation updates, point to perl Unicode tutorials
|
||||
- tests for domain xn--zcaa.de;
|
||||
REJECTED: #103205 for Net-IDN-Encode: conversion of domain name
|
||||
|
||||
2.201 2014-08-30
|
||||
- correct handling of uppercase a-labels in
|
||||
domain_to_{ascii,unicode}
|
||||
- FIXES: #98354: Capitalized ACE prefix does not work (reported
|
||||
by victor@*****.ru)
|
||||
|
||||
2.200 2014-06-21
|
||||
- Net::IDN::UTS46: update to Unicode® 7.0.0 and UTS #46 r13
|
||||
- typo and metadata fixes from dstreinbrunner
|
||||
|
||||
2.100 2013-12-30
|
||||
- Net::IDN::Encode: preserve case in pure-ASCII labels (bypass
|
||||
en-/decoding)
|
||||
FIXES: #91059: case not preserved (reported by DMUEY)
|
||||
- Net::IDN::Encode: simplify scalar-via-blob syntax (pull req.
|
||||
by DMUEY)
|
||||
- Net::IDN::Encode: add SMALL COMMERCIAL AT to list of possible
|
||||
@ signs (pull req. by DMUEY)
|
||||
|
||||
- Net::IDN::UTS46: update to Unicode® 6.3.0 and UTS #46 r11:
|
||||
* new UTS #46 test vectors in data/IdnaTest.txt
|
||||
* built on perl-blead (5.19.7) for support of Unicode® 6.3.0
|
||||
in tests generated from data/IdnaTest.txt
|
||||
* tweaks and fixes regarding edge cases not clearly described
|
||||
in UTS #46
|
||||
- Net::IDN::UTS46: test vectors supposed to fail due to
|
||||
Unassigned characters are no longer skipped.
|
||||
|
||||
- Net::IDN::Punycode: use utf8_to_uvchr_buf instead of deprecated
|
||||
utf8_to_uvuni (perl 5.15.9 and higher; utf8_to_uvuni_buf is
|
||||
deprecated from perl 5.19.5)
|
||||
|
||||
2.005 2013-11-03
|
||||
- better documentation for unassigned characters,
|
||||
FIXES: #89750: Can't create IDN for a special domain
|
||||
(reported by felix.*****@*****.de)
|
||||
- FIXES: #89270: [PATCH] fix spelling errors in the docs
|
||||
(reported by cstamas@*****.hu)
|
||||
|
||||
2.004 2013-08-12
|
||||
- FIXES: #85552 3 uts46 tests FAIL under perl-5.18.0
|
||||
(reported by d.thomas@*****.au)
|
||||
|
||||
2.003 2012-01-22
|
||||
- FIXES required version of Unicode::Normalize in UTS46.pm
|
||||
(reported by CPAN testers)
|
||||
|
||||
2.002 2012-01-18
|
||||
- FIXES dependencies/required perl version
|
||||
- FIXES: #74021 Makefile.PL bad value for
|
||||
version-requirement
|
||||
|
||||
2.001 2012-01-12
|
||||
- FIXES XS_VERSION mismatch
|
||||
- FIXES dependency on Unicode::Normalize (was 1.000 or higher,
|
||||
but this is not needed).
|
||||
|
||||
2.000 2012-01-08
|
||||
- switch to Unicode Technical Standard #46 (previously,
|
||||
IDNA2003 has been used, which is now available as
|
||||
Net::IDN::IDNA2003):
|
||||
- add Net::IDN::UTS46 + test vectors from UTS #46
|
||||
- remove Net::IDN::Nameprep (only required for IDNA2003)
|
||||
- add documentation about IDNA Standards and IDNA module
|
||||
Overview/Roadmap
|
||||
- allow NON-LDH labels (e.g. for SRV records), even if
|
||||
UseSTD3Rules=true (parameter now only applies to
|
||||
U-labels and A-labels, i.e. labels that are converted by
|
||||
IDNA).
|
||||
- FIXES potential portability problems in
|
||||
Net::IDN::Punycode XS 1.999_20120108
|
||||
- FIXES 'wide character' warning with tests if tests
|
||||
fail/if TB2 is used on modern perl installations.
|
||||
- FIXES decoding bug in Net::IDN::Punycode::PP (discovered
|
||||
through UTS #46 test vectors)
|
||||
|
||||
1.101 2011-12-08
|
||||
- FIXES: #72615 faulty data in Build.PL causes a lack of
|
||||
meta files, which breaks carton.
|
||||
|
||||
1.100 2010-06-08
|
||||
[patch by Loïc Etienne]
|
||||
- new parameters AllowUnassigned/UseSTD3ASCIIRules for
|
||||
to_ascii, to_unicode (RFC 3490)
|
||||
domain_to_ascii, domain_to_unicode
|
||||
- case insensitive ACE prefix (RFC 3490)
|
||||
- new length 255 check in domain_to_ascii (RFC 1034)
|
||||
- length 63 check moved to to_ascii
|
||||
- dots replacement only in domain_to_ascii
|
||||
(domain_to_unicode does not require it)
|
||||
- o-modifier in regexes
|
||||
- _domain replaced by domain_to_unicode and domain_to_ascii
|
||||
- _nameprep replaced by Net::IDN::Nameprep
|
||||
|
||||
1.000 2010-01-13
|
||||
- clean-up
|
||||
- release
|
||||
|
||||
0.999_20090112 2010-01-10
|
||||
- add XS for decode_punycode
|
||||
|
||||
0.999_20090110 2010-01-10
|
||||
- add XS for encode_punycode
|
||||
|
||||
- include Net::IDN::Nameprep into Net::IDN::Encode *sigh*
|
||||
- drop IDNA::Punycode
|
||||
|
||||
0.99_20091231 2009-12-31
|
||||
- depend on perl 5.8.3
|
||||
- optimise Net::IDN::Punycode
|
||||
|
||||
0.99_20091226 2009-12-26
|
||||
- some clean-ups
|
||||
|
||||
0.99_20091216 2009-12-16
|
||||
- switch to Module::Build
|
||||
- switch to Github, remove svn:keywords, add .gitignore
|
||||
|
||||
- use ASCII in POD, fixes FAILs with perl 5.6.x
|
||||
- add examples in eg/
|
||||
|
||||
0.99_20080913 2009-09-13
|
||||
- fixed perl 5.6.x (no warnings 'utf8')
|
||||
|
||||
0.99_20080913 2009-09-13
|
||||
- require perl version 5.6.0 instead of 5.6.6
|
||||
- skip more tests in lower perl versions
|
||||
|
||||
- removed Encode::Punycode; Encode is only available from
|
||||
perl 5.7.3
|
||||
- renamed back to Net::IDN::Encode; without
|
||||
Encode::Punycode, the new name does not make sense.
|
||||
|
||||
0.99_20071012 2007-10-12
|
||||
- renamed Net-IDN-Encode distribution to Net-IDN-tools
|
||||
|
||||
- includes Net::IDN::Punycode (from IDNA::Punycode v0.02)
|
||||
- includes Net::IDN::Nameprep (complete rewrite, uses
|
||||
Unicode::Stringprep)
|
||||
- includes IDNA::Punycode (deprecated, new version based
|
||||
on Net::IDN::Punycode/::Encode)
|
||||
- includes Encode::Punycode (new version based on
|
||||
Net::IDN::Punycode)
|
||||
- uses Unicode::Stringprep
|
||||
|
||||
- more tests, including test vectors from Internet Draft
|
||||
draft-josefsson-idn-test-vectors-00.
|
||||
|
||||
- FIXES: #16150: Net::IDN::Encode depends on non-modulelist module IDNA::Punycode
|
||||
- FIXES: #16145: IDNA::Punycode 0.03
|
||||
- FIXES: #28123: Undeclared dependency on Unicode::Stringprep (reported by ANDK)
|
||||
- FIXES WARNING: v-string in use/require non-portable (Net::IDN::Nameprep[::*])
|
||||
|
||||
0.02 2004-06-20
|
||||
- fixed handling of incomplete/empty email addresses
|
||||
|
||||
0.01 2004-05-30
|
||||
- first release
|
|
@ -0,0 +1,379 @@
|
|||
This software is copyright (c) 2018 by Claus Färber <CFAERBER@cpan.org>.
|
||||
|
||||
This is free software; you can redistribute it and/or modify it under
|
||||
the same terms as the Perl 5 programming language system itself.
|
||||
|
||||
Terms of the Perl programming language system itself
|
||||
|
||||
a) the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 1, or (at your option) any
|
||||
later version, or
|
||||
b) the "Artistic License"
|
||||
|
||||
--- The GNU General Public License, Version 1, February 1989 ---
|
||||
|
||||
This software is Copyright (c) 2018 by Claus Färber <CFAERBER@cpan.org>.
|
||||
|
||||
This is free software, licensed under:
|
||||
|
||||
The GNU General Public License, Version 1, February 1989
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 1, February 1989
|
||||
|
||||
Copyright (C) 1989 Free Software Foundation, Inc.
|
||||
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The license agreements of most software companies try to keep users
|
||||
at the mercy of those companies. By contrast, our General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. The
|
||||
General Public License applies to the Free Software Foundation's
|
||||
software and to any other program whose authors commit to using it.
|
||||
You can use it for your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Specifically, the General Public License is designed to make
|
||||
sure that you have the freedom to give away or sell copies of free
|
||||
software, that you receive source code or can get it if you want it,
|
||||
that you can change the software or use pieces of it in new free
|
||||
programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of a such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must tell them their rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License Agreement applies to any program or other work which
|
||||
contains a notice placed by the copyright holder saying it may be
|
||||
distributed under the terms of this General Public License. The
|
||||
"Program", below, refers to any such program or work, and a "work based
|
||||
on the Program" means either the Program or any work containing the
|
||||
Program or a portion of it, either verbatim or with modifications. Each
|
||||
licensee is addressed as "you".
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's source
|
||||
code as you receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice and
|
||||
disclaimer of warranty; keep intact all the notices that refer to this
|
||||
General Public License and to the absence of any warranty; and give any
|
||||
other recipients of the Program a copy of this General Public License
|
||||
along with the Program. You may charge a fee for the physical act of
|
||||
transferring a copy.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion of
|
||||
it, and copy and distribute such modifications under the terms of Paragraph
|
||||
1 above, provided that you also do the following:
|
||||
|
||||
a) cause the modified files to carry prominent notices stating that
|
||||
you changed the files and the date of any change; and
|
||||
|
||||
b) cause the whole of any work that you distribute or publish, that
|
||||
in whole or in part contains the Program or any part thereof, either
|
||||
with or without modifications, to be licensed at no charge to all
|
||||
third parties under the terms of this General Public License (except
|
||||
that you may choose to grant warranty protection to some or all
|
||||
third parties, at your option).
|
||||
|
||||
c) If the modified program normally reads commands interactively when
|
||||
run, you must cause it, when started running for such interactive use
|
||||
in the simplest and most usual way, to print or display an
|
||||
announcement including an appropriate copyright notice and a notice
|
||||
that there is no warranty (or else, saying that you provide a
|
||||
warranty) and that users may redistribute the program under these
|
||||
conditions, and telling the user how to view a copy of this General
|
||||
Public License.
|
||||
|
||||
d) You may charge a fee for the physical act of transferring a
|
||||
copy, and you may at your option offer warranty protection in
|
||||
exchange for a fee.
|
||||
|
||||
Mere aggregation of another independent work with the Program (or its
|
||||
derivative) on a volume of a storage or distribution medium does not bring
|
||||
the other work under the scope of these terms.
|
||||
|
||||
3. You may copy and distribute the Program (or a portion or derivative of
|
||||
it, under Paragraph 2) in object code or executable form under the terms of
|
||||
Paragraphs 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of
|
||||
Paragraphs 1 and 2 above; or,
|
||||
|
||||
b) accompany it with a written offer, valid for at least three
|
||||
years, to give any third party free (except for a nominal charge
|
||||
for the cost of distribution) a complete machine-readable copy of the
|
||||
corresponding source code, to be distributed under the terms of
|
||||
Paragraphs 1 and 2 above; or,
|
||||
|
||||
c) accompany it with the information you received as to where the
|
||||
corresponding source code may be obtained. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form alone.)
|
||||
|
||||
Source code for a work means the preferred form of the work for making
|
||||
modifications to it. For an executable file, complete source code means
|
||||
all the source code for all modules it contains; but, as a special
|
||||
exception, it need not include source code for modules which are standard
|
||||
libraries that accompany the operating system on which the executable
|
||||
file runs, or for standard header files or definitions files that
|
||||
accompany that operating system.
|
||||
|
||||
4. You may not copy, modify, sublicense, distribute or transfer the
|
||||
Program except as expressly provided under this General Public License.
|
||||
Any attempt otherwise to copy, modify, sublicense, distribute or transfer
|
||||
the Program is void, and will automatically terminate your rights to use
|
||||
the Program under this License. However, parties who have received
|
||||
copies, or rights to use copies, from you under this General Public
|
||||
License will not have their licenses terminated so long as such parties
|
||||
remain in full compliance.
|
||||
|
||||
5. By copying, distributing or modifying the Program (or any work based
|
||||
on the Program) you indicate your acceptance of this license to do so,
|
||||
and all its terms and conditions.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the original
|
||||
licensor to copy, distribute or modify the Program subject to these
|
||||
terms and conditions. You may not impose any further restrictions on the
|
||||
recipients' exercise of the rights granted herein.
|
||||
|
||||
7. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of the license which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
the license, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
8. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Appendix: How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to humanity, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these
|
||||
terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest to
|
||||
attach them to the start of each source file to most effectively convey
|
||||
the exclusion of warranty; and each file should have at least the
|
||||
"copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) 19yy <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
|
||||
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) 19xx name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the
|
||||
appropriate parts of the General Public License. Of course, the
|
||||
commands you use may be called something other than `show w' and `show
|
||||
c'; they could even be mouse-clicks or menu items--whatever suits your
|
||||
program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||
program `Gnomovision' (a program to direct compilers to make passes
|
||||
at assemblers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
That's all there is to it!
|
||||
|
||||
|
||||
--- The Artistic License 1.0 ---
|
||||
|
||||
This software is Copyright (c) 2018 by Claus Färber <CFAERBER@cpan.org>.
|
||||
|
||||
This is free software, licensed under:
|
||||
|
||||
The Artistic License 1.0
|
||||
|
||||
The Artistic License
|
||||
|
||||
Preamble
|
||||
|
||||
The intent of this document is to state the conditions under which a Package
|
||||
may be copied, such that the Copyright Holder maintains some semblance of
|
||||
artistic control over the development of the package, while giving the users of
|
||||
the package the right to use and distribute the Package in a more-or-less
|
||||
customary fashion, plus the right to make reasonable modifications.
|
||||
|
||||
Definitions:
|
||||
|
||||
- "Package" refers to the collection of files distributed by the Copyright
|
||||
Holder, and derivatives of that collection of files created through
|
||||
textual modification.
|
||||
- "Standard Version" refers to such a Package if it has not been modified,
|
||||
or has been modified in accordance with the wishes of the Copyright
|
||||
Holder.
|
||||
- "Copyright Holder" is whoever is named in the copyright or copyrights for
|
||||
the package.
|
||||
- "You" is you, if you're thinking about copying or distributing this Package.
|
||||
- "Reasonable copying fee" is whatever you can justify on the basis of media
|
||||
cost, duplication charges, time of people involved, and so on. (You will
|
||||
not be required to justify it to the Copyright Holder, but only to the
|
||||
computing community at large as a market that must bear the fee.)
|
||||
- "Freely Available" means that no fee is charged for the item itself, though
|
||||
there may be fees involved in handling the item. It also means that
|
||||
recipients of the item may redistribute it under the same conditions they
|
||||
received it.
|
||||
|
||||
1. You may make and give away verbatim copies of the source form of the
|
||||
Standard Version of this Package without restriction, provided that you
|
||||
duplicate all of the original copyright notices and associated disclaimers.
|
||||
|
||||
2. You may apply bug fixes, portability fixes and other modifications derived
|
||||
from the Public Domain or from the Copyright Holder. A Package modified in such
|
||||
a way shall still be considered the Standard Version.
|
||||
|
||||
3. You may otherwise modify your copy of this Package in any way, provided that
|
||||
you insert a prominent notice in each changed file stating how and when you
|
||||
changed that file, and provided that you do at least ONE of the following:
|
||||
|
||||
a) place your modifications in the Public Domain or otherwise make them
|
||||
Freely Available, such as by posting said modifications to Usenet or an
|
||||
equivalent medium, or placing the modifications on a major archive site
|
||||
such as ftp.uu.net, or by allowing the Copyright Holder to include your
|
||||
modifications in the Standard Version of the Package.
|
||||
|
||||
b) use the modified Package only within your corporation or organization.
|
||||
|
||||
c) rename any non-standard executables so the names do not conflict with
|
||||
standard executables, which must also be provided, and provide a separate
|
||||
manual page for each non-standard executable that clearly documents how it
|
||||
differs from the Standard Version.
|
||||
|
||||
d) make other distribution arrangements with the Copyright Holder.
|
||||
|
||||
4. You may distribute the programs of this Package in object code or executable
|
||||
form, provided that you do at least ONE of the following:
|
||||
|
||||
a) distribute a Standard Version of the executables and library files,
|
||||
together with instructions (in the manual page or equivalent) on where to
|
||||
get the Standard Version.
|
||||
|
||||
b) accompany the distribution with the machine-readable source of the Package
|
||||
with your modifications.
|
||||
|
||||
c) accompany any non-standard executables with their corresponding Standard
|
||||
Version executables, giving the non-standard executables non-standard
|
||||
names, and clearly documenting the differences in manual pages (or
|
||||
equivalent), together with instructions on where to get the Standard
|
||||
Version.
|
||||
|
||||
d) make other distribution arrangements with the Copyright Holder.
|
||||
|
||||
5. You may charge a reasonable copying fee for any distribution of this
|
||||
Package. You may charge any fee you choose for support of this Package. You
|
||||
may not charge a fee for this Package itself. However, you may distribute this
|
||||
Package in aggregate with other (possibly commercial) programs as part of a
|
||||
larger (possibly commercial) software distribution provided that you do not
|
||||
advertise this Package as a product of your own.
|
||||
|
||||
6. The scripts and library files supplied as input to or produced as output
|
||||
from the programs of this Package do not automatically fall under the copyright
|
||||
of this Package, but belong to whomever generated them, and may be sold
|
||||
commercially, and may be aggregated with this Package.
|
||||
|
||||
7. C or perl subroutines supplied by you and linked into this Package shall not
|
||||
be considered part of this Package.
|
||||
|
||||
8. The name of the Copyright Holder may not be used to endorse or promote
|
||||
products derived from this software without specific prior written permission.
|
||||
|
||||
9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
The End
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
Build.PL
|
||||
Changes
|
||||
eg/hello_idn.pl
|
||||
eg/hello_idn_email.pl
|
||||
lib/Net/IDN/Encode.pm
|
||||
lib/Net/IDN/Overview.pod
|
||||
lib/Net/IDN/Punycode.pm
|
||||
lib/Net/IDN/Punycode.xs
|
||||
lib/Net/IDN/Punycode/PP.pm
|
||||
lib/Net/IDN/Standards.pod
|
||||
lib/Net/IDN/UTS46.pm
|
||||
lib/Net/IDN/UTS46/_Mapping.pm
|
||||
LICENSE
|
||||
Makefile.PL
|
||||
MANIFEST
|
||||
META.json
|
||||
META.yml
|
||||
README
|
||||
t/00use.t
|
||||
t/domain_to_ascii.t
|
||||
t/domain_to_unicode.t
|
||||
t/encode_bytes.t
|
||||
t/encode_utf8.t
|
||||
t/punycode_vec-pp.t
|
||||
t/punycode_vec-xs.t
|
||||
t/uts46_api_call.t
|
||||
t/uts46_encode_bytes.t
|
||||
t/uts46_encode_utf8.t
|
||||
t/uts46_to_ascii-trans.t
|
||||
t/uts46_to_ascii.t
|
||||
t/uts46_to_unicode.t
|
||||
t/xtra_pp.t
|
||||
SIGNATURE Added here by Module::Build
|
|
@ -0,0 +1,58 @@
|
|||
{
|
||||
"abstract" : "Internationalizing Domain Names in Applications (UTS #46)",
|
||||
"author" : [
|
||||
"Claus Färber <CFAERBER@cpan.org>"
|
||||
],
|
||||
"dynamic_config" : 1,
|
||||
"generated_by" : "Module::Build version 0.4224",
|
||||
"license" : [
|
||||
"perl_5"
|
||||
],
|
||||
"meta-spec" : {
|
||||
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
|
||||
"version" : "2"
|
||||
},
|
||||
"name" : "Net-IDN-Encode",
|
||||
"no_index" : {
|
||||
"directory" : [
|
||||
"eg",
|
||||
"data"
|
||||
]
|
||||
},
|
||||
"prereqs" : {
|
||||
"build" : {
|
||||
"requires" : {
|
||||
"ExtUtils::CBuilder" : "0"
|
||||
}
|
||||
},
|
||||
"configure" : {
|
||||
"requires" : {
|
||||
"Module::Build" : "0.42"
|
||||
}
|
||||
},
|
||||
"runtime" : {
|
||||
"requires" : {
|
||||
"Unicode::Normalize" : "0",
|
||||
"perl" : "5.008005"
|
||||
}
|
||||
},
|
||||
"test" : {
|
||||
"requires" : {
|
||||
"Test::More" : "0",
|
||||
"Test::NoWarnings" : "0"
|
||||
}
|
||||
}
|
||||
},
|
||||
"release_status" : "stable",
|
||||
"resources" : {
|
||||
"bugtracker" : {
|
||||
"web" : "https://rt.cpan.org/Public/Dist/Display.html?Name=Net-IDN-Encode"
|
||||
},
|
||||
"homepage" : "https://metacpan.org/release/Net-IDN-Encode",
|
||||
"repository" : {
|
||||
"url" : "http://github.com/cfaerber/Net-IDN-Encode"
|
||||
}
|
||||
},
|
||||
"version" : "2.500",
|
||||
"x_serialization_backend" : "JSON::PP version 2.27400_02"
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
---
|
||||
abstract: 'Internationalizing Domain Names in Applications (UTS #46)'
|
||||
author:
|
||||
- 'Claus Färber <CFAERBER@cpan.org>'
|
||||
build_requires:
|
||||
ExtUtils::CBuilder: '0'
|
||||
Test::More: '0'
|
||||
Test::NoWarnings: '0'
|
||||
configure_requires:
|
||||
Module::Build: '0.42'
|
||||
dynamic_config: 1
|
||||
generated_by: 'Module::Build version 0.4224, CPAN::Meta::Converter version 2.150010'
|
||||
license: perl
|
||||
meta-spec:
|
||||
url: http://module-build.sourceforge.net/META-spec-v1.4.html
|
||||
version: '1.4'
|
||||
name: Net-IDN-Encode
|
||||
no_index:
|
||||
directory:
|
||||
- eg
|
||||
- data
|
||||
requires:
|
||||
Unicode::Normalize: '0'
|
||||
perl: '5.008005'
|
||||
resources:
|
||||
bugtracker: https://rt.cpan.org/Public/Dist/Display.html?Name=Net-IDN-Encode
|
||||
homepage: https://metacpan.org/release/Net-IDN-Encode
|
||||
repository: http://github.com/cfaerber/Net-IDN-Encode
|
||||
version: '2.500'
|
||||
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
|
|
@ -0,0 +1,16 @@
|
|||
# Note: this file was auto-generated by Module::Build::Compat version 0.4224
require 5.008005;
use ExtUtils::MakeMaker;

# Arguments handed to ExtUtils::MakeMaker.  The distribution is normally
# built through Build.PL; this file only offers the traditional
# "perl Makefile.PL && make" entry point.
my %prereq_pm = (
    'Unicode::Normalize' => 0,
    'ExtUtils::CBuilder' => 0,
);

my %makefile_args = (
    'NAME'         => 'Net::IDN::Encode',
    'VERSION_FROM' => 'lib/Net/IDN/Encode.pm',
    'INSTALLDIRS'  => 'site',
    'EXE_FILES'    => [],
    'PL_FILES'     => {},
    'PREREQ_PM'    => \%prereq_pm,
);

WriteMakefile(%makefile_args);
|
|
@ -0,0 +1,34 @@
|
|||
OVERVIEW
|
||||
|
||||
Net::IDN::Encode -- Internationalized Domain Names in
|
||||
Applications (IDNA)
|
||||
|
||||
Net::IDN::UTS46 -- Unicode IDNA Compatibility Processing
|
||||
(UTS #46)
|
||||
|
||||
Net::IDN::Punycode -- ASCII-compatible encoding of Unicode
|
||||
(Punycode, RFC 3492)
|
||||
|
||||
INSTALLATION
|
||||
|
||||
To install this module type the following:
|
||||
|
||||
perl Build.PL
|
||||
./Build
|
||||
./Build test
|
||||
./Build install
|
||||
|
||||
DEPENDENCIES
|
||||
|
||||
This module requires these other modules and libraries:
|
||||
|
||||
Unicode::Normalize
|
||||
|
||||
AUTHOR
|
||||
|
||||
Claus Färber <CFAERBER@cpan.org>
|
||||
|
||||
ACKNOWLEDGMENTS
|
||||
|
||||
Tatsuhiko Miyagawa <miyagawa@bulknews.net>
|
||||
Robert Urban <urban@UNIX-Beratung.de>
|
|
@ -0,0 +1,59 @@
|
|||
This file contains message digests of all files listed in MANIFEST,
|
||||
signed via the Module::Signature module, version 0.83.
|
||||
|
||||
To verify the content in this distribution, first make sure you have
|
||||
Module::Signature installed, then type:
|
||||
|
||||
% cpansign -v
|
||||
|
||||
It will check each file's integrity, as well as the signature's
|
||||
validity. If "==> Signature verified OK! <==" is not displayed,
|
||||
the distribution may already have been compromised, and you should
|
||||
not run its Makefile.PL or Build.PL.
|
||||
|
||||
-----BEGIN PGP SIGNED MESSAGE-----
|
||||
Hash: SHA256
|
||||
|
||||
SHA256 4693fdce53a610d9aa8759433e8d9e1b881b886cfa0d0d4dc3696d1d0b45c7a6 Build.PL
|
||||
SHA256 61751a3382cd64ae05f5fb7e256259ad60f4671bf9c91a1e6a432102e2dd5581 Changes
|
||||
SHA256 4bbb4460302739506858a117c0419f5323b6bd33a5da277ccf5b7fe327aa1147 LICENSE
|
||||
SHA256 423c1d1953556eaacc7dde323b31a9e9b2beb009b5368d5778442401e52d22dd MANIFEST
|
||||
SHA256 94c7a0317780509b6e0708b7477f9f0d7119dbecf70e85c756949954df6e8254 META.json
|
||||
SHA256 10dd10188276a0b8833a2a402f75595961f9aa9b60820deda3e12405374a5f4e META.yml
|
||||
SHA256 f319d964112761fb8122ae02bdd52e154a7e932daa8436a35ddce12a833a4132 Makefile.PL
|
||||
SHA256 cc56c62a1f4ec596550474677be680e59357e6bcc85c47a3980c969c6b30140a README
|
||||
SHA256 17766e80f51841d87fde559de98074c5f54a9b5c58aea9209916b0253cf44238 eg/hello_idn.pl
|
||||
SHA256 04bbc370a82d2a83cd7f6c8b05997fd503e615b8db97d3e6aa90278db82875c8 eg/hello_idn_email.pl
|
||||
SHA256 f3286492fcae83495a5f72a06de94ea1e2227855f6e70e919cac5b931fda62d7 lib/Net/IDN/Encode.pm
|
||||
SHA256 73187c982aab7aecd5affa041640970f20afa93194399274920ce07d61cf5ca8 lib/Net/IDN/Overview.pod
|
||||
SHA256 c461e5cd16a13cf54836839863ec0c300b9526a5d2079e4891c991d283f469c9 lib/Net/IDN/Punycode.pm
|
||||
SHA256 86237c6f390dc10f79f97769250bff5c4be7f72352d602c0af34fa71cc7d6d7d lib/Net/IDN/Punycode.xs
|
||||
SHA256 eb3a9e4cc58845a310d88d4ed6ab64f9ac7a1c605857e0c31d6683582b8bc90e lib/Net/IDN/Punycode/PP.pm
|
||||
SHA256 1220a53f28ad7934425a4e7d1aa64dd9d04338220f3cd0463b5ce08fd2b12d9a lib/Net/IDN/Standards.pod
|
||||
SHA256 eb22e6af3552da94e467fa8a028d6f80d3a18e09da2570e4518ef12248302cd7 lib/Net/IDN/UTS46.pm
|
||||
SHA256 39d83fe9f8a53bbca49e41c5e5acefa53cd6bd8b0f53a35739fd31998782db8c lib/Net/IDN/UTS46/_Mapping.pm
|
||||
SHA256 86bc523e0e50ab0aa8ee82f269251b962bc33a8240202d3d9fe60921c829ce14 t/00use.t
|
||||
SHA256 23e70f56f7cd4d6be661d0a2adb75864c79c6da67a2cdc05bd44b3a8f94ab33d t/domain_to_ascii.t
|
||||
SHA256 3cbe0ec5076c6e312b071772adeb05bd852d01084883071cd5640f7741fc2059 t/domain_to_unicode.t
|
||||
SHA256 8bc1406f117c71d42e8d32b045a6848ee6bd8c8795a2083d8776df9660ac5dae t/encode_bytes.t
|
||||
SHA256 9e7b8f0f75afc0f0eba2d9336db3c6258ca7a176be1dc5b6e1e8d4e639e1d361 t/encode_utf8.t
|
||||
SHA256 7b43ef649f6ed8d190112fda703dca176431bf64d0527128a7f2b4cb1bed8b58 t/punycode_vec-pp.t
|
||||
SHA256 16c6d0b535307b666d79280ee239ab27e1d559e95febfef4c2b483c2b2455986 t/punycode_vec-xs.t
|
||||
SHA256 de38cf9a35faf24d3ce081caaff55690502d51608c782d26667890d385a70eb7 t/uts46_api_call.t
|
||||
SHA256 231ae5a43577ada176e21b92984f4d1b5ba8234349d049f5fbbf2abb392f568c t/uts46_encode_bytes.t
|
||||
SHA256 6fa4c3d49561bfb59815d8bc4a1f21c44fa10a15f94e97f5bdba3ee1efbb63bf t/uts46_encode_utf8.t
|
||||
SHA256 16a3373aac4a377aa19ef6a703444d1eca3d52fa56075802a23a77da571bd138 t/uts46_to_ascii-trans.t
|
||||
SHA256 bf5effc197d5b7e641d303ab116a24c060d4e9a3a575d5c1ee12d42a6cd8deee t/uts46_to_ascii.t
|
||||
SHA256 c232df2fea5fefa7e5299a8430ef83aa909cd6820cefc55187dc375a59b04d0e t/uts46_to_unicode.t
|
||||
SHA256 038fb1201517afa4bba421f4d031395225e51fb900ce447a4985d7c594893d03 t/xtra_pp.t
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
|
||||
iQEzBAEBCAAdFiEEMNwyPmkVmFwPfD8AsRl4Qp8hpEYFAlu4AZcACgkQsRl4Qp8h
|
||||
pEbd3gf8DCr86hNujt20VyRb/EuvULx/fFlNwyp2KVEjtGpJcmRXTAm4Jn8pB4c6
|
||||
psa5kiuLqwSXrUIEw9JCj5h9AfKTeH/FD4SBYi+vRVgG7BS1b20IrJi9utJhzZGZ
|
||||
WvKN14rvaWUPdifj2t0KZ2jRuf2ZPPRjzcP62Rq9jEq/XUbZAjVI7v3LBHQIYLVE
|
||||
vnnZP3RsuglS6GdpIzGJ0CVUAsVBtfi+5asenMncX6HoQXqdS5G+1CvcYn8yLnJh
|
||||
7m/F6xweklxoke+Rv1nhDXNpp8MDYq7qa+dawnNHrSY+q43eGl0+cIraHmY40PTK
|
||||
0i93JopUhn7qSb5OSlmuI+bGVCNqow==
|
||||
=4+qi
|
||||
-----END PGP SIGNATURE-----
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/perl

# Example: print the ASCII (A-label) and Unicode (U-label) form of a few
# internationalized domain names.

use strict;
use warnings;
use utf8;

binmode STDOUT, ":utf8";

# Net::IDN::Encode exports nothing by default, so the conversion functions
# used below have to be requested explicitly.
use Net::IDN::Encode qw(domain_to_ascii domain_to_unicode);

my @domain = (
  '例.テスト',
  'müller.example.net',
);

foreach (@domain) {
  printf "%s: toASCII=<%s>, toUnicode=<%s>\n",
    $_, domain_to_ascii($_), domain_to_unicode($_);
}
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/perl

# Example: print the ASCII and Unicode form of a few email addresses with
# internationalized domain parts.

use strict;
use warnings;
use utf8;

binmode STDOUT, ":utf8";

# Net::IDN::Encode exports nothing by default, so the conversion functions
# used below have to be requested explicitly.
use Net::IDN::Encode qw(email_to_ascii email_to_unicode);

my @email = (
  'postmaster@例.テスト',
  'info@müller.example.net',
);

foreach (@email) {
  printf "%s: toASCII=<%s>, toUnicode=<%s>\n",
    $_, email_to_ascii($_), email_to_unicode($_);
}
|
|
@ -0,0 +1,347 @@
|
|||
package Net::IDN::Encode;
|
||||
|
||||
require 5.006;
|
||||
|
||||
use strict;
|
||||
use utf8;
|
||||
use warnings;
|
||||
|
||||
our $VERSION = "2.500";
|
||||
$VERSION = eval $VERSION;
|
||||
|
||||
use Carp;
|
||||
use Exporter;
|
||||
|
||||
# Exporter configuration: nothing is exported by default; every symbol
# listed under a tag becomes available on request (via @EXPORT_OK).
our @ISA    = ('Exporter');
our @EXPORT = ();

our %EXPORT_TAGS = (
  # public conversion functions
  'all'  => [ qw(
    to_ascii
    to_unicode
    domain_to_ascii
    domain_to_unicode
    email_to_ascii
    email_to_unicode
  ) ],
  # package variable and user-defined character properties
  '_var' => [ qw(
    $IDNA_PREFIX
    IsIDNADot
    IsIDNAAtsign
  ) ],
);

Exporter::export_ok_tags(keys %EXPORT_TAGS);
|
||||
|
||||
use Net::IDN::Punycode 1.102 ();
|
||||
|
||||
our $IDNA_PREFIX = 'xn--';
|
||||
# User-defined character property, usable in regexes as \p{IsIDNADot}:
# the code points recognised as label separators (one hex value per line).
sub IsIDNADot {
  return join "\n", qw(002E 3002 FF0E FF61);
}
|
||||
# User-defined character property, usable in regexes as \p{IsIDNAAtsign}:
# the code points recognised as at signs (one hex value per line).
sub IsIDNAAtsign {
  return join "\n", qw(0040 FE6B FF20);
}
|
||||
|
||||
require Net::IDN::UTS46; # after declaration of vars!
|
||||
|
||||
# Converts a single label to ASCII.
#
#   $label - the label; must not contain any label separator
#   %param - options, passed on unchanged to Net::IDN::UTS46::to_ascii
#
# Labels containing non-ASCII characters are converted by
# Net::IDN::UTS46::to_ascii; pure ASCII labels are returned as-is after
# a length check (1..63 characters).  Croaks on invalid input.
sub to_ascii {
  my ($label, %param) = @_;
  croak 'Invalid label' if $label =~ m/\p{IsIDNADot}/o;

  # Anything outside ASCII needs the full UTS #46 processing.
  return scalar Net::IDN::UTS46::to_ascii(@_)
    if $label =~ m/\P{ASCII}/o;

  my $label_length = length($label);
  croak 'label empty'    if $label_length < 1;
  croak 'label too long' if $label_length > 63;

  return $label;
}
|
||||
|
||||
# Converts a single label to Unicode.
#
#   $label - the label; must not contain any label separator
#   %param - options, passed on unchanged to Net::IDN::UTS46::to_unicode
#
# Only labels that contain non-ASCII characters or start with the ACE
# prefix ($IDNA_PREFIX, matched case-insensitively) are handed to
# Net::IDN::UTS46::to_unicode; every other label is returned unchanged.
# Croaks if the label contains a label separator.
sub to_unicode {
  my ($label, %param) = @_;
  croak 'Invalid label' if $label =~ m/\p{IsIDNADot}/o;

  my $needs_conversion = $label =~ m/\P{ASCII}|^(?:(?i)$IDNA_PREFIX)/o;
  return $label unless $needs_conversion;

  return scalar Net::IDN::UTS46::to_unicode(@_);
}
|
||||
|
||||
# Applies a label conversion function to every label of a domain name.
#
#   $domain      - the domain name (labels separated by any IsIDNADot char)
#   $to_function - code ref, called as $to_function->($label, %param)
#   $ascii       - if true, every separator is replaced by '.' (U+002E);
#                  otherwise the original separator is kept
#   %param       - options for $to_function; UseSTD3ASCIIRules defaults
#                  to 1 unless the caller supplied the key
#
# Returns the converted labels and separators joined into one string.
sub _domain {
  my ($domain, $to_function, $ascii, %param) = @_;
  $param{'UseSTD3ASCIIRules'} = 1 unless exists $param{'UseSTD3ASCIIRules'};

  # The capturing split yields labels at even and separators at odd indices.
  my @pieces = split /(\p{IsIDNADot})/o, $domain;

  my @converted;
  for my $index (0 .. $#pieces) {
    my $piece = $pieces[$index];
    if ($index % 2 == 0) {
      push @converted, $to_function->($piece, %param);
    }
    elsif ($ascii) {
      push @converted, '.';
    }
    else {
      push @converted, $piece;
    }
  }

  return join '', @converted;
}
|
||||
|
||||
# Converts the domain part of an email address.
#
#   $email       - the address; undef and '' are returned unchanged
#   $to_function - code ref, called as $to_function->($domain, %param)
#   $ascii       - if true, the at sign is replaced by '@' (U+0040);
#                  otherwise the original at sign is kept
#   %param       - options, passed on unchanged to $to_function
#
# Returns the local-part, the at sign and the converted domain (or the
# unchanged domain-literal such as "[192.0.2.1]").  If the address has
# neither a domain nor a domain-literal, only the local-part is returned.
#
# Croaks if the address cannot be parsed or if the local-part or the
# domain-literal contains non-ASCII characters.
sub _email {
  my ($email,$to_function,$ascii,%param) = @_;
  return $email if !defined($email) || $email eq '';

  $email =~ m/^(
        (?(?!\p{IsIDNAAtsign}|").|(?!))+
        |
        "(?:(?:[^"]|\\.)*[^\\])?"
      )
      (?:
        (\p{IsIDNAAtsign})
        (?:([^\[\]]*)|(\[.*\]))?
      )?$/xo || croak "Invalid email address";

  # Four capture groups: local-part, at sign, domain, domain-literal.
  # The domain-literal is the fourth group; leaving it out silently
  # dropped "@[...]" from the result.
  my($local_part,$at,$domain,$domain_literal) = ($1,$2,$3,$4);

  $local_part =~ m/\P{ASCII}/ && croak "Non-ASCII characters in local-part";
  $domain_literal =~ m/\P{ASCII}/ && croak "Non-ASCII characters in domain-literal" if $domain_literal;

  $domain = $to_function->($domain,%param) if $domain;
  $at = '@' if $ascii;

  return ($domain || $domain_literal)
    ? ($local_part.$at.($domain || $domain_literal))
    : ($local_part);
}
|
||||
|
||||
# Converts every label of $domain to ASCII (via to_ascii); all label
# separators become '.' (U+002E).  Remaining arguments are options.
sub domain_to_ascii {
  my $domain = shift;
  return _domain($domain, \&to_ascii, 1, @_);
}
|
||||
# Converts every label of $domain to Unicode (via to_unicode); label
# separators are kept as they are.  Remaining arguments are options.
sub domain_to_unicode {
  my $domain = shift;
  return _domain($domain, \&to_unicode, 0, @_);
}
|
||||
|
||||
# Converts the domain part of $email to ASCII (via domain_to_ascii); the
# at sign becomes '@' (U+0040).  Remaining arguments are options.
sub email_to_ascii {
  my $email = shift;
  return _email($email, \&domain_to_ascii, 1, @_);
}
|
||||
# Converts the domain part of $email to Unicode (via domain_to_unicode);
# the at sign is kept as it is.  Remaining arguments are options.
sub email_to_unicode {
  my $email = shift;
  return _email($email, \&domain_to_unicode, 0, @_);
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=encoding utf8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Net::IDN::Encode - Internationalizing Domain Names in Applications (IDNA)
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Net::IDN::Encode ':all';
|
||||
my $a = domain_to_ascii("müller.example.org");
|
||||
my $e = email_to_ascii("POSTMASTER@例。テスト");
|
||||
my $u = domain_to_unicode('EXAMPLE.XN--11B5BS3A9AJ6G');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides an easy-to-use interface for encoding and
|
||||
decoding Internationalized Domain Names (IDNs).
|
||||
|
||||
IDNs use characters drawn from a large repertoire (Unicode), but
|
||||
IDNA allows the non-ASCII characters to be represented using only
|
||||
the ASCII characters already allowed in so-called host names today
|
||||
(letter-digit-hyphen, C</[A-Z0-9-]/i>).
|
||||
|
||||
Use this module if you just want to convert domain names (or email addresses),
|
||||
using whatever IDNA standard is the best choice at the moment.
|
||||
|
||||
You should be familiar with Unicode support in perl, as this module expects
|
||||
correctly encoded input. See L<perlunitut>, L<perluniintro> and L<perlunicode>
|
||||
for details.
|
||||
|
||||
=head1 UNICODE VERSION
|
||||
|
||||
To convert labels correctly between Unicode and ASCII, each character in the
|
||||
label must be present in the Unicode version supported by your perl.
|
||||
Consequently, this module will refuse to convert labels with new Unicode
|
||||
characters on older perl versions (see below).
|
||||
|
||||
=head1 FUNCTIONS
|
||||
|
||||
By default, this module does not export any subroutines. You may
|
||||
use the C<:all> tag to import everything. You can also use regular
|
||||
expressions such as C</^to_/> or C</^email_/> to select some of
|
||||
the functions, see L<Exporter> for details.
|
||||
|
||||
The following functions are available:
|
||||
|
||||
=over
|
||||
|
||||
=item to_ascii( $label, %param )
|
||||
|
||||
Converts a single label C<$label> to ASCII. Will throw an exception on invalid
|
||||
input. If C<$label> is already a valid ASCII domain label (including most
|
||||
NON-LDH labels such as those used for SRV records and fake A-labels), this
|
||||
function will never fail but return C<$label> as-is if conversion would fail.
|
||||
|
||||
This function takes the following optional parameters (C<%param>):
|
||||
|
||||
=over
|
||||
|
||||
=item AllowUnassigned
|
||||
|
||||
(boolean) If set to a true value, code points that are unassigned in the
|
||||
Unicode version supported by your perl are allowed. This is an extension over
|
||||
UTS #46.
|
||||
|
||||
While this increases the number of labels that can be converted successfully
|
||||
(especially on older perls) and may thus maximize the compatibility with
|
||||
domain names created under future versions of Unicode, it also introduces the
|
||||
risk of incorrect conversions. Characters added in later versions of Unicode
|
||||
might have properties that affect the conversion; if these properties are not
|
||||
known on your version of perl, you might therefore end up with an incorrect
|
||||
conversion.
|
||||
|
||||
The default is false.
|
||||
|
||||
=item UseSTD3ASCIIRules
|
||||
|
||||
(boolean) If set to a true value, checks the label for compliance with S<STD 3>
|
||||
(S<RFC 1123>) syntax for host name parts. The exact checks done depend on the
|
||||
IDNA standard used. Usually, you will want to set this to true.
|
||||
|
||||
Please note that UseSTD3ASCIIRules only affects the conversion between ASCII
|
||||
labels (A-labels) and Unicode labels (U-labels). Labels that are in ASCII may
|
||||
still be passed-through as-is.
|
||||
|
||||
For historical reasons, the default is false (unlike C<domain_to_ascii>).
|
||||
|
||||
=item TransitionalProcessing
|
||||
|
||||
(boolean) If set to true, the conversion will be compatible with IDNA2003. This
|
||||
only affects four characters: C<'ß'> (U+00DF), C<'ς'> (U+03C2), ZWJ (U+200D) and
|
||||
ZWNJ (U+200C). Usually, you will want to set this to false.
|
||||
|
||||
The default is false.
|
||||
|
||||
=back
|
||||
|
||||
This function does not handle strings that consist of multiple labels (such as
|
||||
domain names). Use C<domain_to_ascii> instead.
|
||||
|
||||
=item to_unicode( $label, %param )
|
||||
|
||||
Converts a single label C<$label> to Unicode. Will throw an exception on
|
||||
invalid input. If C<$label> is an ASCII label (including most NON-LDH labels
|
||||
such as those used for SRV records), this function will not fail but return
|
||||
C<$label> as-is if conversion would fail.
|
||||
|
||||
This function takes the same optional parameters as C<to_ascii>,
|
||||
with the same defaults.
|
||||
|
||||
If C<$label> is already in ASCII, this function will never fail but return
|
||||
C<$label> as is as a last resort (i.e. pass-through).
|
||||
|
||||
This function takes the following optional parameters (C<%param>):
|
||||
|
||||
=over
|
||||
|
||||
=item AllowUnassigned
|
||||
|
||||
=item UseSTD3ASCIIRules
|
||||
|
||||
See C<to_ascii> above. Please note that there is no need for
|
||||
C<TransitionalProcessing> for C<to_unicode>.
|
||||
|
||||
=back
|
||||
|
||||
This function does not handle strings that consist of multiple labels (such as
|
||||
domain names). Use C<domain_to_unicode> instead.
|
||||
|
||||
=item domain_to_ascii( $domain, %param )
|
||||
|
||||
Converts all labels of the hostname C<$domain> (with labels separated by dots)
|
||||
to ASCII (using C<to_ascii>). Will throw an exception on invalid input.
|
||||
|
||||
This function takes the following optional parameters (C<%param>):
|
||||
|
||||
=over
|
||||
|
||||
=item AllowUnassigned
|
||||
|
||||
=item TransitionalProcessing
|
||||
|
||||
See C<to_ascii> above.
|
||||
|
||||
=item UseSTD3ASCIIRules
|
||||
|
||||
(boolean) If set to a true value, checks the label for compliance with S<STD 3>
|
||||
(S<RFC 1123>) syntax for host name parts.
|
||||
|
||||
The default is true (unlike C<to_ascii>).
|
||||
|
||||
=back
|
||||
|
||||
This function will convert all dots to ASCII, i.e. to U+002E (full stop). The
|
||||
following characters are recognized as dots: U+002E (full stop), U+3002
|
||||
(ideographic full stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth
|
||||
ideographic full stop).
|
||||
|
||||
=item domain_to_unicode( $domain, %param )
|
||||
|
||||
Converts all labels of the hostname C<$domain> (with labels separated by dots)
|
||||
to Unicode. Will throw an exception on invalid input.
|
||||
|
||||
This function takes the same optional parameters as C<domain_to_ascii>,
|
||||
with the same defaults.
|
||||
|
||||
This function takes the following optional parameters (C<%param>):
|
||||
|
||||
=over
|
||||
|
||||
=item AllowUnassigned
|
||||
|
||||
=item UseSTD3ASCIIRules
|
||||
|
||||
See C<domain_to_ascii> above. Please note that there is no C<TransitionalProcessing>
|
||||
for C<domain_to_unicode>.
|
||||
|
||||
=back
|
||||
|
||||
This function will preserve the original version of dots. The following
|
||||
characters are recognized as dots: U+002E (full stop), U+3002 (ideographic full
|
||||
stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full stop).
|
||||
|
||||
=item email_to_ascii( $email, %param )
|
||||
|
||||
Converts the domain part (right hand side, separated by an at sign) of an S<RFC
|
||||
2821>/2822 email address to ASCII, using C<domain_to_ascii>. May throw an
|
||||
exception on invalid input.
|
||||
|
||||
It takes the same parameters as C<domain_to_ascii>.
|
||||
|
||||
This function currently does not handle internationalization of the local-part
|
||||
(left hand side). Future versions of this module might implement an ASCII
|
||||
conversion for the local-part, should one be standardized.
|
||||
|
||||
This function will convert the at sign to ASCII, i.e. to U+0040 (commercial
|
||||
at), as well as label separators. The following characters are recognized as at
|
||||
signs: U+0040 (commercial at), U+FE6B (small commercial at) and U+FF20
|
||||
(fullwidth commercial at).
|
||||
|
||||
=item email_to_unicode( $email, %param )
|
||||
|
||||
Converts the domain part (right hand side, separated by an at sign) of an S<RFC
|
||||
2821>/2822 email address to Unicode, using C<domain_to_unicode>. May throw an
|
||||
exception on invalid input.
|
||||
|
||||
It takes the same parameters as C<domain_to_unicode>.
|
||||
|
||||
This function currently does not handle internationalization of the local-part
|
||||
(left hand side). Future versions of this module might implement a conversion
|
||||
from ASCII for the local-part, should one be standardized.
|
||||
|
||||
This function will preserve the original version of at signs (and label
|
||||
separators). The following characters are recognized as at signs: U+0040
|
||||
(commercial at), U+FE6B (small commercial at) and U+FF20 (fullwidth commercial
|
||||
at).
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Claus FE<auml>rber <CFAERBER@cpan.org>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright 2007-2014 Claus FE<auml>rber.
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Net::IDN::Punycode>, L<Net::IDN::UTS46>, L<Net::IDN::IDNA2003>,
|
||||
L<Net::IDN::IDNA2008>, S<UTS #46> (L<http://www.unicode.org/reports/tr46/>),
|
||||
S<RFC 5890> (L<http://tools.ietf.org/html/rfc5890>).
|
||||
|
||||
=cut
|
|
@ -0,0 +1,160 @@
|
|||
=encoding utf8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Net::IDN::Overview - Internationalized Domain Names for Applications (IDNA)
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The C<Net::IDN::*> modules provide a framework for the handling of
|
||||
Internationalized Domain Names for Applications (IDNA) in perl programmes.
|
||||
|
||||
This document provides an overview of the available modules in order to
|
||||
allow you to choose the best module for the task at hand.
|
||||
|
||||
=head2 AVAILABLE MODULES
|
||||
|
||||
=head3 HIGH-LEVEL (USE THIS)
|
||||
|
||||
=over
|
||||
|
||||
=item L<Net::IDN::Encode>
|
||||
|
||||
provides a high-level interface for converting domain names (and
|
||||
for convenience, email addresses).
|
||||
|
||||
Use this module if you just want to convert domain names and don't
|
||||
care about how this is done internally.
|
||||
|
||||
Currently, this module uses L<Net::IDN::UTS46>. However, this
|
||||
might change in the future if another specification (e.g. a
|
||||
revision of IDNA2008) becomes more appropriate.
|
||||
|
||||
The author aims for Net::IDN::Encode to always use the specification that will
|
||||
provide the "least surprising" results.
|
||||
|
||||
=back
|
||||
|
||||
=head3 STANDARD-SPECIFIC
|
||||
|
||||
These modules implement different versions of the IDNA
|
||||
specifications. Use one of these modules only if you require
|
||||
compatibility with a specific incarnation of IDNA.
|
||||
|
||||
=over
|
||||
|
||||
=item L<Net::IDN::IDNA2003>
|
||||
|
||||
implements the original IDNA specification, released in 2003
|
||||
(IDNA2003), which is now obsolete.
|
||||
|
||||
IDNA2003 is defined in RFC 3490 L<http://tools.ietf.org/rfc/3490>
|
||||
and related documents.
|
||||
|
||||
=begin comment
|
||||
|
||||
=item L<Net::IDN::IDNA2008>
|
||||
|
||||
implements the current IDNA specification, released in early 2010
|
||||
(IDNA2008 or IDNAbis).
|
||||
|
||||
Please note that this module will not allow you to convert some
|
||||
domain names, such as C<√.com> or C<I♥NY.com>, which were allowed
|
||||
in IDNA2003 but are disallowed in IDNA2008.
|
||||
|
||||
IDNA2008 is defined in RFC 5890 L<http://tools.ietf.org/rfc/5890>
|
||||
and related documents.
|
||||
|
||||
=end comment
|
||||
|
||||
=item L<Net::IDN::UTS46>
|
||||
|
||||
implements Unicode Technical Standard #46 (UTS #46
|
||||
L<http://unicode.org/reports/tr46/>), Unicode IDNA Compatibility
|
||||
Processing. This specification supports all domain names allowed
|
||||
under either IDNA2003 or IDNA2008.
|
||||
|
||||
=back
|
||||
|
||||
=head3 ENCODING
|
||||
|
||||
=over
|
||||
|
||||
=item L<Net::IDN::Punycode>
|
||||
|
||||
performs the actual conversion between the ASCII and Unicode form
|
||||
of strings. Punycode is defined in RFC 3492
|
||||
L<http://tools.ietf.org/rfc/3492> and related documents.
|
||||
|
||||
Usually, it is not a good idea to use this module directly. If you
|
||||
convert domain labels (or other strings) without proper
|
||||
preparation, you may end up with an ASCII encoding that is not
|
||||
interoperable or poses security issues due to spoofing.
|
||||
|
||||
Even if you think that your domain names are valid and in
|
||||
already-mapped format, you might be fooled by different Unicode
|
||||
normalization forms (for example, some environments might
|
||||
automatically convert your data to NFD, which breaks IDNA).
|
||||
|
||||
=back
|
||||
|
||||
=head3 DEPRECATED/COMPATIBILITY
|
||||
|
||||
These modules are only maintained in order to not break
|
||||
applications that might rely on them.
|
||||
|
||||
=over
|
||||
|
||||
=item L<Encode::Punycode>
|
||||
|
||||
provides an L<Encode> plugin for Punycode. As Punycode is not a
|
||||
general-purpose encoding, there are limited applications.
|
||||
|
||||
=item L<IDNA::Punycode>
|
||||
|
||||
has an API depending on global variables. Don't use this module.
|
||||
|
||||
=back
|
||||
|
||||
=head2 DISTRIBUTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item Net-IDN-Encode
|
||||
|
||||
is the main distribution covering the most common cases for
|
||||
converting domain names between ASCII and Unicode.
|
||||
|
||||
The author tries to keep the dependency chain as small as possible; currently
|
||||
this distribution only depends on perl 5.8.5 (including the core module
|
||||
L<Unicode::Normalize> ).
|
||||
|
||||
=item Net-IDN-IDNA2003
|
||||
|
||||
provides the L<Net::IDN::IDNA2003> module. This is separate
|
||||
because it has a dependency on L<Unicode::Stringprep> (through
|
||||
L<Net::IDN::Nameprep>).
|
||||
|
||||
=begin comment
|
||||
|
||||
=item Net-IDN-IDNA2008
|
||||
|
||||
provides the L<Net::IDN::IDNA2008> module. This is separate because it has an
|
||||
dependency on perl 5.10 or higher (through L<Unicode::Precis>).
|
||||
|
||||
=end comment
|
||||
|
||||
=item Encode-Punycode
|
||||
|
||||
=item IDNA-Punycode
|
||||
|
||||
are separate because they are of limited use to the average
|
||||
user/perl programmer.
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Claus FE<auml>rber <CFAERBER@cpan.org>
|
||||
|
||||
=cut
|
|
@ -0,0 +1,117 @@
|
|||
package Net::IDN::Punycode;
|
||||
|
||||
use 5.006;
|
||||
|
||||
use strict;
|
||||
use utf8;
|
||||
use warnings;
|
||||
|
||||
use Exporter;
|
||||
|
||||
our $VERSION = "2.500";
|
||||
$VERSION = eval $VERSION;
|
||||
|
||||
our @ISA = qw(Exporter);
|
||||
our @EXPORT = ();
|
||||
our @EXPORT_OK = ();
|
||||
our %EXPORT_TAGS = ( 'all' => [ qw(encode_punycode decode_punycode) ], );
|
||||
Exporter::export_ok_tags(keys %EXPORT_TAGS);
|
||||
our $_NO_XS;
|
||||
|
||||
# Try to load the compiled (XS) implementation.  Every failure inside the
# eval is deliberately ignored -- including the bare die that is raised
# when $_NO_XS is true, which skips the XS code on purpose.
eval {
  die if $_NO_XS;
  require XSLoader;
  XSLoader::load('Net::IDN::Punycode');
};

# If encode_punycode is still undefined at this point, fall back to the
# pure-Perl implementation and import its functions into this package.
if (!defined(&encode_punycode)) {
  require Net::IDN::Punycode::PP;
  Net::IDN::Punycode::PP->import(qw(:all));
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Net::IDN::Punycode - A Bootstring encoding of Unicode for IDNA (S<RFC 3492>)
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Net::IDN::Punycode qw(:all);
|
||||
$punycode = encode_punycode($unicode);
|
||||
$unicode = decode_punycode($punycode);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module implements the Punycode encoding, and only the Punycode encoding.
|
||||
|
||||
This module does not implement any other steps required for converting
|
||||
internationalized domain names (IDNs) to and from ASCII. In particular, it does
|
||||
not do any string preparation as specified by I<Nameprep>/I<IDNA2008>/I<PRECIS>
|
||||
and neither adds nor removes the ACE prefix (C<xn-->). Thus, use
|
||||
L<Net::IDN::Encode> if you want to convert domain names.
|
||||
|
||||
Punycode is an instance of a more general algorithm called Bootstring, which
|
||||
allows strings composed from a small set of "basic" code points to uniquely
|
||||
represent any string of code points drawn from a larger set. Punycode is
|
||||
Bootstring with particular parameter values appropriate for IDNA.
|
||||
|
||||
=head1 WARNING
|
||||
|
||||
You may be tempted to use this module directly and add/remove the ACE prefix
|
||||
(C<xn-->) in your code for performance reasons. Usually, this is not a good
|
||||
idea. If you convert domain labels (or other strings) without proper
|
||||
preparation, you may end up with an ASCII encoding that is not interoperable or
|
||||
even poses security issues due to spoofing.
|
||||
|
||||
Even if you think that your domain names are valid and already mapped to the
|
||||
correct form, this may not be true. For example, some environments might
|
||||
automatically convert your perfectly valid domain names to a different but
|
||||
equivalent Unicode normalization form (e.g., NFD instead of NFC), which already
|
||||
breaks IDNA.
|
||||
|
||||
=head1 FUNCTIONS
|
||||
|
||||
No functions are exported by default. You can use the tag C<:all>
|
||||
or import them individually.
|
||||
|
||||
The following functions are available:
|
||||
|
||||
=over
|
||||
|
||||
=item encode_punycode($input)
|
||||
|
||||
Encodes C<$input> with Punycode and returns the result.
|
||||
|
||||
This function will throw an exception on invalid/unencodable input.
|
||||
|
||||
=item decode_punycode($input)
|
||||
|
||||
Decodes C<$input> with Punycode and returns the result.
|
||||
|
||||
This function will throw an exception on invalid input.
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> (versions 0.01 to 0.02)
|
||||
|
||||
Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt> (versions 1.000 and higher)
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright 2002-2004 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
|
||||
|
||||
Copyright 2007-2014 Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt>
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
S<RFC 3492> (L<http://www.ietf.org/rfc/rfc3492.txt>),
|
||||
L<IETF::ACE>, L<Convert::RACE>
|
||||
|
||||
=cut
|
|
@ -0,0 +1,264 @@
|
|||
#include "EXTERN.h"
|
||||
#include "perl.h"
|
||||
#include "XSUB.h"
|
||||
|
||||
#ifdef XS_VERSION
|
||||
#undef XS_VERSION
|
||||
#endif
|
||||
#define XS_VERSION "2.500"
|
||||
|
||||
/* Bootstring parameter values for Punycode (RFC 3492, section 5). */
#define BASE 36
#define TMIN 1
#define TMAX 26
#define SKEW 38
#define DAMP 700
#define INITIAL_BIAS 72
#define INITIAL_N 128
|
||||
|
||||
/* True if x is a basic code point, i.e. UTF8_IS_INVARIANT holds for the
   byte.  The argument is parenthesised so that the cast applies to the
   whole expression passed in. */
#define isBASE(x) UTF8_IS_INVARIANT((unsigned char)(x))
|
||||
#define DELIM '-'
|
||||
|
||||
#define TMIN_MAX(t) (((t) < TMIN) ? (TMIN) : ((t) > TMAX) ? (TMAX) : (t))
|
||||
|
||||
/* Fallback for perl headers that do not provide utf8_to_uvchr_buf():
   map it to utf8_to_uvchr(), dropping the end-of-buffer argument.
   The expansion carries no trailing semicolon so that the macro behaves
   like an ordinary expression at the call site. */
#ifndef utf8_to_uvchr_buf
#define utf8_to_uvchr_buf(in_p,in_e,u8) utf8_to_uvchr(in_p,u8)
#endif
|
||||
|
||||
/* Encoding table: maps a digit value (0..BASE-1) to its character.
   Values 0..25 become 'a'..'z', values 26..35 become '0'..'9'. */
static char enc_digit[BASE] = {
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
};
|
||||
|
||||
/* Decoding table: maps a character code (0x00..0x7F) to its digit value,
   or to -1 if the character is not a digit.  Upper-case and lower-case
   letters map to the same values (0..25); '0'..'9' map to 26..35. */
static IV dec_digit[0x80] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00..0F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10..1F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20..2F */
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 30..3F */
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 40..4F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50..5F */
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 60..6F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 70..7F */
};
|
||||
|
||||
static int adapt(int delta, int numpoints, int first) {
|
||||
int k;
|
||||
|
||||
delta /= first ? DAMP : 2;
|
||||
delta += delta/numpoints;
|
||||
|
||||
for(k=0; delta > ((BASE-TMIN) * TMAX)/2; k += BASE)
|
||||
delta /= BASE-TMIN;
|
||||
|
||||
return k + (((BASE-TMIN+1) * delta) / (delta+SKEW));
|
||||
};
|
||||
|
||||
static void
|
||||
grow_string(SV *const sv, char **start, char **current, char **end, STRLEN add)
|
||||
{
|
||||
STRLEN len;
|
||||
|
||||
if(*current + add <= *end)
|
||||
return;
|
||||
|
||||
len = (*current - *start);
|
||||
*start = SvGROW(sv, (len + add + 15) & ~15);
|
||||
*current = *start + len;
|
||||
*end = *start + SvLEN(sv);
|
||||
}
|
||||
|
||||
MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode
|
||||
|
||||
SV*
|
||||
encode_punycode(input)
|
||||
SV * input
|
||||
PREINIT:
|
||||
UV c, m, n = INITIAL_N;
|
||||
int k, q, t;
|
||||
int bias = INITIAL_BIAS;
|
||||
int delta = 0, skip_delta;
|
||||
|
||||
const char *in_s, *in_p, *in_e, *skip_p;
|
||||
char *re_s, *re_p, *re_e;
|
||||
int first = 1;
|
||||
STRLEN length_guess, len, h, u8;
|
||||
|
||||
CODE:
|
||||
in_s = in_p = SvPVutf8(input, len);
|
||||
in_e = in_s + len;
|
||||
|
||||
length_guess = len;
|
||||
if(length_guess < 64) length_guess = 64; /* optimise for maximum length of domain names */
|
||||
length_guess += 2; /* plus DELIM + '\0' */
|
||||
|
||||
RETVAL = NEWSV('P',length_guess);
|
||||
SvPOK_only(RETVAL);
|
||||
re_s = re_p = SvPV_nolen(RETVAL);
|
||||
re_e = re_s + SvLEN(RETVAL);
|
||||
h = 0;
|
||||
|
||||
/* copy basic code points */
|
||||
while(in_p < in_e) {
|
||||
if( isBASE(*in_p) ) {
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
|
||||
*re_p++ = *in_p;
|
||||
h++;
|
||||
}
|
||||
in_p++;
|
||||
}
|
||||
|
||||
/* add DELIM if needed */
|
||||
if(h) {
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
|
||||
*re_p++ = DELIM;
|
||||
}
|
||||
|
||||
for(;;) {
|
||||
/* find smallest code point not yet handled */
|
||||
m = UV_MAX;
|
||||
q = skip_delta = 0;
|
||||
|
||||
for(in_p = skip_p = in_s; in_p < in_e;) {
|
||||
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8);
|
||||
c = NATIVE_TO_UNI(c);
|
||||
|
||||
if(c >= n && c < m) {
|
||||
m = c;
|
||||
skip_p = in_p;
|
||||
skip_delta = q;
|
||||
}
|
||||
if(c < n)
|
||||
++q;
|
||||
in_p += u8;
|
||||
}
|
||||
if(m == UV_MAX)
|
||||
break;
|
||||
|
||||
/* increase delta to the state corresponding to
|
||||
the m code point at the beginning of the string */
|
||||
delta += (m-n) * (h+1);
|
||||
n = m;
|
||||
|
||||
/* now find the chars to be encoded in this round */
|
||||
|
||||
delta += skip_delta;
|
||||
for(in_p = skip_p; in_p < in_e;) {
|
||||
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8);
|
||||
c = NATIVE_TO_UNI(c);
|
||||
|
||||
if(c < n) {
|
||||
++delta;
|
||||
} else if( c == n ) {
|
||||
q = delta;
|
||||
|
||||
for(k = BASE;; k += BASE) {
|
||||
t = TMIN_MAX(k - bias);
|
||||
if(q < t) break;
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
|
||||
*re_p++ = enc_digit[t + ((q-t) % (BASE-t))];
|
||||
q = (q-t) / (BASE-t);
|
||||
}
|
||||
if(q > BASE) croak("input exceeds punycode limit");
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
|
||||
*re_p++ = enc_digit[q];
|
||||
bias = adapt(delta, h+1, first);
|
||||
delta = first = 0;
|
||||
++h;
|
||||
}
|
||||
in_p += u8;
|
||||
}
|
||||
++delta;
|
||||
++n;
|
||||
}
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char));
|
||||
*re_p = 0;
|
||||
SvCUR_set(RETVAL, re_p - re_s);
|
||||
OUTPUT:
|
||||
RETVAL
|
||||
|
||||
SV*
|
||||
decode_punycode(input)
|
||||
SV * input
|
||||
PREINIT:
|
||||
UV c, n = INITIAL_N;
|
||||
IV dc;
|
||||
int i = 0, oldi, j, k, t, w;
|
||||
|
||||
int bias = INITIAL_BIAS;
|
||||
int delta = 0, skip_delta;
|
||||
|
||||
const char *in_s, *in_p, *in_e, *skip_p;
|
||||
char *re_s, *re_p, *re_e;
|
||||
int first = 1;
|
||||
STRLEN length_guess, len, h, u8;
|
||||
|
||||
CODE:
|
||||
in_s = in_p = SvPV_nolen(input);
|
||||
in_e = SvEND(input);
|
||||
|
||||
length_guess = SvCUR(input) * 2;
|
||||
if(length_guess < 256) length_guess = 256;
|
||||
|
||||
RETVAL = NEWSV('D',length_guess);
|
||||
SvPOK_only(RETVAL);
|
||||
re_s = re_p = SvPV_nolen(RETVAL);
|
||||
re_e = re_s + SvLEN(RETVAL);
|
||||
|
||||
skip_p = NULL;
|
||||
for(in_p = in_s; in_p < in_e; in_p++) {
|
||||
c = *in_p; /* we don't care whether it's UTF-8 */
|
||||
if(!isBASE(c)) croak("non-base character in input for decode_punycode");
|
||||
if(c == DELIM) skip_p = in_p;
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
|
||||
*re_p++ = c; /* copy it */
|
||||
}
|
||||
|
||||
if(skip_p) {
|
||||
h = skip_p - in_s; /* base chars handled */
|
||||
re_p = re_s + h; /* points to end of base chars */
|
||||
skip_p++; /* skip over DELIM */
|
||||
} else {
|
||||
h = 0; /* no base chars */
|
||||
re_p = re_s;
|
||||
skip_p = in_s; /* read everything */
|
||||
}
|
||||
|
||||
for(in_p = skip_p; in_p < in_e; i++) {
|
||||
oldi = i;
|
||||
w = 1;
|
||||
|
||||
for(k = BASE;; k+= BASE) {
|
||||
if(!(in_p < in_e)) croak("incomplete encoded code point in decode_punycode");
|
||||
dc = dec_digit[*in_p++]; /* we already know it's in 0..127 */
|
||||
if(dc < 0) croak("invalid digit in input for decode_punycode");
|
||||
c = (UV)dc;
|
||||
i += c * w;
|
||||
t = TMIN_MAX(k - bias);
|
||||
if(c < t) break;
|
||||
w *= BASE-t;
|
||||
}
|
||||
h++;
|
||||
bias = adapt(i-oldi, h, first);
|
||||
first = 0;
|
||||
n += i / h; /* code point n to insert */
|
||||
i = i % h; /* at position i */
|
||||
|
||||
u8 = UNISKIP(n); /* how many bytes we need */
|
||||
|
||||
j = i;
|
||||
for(skip_p = re_s; j > 0; j--) /* find position in UTF-8 */
|
||||
skip_p+=UTF8SKIP(skip_p);
|
||||
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, u8);
|
||||
if(skip_p < re_p) /* move succeeding chars */
|
||||
Move(skip_p, skip_p + u8, re_p - skip_p, char);
|
||||
re_p += u8;
|
||||
uvuni_to_utf8_flags((U8*)skip_p, n, UNICODE_ALLOW_ANY);
|
||||
}
|
||||
|
||||
if(!first) SvUTF8_on(RETVAL); /* UTF-8 chars have been inserted */
|
||||
grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
|
||||
*re_p = 0;
|
||||
SvCUR_set(RETVAL, re_p - re_s);
|
||||
OUTPUT:
|
||||
RETVAL
|
|
@ -0,0 +1,195 @@
|
|||
package Net::IDN::Punycode::PP;
|
||||
|
||||
use 5.008;
|
||||
|
||||
use strict;
|
||||
use utf8;
|
||||
use warnings;
|
||||
|
||||
use Carp;
|
||||
use Exporter;
|
||||
|
||||
our $VERSION = "2.500";
|
||||
|
||||
our @ISA = qw(Exporter);
|
||||
our @EXPORT = ();
|
||||
our @EXPORT_OK = qw(encode_punycode decode_punycode);
|
||||
our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK );
|
||||
|
||||
use integer;
|
||||
|
||||
use constant BASE => 36;
|
||||
use constant TMIN => 1;
|
||||
use constant TMAX => 26;
|
||||
use constant SKEW => 38;
|
||||
use constant DAMP => 700;
|
||||
use constant INITIAL_BIAS => 72;
|
||||
use constant INITIAL_N => 128;
|
||||
|
||||
use constant UNICODE_MIN => 0;
|
||||
use constant UNICODE_MAX => 0x10FFFF;
|
||||
|
||||
my $Delimiter = chr 0x2D;
|
||||
my $BasicRE = "\x00-\x7f";
|
||||
my $PunyRE = "A-Za-z0-9";
|
||||
|
||||
# Bias adaptation (RFC 3492, section 6.1).
#
# Takes the delta just processed, the number of code points handled so
# far and a flag telling whether this is the first delta of the string;
# returns the bias for the next delta.
sub _adapt {
  my($delta, $numpoints, $firsttime) = @_;

  # the first delta is damped, all later ones are halved
  if ($firsttime) {
    $delta = int($delta / DAMP);
  } else {
    $delta = int($delta / 2);
  }
  $delta += int($delta / $numpoints);

  my $k;
  for ($k = 0; $delta > int(((BASE - TMIN) * TMAX) / 2); $k += BASE) {
    $delta /= BASE - TMIN;
  }

  return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
}
|
||||
|
||||
# decode_punycode($input)
#
# Decodes a Punycode string (RFC 3492, section 6.2) and returns the
# resulting string of code points.  Returns undef for undefined input
# and '' for empty input.
#
# Croaks if the part before the last delimiter contains non-basic
# characters, if the encoded part contains characters that are not
# Punycode digits, if the input ends in the middle of an encoded code
# point, or if a decoded code point lies outside the Unicode range.
sub decode_punycode {
  die("Usage: Net::IDN::Punycode::decode_punycode(input)") unless @_;
  no warnings 'utf8';

  my $input = shift;

  my $n      = INITIAL_N;
  my $i      = 0;
  my $bias   = INITIAL_BIAS;
  my @output;

  return undef unless defined $input;
  return '' unless length $input;

  # everything up to the LAST delimiter is literal basic code points
  if($input =~ s/(.*)$Delimiter//os) {
    my $base_chars = $1;
    croak("non-base character in input for decode_punycode")
      if $base_chars =~ m/[^$BasicRE]/os;
    push @output, split //, $base_chars;
  }
  my $code = $input;

  croak('invalid digit in input for decode_punycode') if $code =~ m/[^$PunyRE]/os;

  # $code is pure ASCII now, so this cannot fail; it merely avoids
  # utf8 semantics in the substr/ord calls below.  (The downgrade has
  # to be applied to $code, which is the string actually consumed.)
  utf8::downgrade($code);

  while(length $code)
  {
    my $oldi = $i;
    my $w    = 1;
  LOOP:
    for (my $k = BASE; 1; $k += BASE) {
      # 4-arg substr on an empty string returns '' (not undef), so the
      # end of input has to be detected before consuming a character
      croak("incomplete encoded code point in decode_punycode") unless length $code;
      my $cp = substr($code, 0, 1, '');
      my $digit = ord $cp;

      ## NB: this depends on the PunyRE catching invalid digit characters
      ## before they turn up here
      ##
      $digit = $digit < 0x40 ? $digit + (26-0x30) : ($digit & 0x1f) -1;

      $i += $digit * $w;
      my $t =  $k - $bias;
      $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;

      last LOOP if $digit < $t;
      $w *= (BASE - $t);
    }
    $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
    $n += $i / (@output + 1);
    $i = $i % (@output + 1);
    croak('invalid code point') if $n < UNICODE_MIN or $n > UNICODE_MAX;
    splice(@output, $i, 0, chr($n));
    $i++;
  }
  return join '', @output;
}
|
||||
|
||||
# encode_punycode($input)
#
# Encodes a string of code points as Punycode (RFC 3492, section 6.3)
# and returns the encoded (byte) string: the basic code points of the
# input, a delimiter if there were any, and the encoded deltas.
#
# Croaks with "input exceeds punycode limit" if a final digit outside
# the range of Punycode digits would have to be emitted.
sub encode_punycode {
  die("Usage: Net::IDN::Punycode::encode_punycode(input)") unless @_;
  no warnings 'utf8';

  my $input = shift;
  my $input_length = length $input;

  ## my $output = join '', $input =~ m/([$BasicRE]+)/og; ## slower
  my $output = $input; $output =~ s/[^$BasicRE]+//ogs;

  # $h: code points handled so far, $bb: number of basic code points
  my $h = my $bb = length $output;
  $output .= $Delimiter if $bb > 0;
  utf8::downgrade($output);   ## no unnecessary use of utf8 semantics

  my @input = map ord, split //, $input;
  my @chars = sort { $a<=> $b } grep { $_ >= INITIAL_N } @input;

  my $n = INITIAL_N;
  my $delta = 0;
  my $bias = INITIAL_BIAS;

  foreach my $m (@chars) {
    next if $m < $n;          # duplicate of a code point already handled
    $delta += ($m - $n) * ($h + 1);
    $n = $m;
    for(my $i = 0; $i < $input_length; $i++)
    {
      my $c = $input[$i];
      $delta++ if $c < $n;
      if ($c == $n) {
        # emit $delta as a generalized variable-length integer
        my $q = $delta;
      LOOP:
        for (my $k = BASE; 1; $k += BASE) {
          my $t = $k - $bias;
          $t = $t < TMIN ? TMIN : $t > TMAX ? TMAX : $t;

          last LOOP if $q < $t;

          my $o = $t + (($q - $t) % (BASE - $t));
          $output .= chr $o + ($o < 26 ? 0x61 : 0x30-26);

          $q = int(($q - $t) / (BASE - $t));
        }
        # valid digit values are 0 .. BASE-1; BASE itself would map to ':'
        croak("input exceeds punycode limit") if $q >= BASE;
        $output .= chr $q + ($q < 26 ? 0x61 : 0x30-26);

        $bias = _adapt($delta, $h + 1, $h == $bb);
        $delta = 0;
        $h++;
      }
    }
    $delta++;
    $n++;
  }
  return $output;
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Net::IDN::Punycode::PP - pure-perl implementation of Net::IDN::Punycode
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
See L<Net::IDN::Punycode>.
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt> (versions 0.01 to 0.02)
|
||||
|
||||
Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt> (from version 1.00)
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright 2002-2004 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
|
||||
|
||||
Copyright 2007-2018 Claus FE<auml>rber E<lt>CFAERBER@cpan.orgE<gt>
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
S<RFC 3492> (L<http://www.ietf.org/rfc/rfc3492.txt>),
|
||||
L<IETF::ACE>, L<Convert::RACE>
|
||||
|
||||
=cut
|
|
@ -0,0 +1,96 @@
|
|||
=encoding utf8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Net::IDN::Standards -- Internationalized Domain Names for Applications (IDNA)
|
||||
|
||||
=head1 INTRODUCTION
|
||||
|
||||
Historically, domain names and host names were restricted to a
|
||||
limited repertoire of ASCII characters, i.e. letters, digits and
|
||||
the hyphen (i.e. C</[A-Z0-9-]/i>). Words and names from languages
|
||||
that require additional characters (such as diacritics or special
|
||||
characters) or other scripts could not be used.
|
||||
|
||||
Internationalized Domain Names (IDNs) extend the character
|
||||
repertoire for domain names from ASCII to Unicode while
|
||||
maintaining backwards compatibility with software that only
|
||||
expects and handles ASCII characters.
|
||||
|
||||
In order to do so, Unicode domain names are converted to ASCII
|
||||
using an ASCII-compatible encoding (ACE) called Punycode. On the
|
||||
wire, converted domain names start with C<xn-->, followed by the
|
||||
ASCII encoding of the Unicode string. The Unicode version is
|
||||
typically only shown in applications presenting the domain to the
|
||||
user (hence Internationalized Domain Names for Applications,
|
||||
IDNA). Internationalized Resource Identifiers (IRIs), the
|
||||
Unicode version of URLs, may also include domain names in their
|
||||
Unicode form.
|
||||
|
||||
The IDNA specifications, however, do not only cover the actual
|
||||
Punycode conversion but also include extensive rules for
|
||||
preparation (mapping and/or validation) of input strings. They
|
||||
typically define two functions, C<ToASCII> and C<ToUnicode>, which
|
||||
prepare and convert a domain name to the ACE version or the
|
||||
Unicode version.
|
||||
|
||||
=head1 DIFFERENT STANDARDS
|
||||
|
||||
"The nice thing about standards is that you have so many to
|
||||
choose from."
|
||||
-- Andrew S. Tanenbaum
|
||||
|
||||
While the actual Punycode conversion is stable, there are different
|
||||
specifications regarding mapping and/or validation (preparation):
|
||||
|
||||
=head2 IDNA2003
|
||||
|
||||
IDNA2003, which is defined in S<RFC 3490>
|
||||
(L<http://tools.ietf.org/html/rfc3490>) and related documents, was
|
||||
the original specification for the internationalization of domain
|
||||
names.
|
||||
|
||||
However, some issues were subsequently identified with IDNA2003:
|
||||
The specification was tied to Unicode 3.2 and therefore did not
|
||||
allow characters added in newer versions of Unicode (without
|
||||
updating the specifications).
|
||||
|
||||
Furthermore, a few characters were mapped to other characters or
|
||||
deleted although they would carry meaning in some languages (i.e.
|
||||
'ß' and 'ς' were mapped to 'ss' and 'σ'; ZWJ and ZWNJ were always
|
||||
mapped to nothing, although some scripts like Arabic require them
|
||||
for correct display).
|
||||
|
||||
=head2 IDNA2008
|
||||
|
||||
IDNA2008, which is defined in S<RFC 5890>
|
||||
(L<http://tools.ietf.org/html/rfc5890>) and related documents, resolves the
|
||||
issues found in IDNA2003.
|
||||
|
||||
This was done by allowing some characters that would either be
|
||||
mapped to other characters, mapped to zero and/or cause the
|
||||
preparation to fail. The new domain names would not be accessible
|
||||
by IDNA2003 implementations, of course.
|
||||
|
||||
However, IDNA2008 also disallowed a large number of characters
|
||||
that had been allowed in IDNA2003 (mostly symbols). An
|
||||
implementation of IDNA2008 would therefore no longer be able to
|
||||
access domain names such as C<√.com>, which had been registered
|
||||
under IDNA2003.
|
||||
|
||||
=head2 UTS #46
|
||||
|
||||
Unicode Technical Standard #46 (UTS #46,
|
||||
L<http://unicode.org/reports/tr46/>) solves this problem by
|
||||
allowing domain names that are valid in either IDNA2003 or
|
||||
IDNA2008.
|
||||
|
||||
This makes UTS #46 the perfect fit for domain lookup (be liberal
|
||||
in what you accept) but unsuitable for validating domain names
|
||||
prior to registration (be conservative in what you send).
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Claus FE<auml>rber <CFAERBER@cpan.org>
|
||||
|
||||
=cut
|
|
@ -0,0 +1,449 @@
|
|||
package Net::IDN::UTS46;
|
||||
|
||||
require 5.008005; # Unicode BiDi classes
|
||||
|
||||
use strict;
|
||||
use utf8;
|
||||
use warnings;
|
||||
|
||||
use Carp;
|
||||
|
||||
our $VERSION = "2.500";
|
||||
$VERSION = eval $VERSION;
|
||||
|
||||
our @ISA = ('Exporter');
|
||||
our @EXPORT = ();
|
||||
our @EXPORT_OK = ('uts46_to_ascii', 'uts46_to_unicode');
|
||||
our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK );
|
||||
|
||||
use Unicode::Normalize ();
|
||||
|
||||
use Net::IDN::Punycode 1.1 (':all');
|
||||
use Net::IDN::Encode 2.100 (':_var');
|
||||
use Net::IDN::UTS46::_Mapping 5.002 ('/^(Is|Map).*/'); # UTS #46 is only defined from Unicode 5.2.0
|
||||
|
||||
# uts46_to_unicode($domain, %param)
#
# Public entry point for ToUnicode.  Rejects the TransitionalProcessing
# option, then hands over to _process() with an undefined to-ASCII
# callback inserted as second argument.
sub uts46_to_unicode {
  my ($label, %param) = @_;

  if ($param{'TransitionalProcessing'}) {
    croak "Transitional processing is not defined for ToUnicode";
  }

  # insert "no ASCII conversion" between the domain and the parameters;
  # goto keeps this wrapper out of the call stack seen by croak
  splice @_, 1, 0, undef;
  goto &_process;
}
|
||||
|
||||
# uts46_to_ascii($domain, %param)
#
# Public entry point for ToASCII.  Hands over to _process() with a
# per-label callback inserted as second argument; the callback turns a
# label containing non-ASCII characters into its prefixed Punycode form
# and leaves pure-ASCII labels untouched.
sub uts46_to_ascii {
  my ($label, %param) = @_;

  my $ascii_label = sub {
    my $part = shift;
    return $part unless $part =~ m/\P{ASCII}/;

    eval { $part = $IDNA_PREFIX . encode_punycode($part) };
    croak "$@ [A3]" if $@;
    return $part;
  };

  # goto keeps this wrapper out of the call stack seen by croak
  splice @_, 1, 0, $ascii_label;
  goto &_process;
}
|
||||
|
||||
*to_unicode = \&uts46_to_unicode;
|
||||
*to_ascii = \&uts46_to_ascii;
|
||||
|
||||
# _process($domain, $to_ascii, %param)
#
# Common implementation of the UTS #46 processing steps (map, normalize,
# break, convert/validate).  $to_ascii is either undef (ToUnicode) or a
# code reference that is called as $to_ascii->($label, %param) for every
# label (ToASCII).  Returns the processed domain name; croaks on any
# validation error.
sub _process {
  my ($label, $to_ascii, %param) = @_;
  no warnings 'utf8';

  for my $name (keys %param) {
    next if $name =~ m/^(?:TransitionalProcessing|UseSTD3ASCIIRules|AllowUnassigned)$/;
    croak "The following parameter is invalid: $name";
  }

  # defaults for parameters that were not passed at all
  %param = (
    'TransitionalProcessing' => 0,
    'UseSTD3ASCIIRules'      => 1,
    'AllowUnassigned'        => 0,
    %param,
  );

  my $want_ascii = defined $to_ascii;

  # 1. Map
  #   - disallowed
  #
  if($param{'AllowUnassigned'}) {
    if($label =~ m/(\p{Is_DisallowedAssigned})/) {
      croak sprintf('disallowed character U+%04X', ord($1));
    }
  } else {
    if($label =~ m/(\p{IsDisallowed})/) {
      croak sprintf('disallowed character U+%04X', ord($1));
    }
  }

  if($param{'UseSTD3ASCIIRules'}) {
    if($label =~ m/(\p{IsDisallowedSTD3Valid})/) {
      croak sprintf('disallowed_STD3_valid character U+%04X', ord($1));
    }
    if($label =~ m/(\p{IsDisallowedSTD3Mapped})/) {
      croak sprintf('disallowed_STD3_mapped character U+%04X', ord($1));
    }
  }

  #   - ignored
  #
  $label = MapIgnored($label);
  ## $label = MapDisallowedSTD3Ignored($label) if(!$param{'UseSTD3ASCIIRules'});

  #   - mapped
  #
  $label = MapMapped($label);
  unless($param{'UseSTD3ASCIIRules'}) {
    $label = MapDisallowedSTD3Mapped($label);
  }

  #   - deviation
  if($param{'TransitionalProcessing'}) {
    $label = MapDeviation($label);
  }

  # 2. Normalize
  #
  $label = Unicode::Normalize::NFC($label);

  # 3. Break
  #
  my @ll = split /\./, $label, -1;

  ## IDNA test vectors: an empty label at the end (separating the root
  ## domain "", if present) must be preserved.  It is not checked for
  ## the minimum length criteria and the dot separating it is not
  ## included in the maximum length of the domain.
  ##
  my $rooted = @ll && length($ll[-1]) < 1;
  pop @ll if $rooted;

  my $is_bidi = 0;

  # 4. Convert/Validate
  #
  foreach my $l (@ll) {
    if($l =~ m/^(?:(?i)$IDNA_PREFIX)(\p{ASCII}+)$/o) {
      # an ACE label: decode it and validate the decoded form
      eval { $l = decode_punycode($1); };
      croak 'Invalid Punycode sequence [P4]' if $@;

      _validate_label($l, %param,
        'TransitionalProcessing' => 0,
      );
    } else {
      # the label went through NFC above, no need to check again
      _validate_label($l,%param,'_AssumeNFC' => 1);
    }

    if(!$is_bidi and $l =~ m/[\p{Bc:R}\p{Bc:AL}\p{Bc:AN}]/) {
      $is_bidi = 1;
    }
  }

  foreach my $l (@ll) {
    _validate_bidi($l,%param) if $is_bidi;
    _validate_contextj($l,%param);

    $l = $to_ascii->($l, %param) if $want_ascii;

    ## IDNA test vectors: labels have to be checked for the minimum
    ## length of 1 (but not for the maximum length of 63) even in
    ## to_unicode.
    ##
    croak "empty label [A4_2]" if length($l) < 1;
    croak "label too long [A4_2]" if length($l) > 63 and $want_ascii;
  }

  my $domain = join('.', @ll);

  ## IDNA test vectors: domains have to be checked for the minimum
  ## length of 1 (but not for the maximum length of 253 excluding a
  ## final dot) even in to_unicode.
  ##
  croak "empty domain name [A4_1]" if length($domain) < 1;
  croak "domain name too long [A4_1]" if length($domain) > 253 and $want_ascii;

  $domain .= '.' if $rooted;

  return $domain;
}
|
||||
|
||||
# _validate_label($label, %param)
#
# Applies the per-label validity criteria; the bracketed codes in the
# error messages ([V1] .. [V6]) identify the criterion that failed.
# Besides the public parameters, the internal flag '_AssumeNFC' skips
# the normalization check.  Returns 1 on success, croaks otherwise.
sub _validate_label {
  my($l,%param) = @_;
  no warnings 'utf8';

  $l eq Unicode::Normalize::NFC($l)	or croak "not in Unicode Normalization Form NFC [V1]" unless $param{'_AssumeNFC'};

  $l =~ m/^..--/			and croak "contains U+002D HYPHEN-MINUS in both third and fourth position [V2]";
  $l =~ m/^-/				and croak "begins with U+002D HYPHEN-MINUS [V3]";
  $l =~ m/-$/				and croak "ends with U+002D HYPHEN-MINUS [V3]";
  $l =~ m/\./				and croak "contains U+002E FULL STOP [V4]";
  $l =~ m/^\p{IsMark}/			and croak "begins with General_Category=Mark [V5]";

  unless($param{'AllowUnassigned'}) {
    $l =~m/(\p{Unassigned})/		and croak sprintf "contains unassigned character U+%04X [V6]", ord $1;
  }

  if($param{'UseSTD3ASCIIRules'}) {
    $l =~m/(\p{IsDisallowedSTD3Valid})/	and croak sprintf "contains disallowed_STD3_valid character U+%04X [V6]", ord $1;
  }

  if($param{'TransitionalProcessing'}) {
    $l =~ m/(\p{IsDeviation})/		and croak sprintf "contains deviation character U+%04X [V6]", ord $1;
  }

  $l =~ m/(\p{IsIgnored})/		and croak sprintf "contains ignored character U+%04X [V6]", ord $1;
  $l =~ m/(\p{IsMapped}|\p{IsDisallowedSTD3Mapped})/ and croak sprintf "contains mapped character U+%04X [V6]", ord $1;
  $l =~ m/(\p{IsDisallowed})/		and croak sprintf "contains disallowed character U+%04X [V6]", ord $1;

  return 1;
}
|
||||
|
||||
# For perl versions < 5.11, there is a bug where Bc:L does not match some
|
||||
# character blocks that are not fully included in the main UnicodeData.txt file:
|
||||
#
|
||||
# 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
||||
# 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
|
||||
# 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
|
||||
# 9FBB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
|
||||
# AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
|
||||
# D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
|
||||
# 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
|
||||
# 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
|
||||
#
|
||||
my $_RE_BidiClass_L = $] >= 5.011 ? '\p{Bc:L}' : '\p{Bc:L}\x{3400}-\x{4DB5}\x{4E00}-\x{9FBB}\x{AC00}-\x{D7A3}\x{20000}-\x{2A6D6}';
|
||||
|
||||
# _validate_bidi($label, %param)
#
# Checks a single label against the bidi conditions; the bracketed codes
# in the error messages ([B1] .. [B6]) identify the failing condition.
# Returns 1 if the label is empty or passes, croaks otherwise.
sub _validate_bidi {
  my($l,%param) = @_;
  no warnings 'utf8';

  return 1 if !length($l);

  # label starting with a left-to-right character
  if( $l =~ m/^[$_RE_BidiClass_L]/o ) {
    if( $l =~ m/[^$_RE_BidiClass_L\p{Bc:EN}\p{Bc:ES}\p{Bc:CS}\p{Bc:ET}\p{Bc:BN}\p{Bc:ON}\p{Bc:NSM}]/o ) {
      croak 'contains characters with wrong bidi class for LTR [B5]';
    }
    unless( $l =~ m/[$_RE_BidiClass_L\p{Bc:EN}][\p{Bc:NSM}\P{Assigned}]*$/o ) {
      croak 'ends with character of wrong bidi class for LTR [B6]';
    }
    return 1;
  }

  # label starting with a right-to-left character
  if( $l =~ m/^[\p{Bc:R}\p{Bc:AL}]/ ) {
    if( $l =~ m/[^\p{Bc:R}\p{Bc:AL}\p{Bc:AN}\p{Bc:EN}\p{Bc:ES}\p{Bc:CS}\p{Bc:ET}\p{Bc:ON}\p{Bc:BN}\p{Bc:NSM}]/ ) {
      croak 'contains characters with wrong bidi class for RTL [B2]';
    }
    unless( $l =~ m/[\p{Bc:R}\p{Bc:AL}\p{Bc:EN}\p{Bc:AN}][\p{Bc:NSM}\P{Assigned}]*$/ ) {
      croak 'ends with character of wrong bidi class for RTL [B3]';
    }
    if( $l =~ m/\p{Bc:EN}.*\p{Bc:AN}|\p{Bc:AN}.*\p{Bc:EN}/ ) {
      croak 'contains characters with both bidi class EN and AN [B4]';
    }
    return 1;
  }

  # neither of the two: the first character has a disallowed bidi class
  croak 'starts with character of wrong bidi class [B1]';
}
|
||||
|
||||
# For perl versions < 5.11, some Unicode properties such as Ccc or Joining_Type
|
||||
# are not supported. Instead, we use a concrete list of characters; this is safe
|
||||
# because the Unicode version supported by these perl versions will not be
|
||||
# updated. For newer perl versions, we use the Unicode property (which is
|
||||
# supported from 5.11), so we will always be up-to-date with the Unicode
|
||||
# version supported by our underlying perl.
|
||||
#
|
||||
my $_RE_Ccc_Virama = $] >= 5.011 ? qr/\p{Ccc:Virama}/ : qr/[\x{094D}\x{09CD}\x{0A4D}\x{0ACD}\x{0B4D}\x{0BCD}\x{0C4D}\x{0CCD}\x{0D4D}\x{0DCA}\x{0E3A}\x{0F84}\x{1039}\x{103A}\x{1714}\x{1734}\x{17D2}\x{1A60}\x{1B44}\x{1BAA}\x{1BF2}\x{1BF3}\x{2D7F}\x{A806}\x{A8C4}\x{A953}\x{A9C0}\x{ABED}\x{00010A3F}\x{00011046}\x{000110B9}]/;
|
||||
my $_RE_JoiningType_L = $] >= 5.011 ? qr/\p{Joining_Type:L}/ : qr/(?!)/;
|
||||
my $_RE_JoiningType_R = $] >= 5.011 ? qr/\p{Joining_Type:R}/ : qr/[\x{0622}-\x{0625}\x{0627}\x{0629}\x{062F}-\x{0632}\x{0648}\x{0671}-\x{0673}\x{0675}-\x{0677}\x{0688}-\x{0699}\x{06C0}\x{06C3}-\x{06CB}\x{06CD}\x{06CF}\x{06D2}\x{06D3}\x{06D5}\x{06EE}\x{06EF}\x{0710}\x{0715}-\x{0719}\x{071E}\x{0728}\x{072A}\x{072C}\x{072F}\x{074D}\x{0759}-\x{075B}\x{076B}\x{076C}\x{0771}\x{0773}\x{0774}\x{0778}\x{0779}]/;
|
||||
my $_RE_JoiningType_D = $] >= 5.011 ? qr/\p{Joining_Type:D}/ : qr/[\x{0620}\x{0626}\x{0628}\x{062A}-\x{062E}\x{0633}-\x{063F}\x{0641}-\x{0647}\x{0649}\x{064A}\x{066E}\x{066F}\x{0678}-\x{0687}\x{069A}-\x{06BF}\x{06C1}\x{06C2}\x{06CC}\x{06CE}\x{06D0}\x{06D1}\x{06FA}-\x{06FC}\x{06FF}\x{0712}-\x{0714}\x{071A}-\x{071D}\x{071F}-\x{0727}\x{0729}\x{072B}\x{072D}\x{072E}\x{074E}-\x{0758}\x{075C}-\x{076A}\x{076D}-\x{0770}\x{0772}\x{0775}-\x{0777}\x{077A}-\x{077F}\x{07CA}-\x{07EA}]/;
|
||||
my $_RE_JoiningType_T = $] >= 5.011 ? qr/\p{Joining_Type:T}/ : qr/[\x{00AD}\x{0300}-\x{036F}\x{0483}-\x{0489}\x{0591}-\x{05BD}\x{05BF}\x{05C1}\x{05C2}\x{05C4}\x{05C5}\x{05C7}\x{0610}-\x{061A}\x{064B}-\x{065F}\x{0670}\x{06D6}-\x{06DC}\x{06DF}-\x{06E4}\x{06E7}\x{06E8}\x{06EA}-\x{06ED}\x{070F}\x{0711}\x{0730}-\x{074A}\x{07A6}-\x{07B0}\x{07EB}-\x{07F3}\x{0816}-\x{0819}\x{081B}-\x{0823}\x{0825}-\x{0827}\x{0829}-\x{082D}\x{0859}-\x{085B}\x{0900}-\x{0902}\x{093A}\x{093C}\x{0941}-\x{0948}\x{094D}\x{0951}-\x{0957}\x{0962}\x{0963}\x{0981}\x{09BC}\x{09C1}-\x{09C4}\x{09CD}\x{09E2}\x{09E3}\x{0A01}\x{0A02}\x{0A3C}\x{0A41}\x{0A42}\x{0A47}\x{0A48}\x{0A4B}-\x{0A4D}\x{0A51}\x{0A70}\x{0A71}\x{0A75}\x{0A81}\x{0A82}\x{0ABC}\x{0AC1}-\x{0AC5}\x{0AC7}\x{0AC8}\x{0ACD}\x{0AE2}\x{0AE3}\x{0B01}\x{0B3C}\x{0B3F}\x{0B41}-\x{0B44}\x{0B4D}\x{0B56}\x{0B62}\x{0B63}\x{0B82}\x{0BC0}\x{0BCD}\x{0C3E}-\x{0C40}\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}\x{0C56}\x{0C62}\x{0C63}\x{0CBC}\x{0CBF}\x{0CC6}\x{0CCC}\x{0CCD}\x{0CE2}\x{0CE3}\x{0D41}-\x{0D44}\x{0D4D}\x{0D62}\x{0D63}\x{0DCA}\x{0DD2}-\x{0DD4}\x{0DD6}\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}\x{0EB1}\x{0EB4}-\x{0EB9}\x{0EBB}\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}\x{0F19}\x{0F35}\x{0F37}\x{0F39}\x{0F71}-\x{0F7E}\x{0F80}-\x{0F84}\x{0F86}\x{0F87}\x{0F8D}-\x{0F97}\x{0F99}-\x{0FBC}\x{0FC6}\x{102D}-\x{1030}\x{1032}-\x{1037}\x{1039}\x{103A}\x{103D}\x{103E}\x{1058}\x{1059}\x{105E}-\x{1060}\x{1071}-\x{1074}\x{1082}\x{1085}\x{1086}\x{108D}\x{109D}\x{135D}-\x{135F}\x{1712}-\x{1714}\x{1732}-\x{1734}\x{1752}\x{1753}\x{1772}\x{1773}\x{17B4}\x{17B5}\x{17B7}-\x{17BD}\x{17C6}\x{17C9}-\x{17D3}\x{17DD}\x{180B}-\x{180D}\x{18A9}\x{1920}-\x{1922}\x{1927}\x{1928}\x{1932}\x{1939}-\x{193B}\x{1A17}\x{1A18}\x{1A56}\x{1A58}-\x{1A5E}\x{1A60}\x{1A62}\x{1A65}-\x{1A6C}\x{1A73}-\x{1A7C}\x{1A7F}\x{1B00}-\x{1B03}\x{1B34}\x{1B36}-\x{1B3A}\x{1B3C}\x{1B42}\x{1B6B}-\x{1B73}\x{1B80}\x{1B81}\x{1BA2}-\x{1BA5}\x{1BA8}\x{1BA9}\x{1BE6}\x{1BE8}\x{1BE9}\x{1BED}\x{1BEF}-\x{1BF1}\x{1C2C}-\x{1C33}\x{1C36}\x{1
C37}\x{1CD0}-\x{1CD2}\x{1CD4}-\x{1CE0}\x{1CE2}-\x{1CE8}\x{1CED}\x{1DC0}-\x{1DE6}\x{1DFC}-\x{1DFF}\x{200B}\x{200E}\x{200F}\x{202A}-\x{202E}\x{2060}-\x{2064}\x{206A}-\x{206F}\x{20D0}-\x{20F0}\x{2CEF}-\x{2CF1}\x{2D7F}\x{2DE0}-\x{2DFF}\x{302A}-\x{302F}\x{3099}\x{309A}\x{A66F}-\x{A672}\x{A67C}\x{A67D}\x{A6F0}\x{A6F1}\x{A802}\x{A806}\x{A80B}\x{A825}\x{A826}\x{A8C4}\x{A8E0}-\x{A8F1}\x{A926}-\x{A92D}\x{A947}-\x{A951}\x{A980}-\x{A982}\x{A9B3}\x{A9B6}-\x{A9B9}\x{A9BC}\x{AA29}-\x{AA2E}\x{AA31}\x{AA32}\x{AA35}\x{AA36}\x{AA43}\x{AA4C}\x{AAB0}\x{AAB2}-\x{AAB4}\x{AAB7}\x{AAB8}\x{AABE}\x{AABF}\x{AAC1}\x{ABE5}\x{ABE8}\x{ABED}\x{FB1E}\x{FE00}-\x{FE0F}\x{FE20}-\x{FE26}\x{FEFF}\x{FFF9}-\x{FFFB}\x{101FD}\x{10A01}-\x{10A03}\x{10A05}\x{10A06}\x{10A0C}-\x{10A0F}\x{10A38}-\x{10A3A}\x{10A3F}\x{11001}\x{11038}-\x{11046}\x{11080}\x{11081}\x{110B3}-\x{110B6}\x{110B9}\x{110BA}\x{110BD}\x{1D167}-\x{1D169}\x{1D173}-\x{1D182}\x{1D185}-\x{1D18B}\x{1D1AA}-\x{1D1AD}\x{1D242}-\x{1D244}\x{E0001}\x{E0020}-\x{E007F}\x{E0100}-\x{E01EF}]/;
|
||||
|
||||
# _validate_contextj($label, %param)
#
# Checks the CONTEXTJ rules of RFC 5892, Appendix A, for EVERY occurrence
# of U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER in the
# label.  Returns 1 if the label is undefined/empty or all occurrences
# are in a permitted context; croaks otherwise.
#
# RFC 5892, Appendix A.1 (U+200C ZERO WIDTH NON-JOINER), rule set:
#   False;
#   If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
#   If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
#      (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
#
# RFC 5892, Appendix A.2 (U+200D ZERO WIDTH JOINER), rule set:
#   False;
#   If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
sub _validate_contextj {
  my($l,%param) = @_;
  no warnings 'utf8';
  return 1 unless defined($l) && length($l);

  # catch ContextJ characters without defined rule (as of Unicode 6.0.0, this cannot match)
  #
  $l =~ m/([^\x{200C}\x{200D}\P{Join_Control}])/ and croak sprintf "contains CONTEXTJ character U+%04X without defined rule [C1]", ord($1);

  # Look at each joiner on its own.  A single unanchored alternation is
  # not sufficient here: it stops at the leftmost joiner, so an invalid
  # joiner following a valid one would go unnoticed.
  while($l =~ m/([\x{200C}\x{200D}])/g) {
    my $cp     = $1;                           # copy before $1 is clobbered below
    my $before = substr($l, 0, pos($l) - 1);   # text left of the joiner
    my $after  = substr($l, pos($l));          # text right of the joiner

    # both joiners are allowed immediately after a virama
    next if $before =~ m/$_RE_Ccc_Virama\z/o;

    # ZWNJ only: allowed between joining characters, with any number of
    # transparent characters on either side
    next if $cp eq "\x{200C}"
      and $before =~ m/(?:$_RE_JoiningType_L|$_RE_JoiningType_D)$_RE_JoiningType_T*\z/o
      and $after  =~ m/\A$_RE_JoiningType_T*(?:$_RE_JoiningType_R|$_RE_JoiningType_D)/o;

    croak sprintf "rule for CONTEXTJ character U+%04X not satisfied [C2]", ord($cp);
  }

  return 1;
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=encoding utf8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Net::IDN::UTS46 - Unicode IDNA Compatibility Processing (S<UTS #46>)
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Net::IDN::UTS46 ':all';
|
||||
my $a = uts46_to_ascii("müller.example.org");
|
||||
my $b = Net::IDN::UTS46::to_unicode('EXAMPLE.XN--11B5BS3A9AJ6G');
|
||||
|
||||
$domain =~ m/\p{Net::IDN::UTS46::IsDisallowed}/ and die 'oops';
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module implements the Unicode Technical Standard #46 (Unicode IDNA
|
||||
Compatibility Processing). UTS #46 is one variant of Internationalized Domain
|
||||
Names (IDN), which aims to be compatible with domain names registered under
|
||||
either IDNA2003 or IDNA2008.
|
||||
|
||||
You should use this module if you want an exact implementation of the UTS #46
|
||||
specification.
|
||||
|
||||
However, if you just want to convert domain names and don't care which standard
|
||||
is used internally, you should use L<Net::IDN::Encode> instead.
|
||||
|
||||
=head1 FUNCTIONS
|
||||
|
||||
By default, this module does not export any subroutines. You may use the
|
||||
C<:all> tag to import everything.
|
||||
|
||||
You can omit the C<'uts46_'> prefix when accessing the functions with a
|
||||
fully qualified module name (e.g. you can access C<uts46_to_unicode> as
|
||||
C<Net::IDN::UTS46::uts46_to_unicode> or C<Net::IDN::UTS46::to_unicode>).
|
||||
|
||||
The following functions are available:
|
||||
|
||||
=over
|
||||
|
||||
=item uts46_to_ascii( $domain, %param )
|
||||
|
||||
Implements the "ToASCII" function from UTS #46, section 4.2. It converts a domain name to
|
||||
ASCII and throws an exception on invalid input.
|
||||
|
||||
This function takes the following optional parameters (C<%param>):
|
||||
|
||||
=over
|
||||
|
||||
=item AllowUnassigned
|
||||
|
||||
(boolean) If set to a true value, unassigned code points in the label are
|
||||
allowed. This is an extension over UTS #46.
|
||||
|
||||
The default is false.
|
||||
|
||||
=item UseSTD3ASCIIRules
|
||||
|
||||
(boolean) If set to a true value, checks the label for compliance with S<STD 3>
|
||||
(S<RFC 1123>) syntax for host name parts.
|
||||
|
||||
The default is true.
|
||||
|
||||
=item TransitionalProcessing
|
||||
|
||||
(boolean) If set to true, the conversion will be compatible with IDNA2003. This
|
||||
only affects four characters: C<'ß'> (U+00DF), C<'ς'> (U+03C2), ZWJ (U+200D) and
|
||||
ZWNJ (U+200C). Usually, you will want to set this to false.
|
||||
|
||||
The default is false.
|
||||
|
||||
=back
|
||||
|
||||
=item uts46_to_unicode( $domain, %param )
|
||||
|
||||
Implements the "ToUnicode" function from UTS #46, section 4.3. It converts a domain name to
|
||||
Unicode and throws an exception on invalid input.
|
||||
|
||||
This function takes the following optional parameters (C<%param>):
|
||||
|
||||
=over
|
||||
|
||||
=item AllowUnassigned
|
||||
|
||||
see above.
|
||||
|
||||
=item UseSTD3ASCIIRules
|
||||
|
||||
see above.
|
||||
|
||||
=item TransitionalProcessing
|
||||
|
||||
(boolean) If given, this parameter must be false. The UTS #46 specification
|
||||
does not define transitional processing for ToUnicode.
|
||||
|
||||
=back
|
||||
|
||||
=back
|
||||
|
||||
=head1 UNICODE CHARACTER PROPERTIES
|
||||
|
||||
This module also defines the character properties listed below.
|
||||
|
||||
Each character has exactly one of the following properties:
|
||||
|
||||
=over
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsValid}>
|
||||
|
||||
The code point is valid and not modified in UTS #46 (i.e. it is not a deviation character).
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsIgnored}>
|
||||
|
||||
The code point is removed (i.e. mapped to an empty string) in UTS #46.
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsMapped}>
|
||||
|
||||
The code point is replaced by another string in UTS #46.
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsDeviation}>
|
||||
|
||||
The code point is either mapped or valid, depending on whether the processing is transitional or not.
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsDisallowed}>
|
||||
|
||||
The code point is not allowed in UTS #46.
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsDisallowedSTD3Ignored}>
|
||||
|
||||
The code point is not allowed in UTS #46 if C<UseSTD3ASCIIRules> are used but would be ignored otherwise.
|
||||
|
||||
=item C<\p{Net::IDN::UTS46::IsDisallowedSTD3Mapped}>
|
||||
|
||||
The code point is not allowed in UTS #46 if C<UseSTD3ASCIIRules> are used but would be mapped otherwise.
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Claus FE<auml>rber <CFAERBER@cpan.org>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright 2011-2018 Claus FE<auml>rber.
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Net::IDN::UTS46::_Mapping>, L<Net::IDN::Encode>, S<UTS #46> (L<http://www.unicode.org/reports/tr46/>)
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,12 @@
|
|||
use strict;
|
||||
use Test::More tests => 1 + 5;
|
||||
use Test::NoWarnings;
|
||||
|
||||
# Each of these modules must load cleanly; use_ok() emits one test per module.
for my $module (qw(
    Net::IDN::Encode
    Net::IDN::Punycode
    Net::IDN::Punycode::PP

    Net::IDN::UTS46
    Net::IDN::UTS46::_Mapping
)) {
    use_ok($module);
}
|
||||
|
||||
exit(0);
|
|
@ -0,0 +1,29 @@
|
|||
use utf8;
|
||||
use strict;
|
||||
|
||||
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
|
||||
|
||||
use Net::IDN::Encode qw(:all);
|
||||
|
||||
use Test::More tests => 1 + 13;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::Encode qw(:all);
|
||||
|
||||
# Single labels: a Unicode label is converted, while a label that already
# carries the ACE prefix is expected back unchanged (case included).
is(eval{domain_to_ascii('müller')} || $@, 'xn--mller-kva', 'single label (to_ascii)');
is(eval{domain_to_ascii('XN--MLLER-KVA')} || $@, 'XN--MLLER-KVA', 'single uppercase label (to_ascii)');

# Complete domain names, including non-ASCII dot characters as separators.
is(eval{domain_to_ascii('www.jürg.xn--mller-kva.com', )} || $@, 'www.xn--jrg-hoa.xn--mller-kva.com', 'mixed utf8/ace/ascii');
is(eval{domain_to_ascii('www.a.b。c.d。com', )} || $@, 'www.a.b.c.d.com', 'mixed dots');

is(eval{domain_to_ascii("www.\x{1F985}.example", AllowUnassigned => 1)} || $@, 'www.xn--4s9h.example', 'Unicode 9.0 emoji');

# Labels containing a blank.  Unicode input is converted without STD3 rules
# and must fail (eval yields undef) with them; input that already has the ACE
# prefix is expected back unchanged in both modes.  The descriptions name
# to_ascii because that is the direction this script exercises.
is(eval{domain_to_ascii('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 0)}, 'www.xn-- -7kav3ivb.example', 'blank (without STD3 rules) (to_ascii)') or diag $@;
is(eval{domain_to_ascii('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 1)}, undef, 'blank (with STD3 rules) (to_ascii)') or diag $@;
is(eval{domain_to_ascii('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 0)}, 'www.xn-- -7kav3ivb.example', 'blank (without STD3 rules) (to_ascii pass-through)') or diag $@;
is(eval{domain_to_ascii('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 1)}, 'www.xn-- -7kav3ivb.example', 'blank (with STD3 rules) (to_ascii pass-through)') or diag $@;

# The case of ASCII labels is expected to be preserved.
is(eval{domain_to_ascii("I.\x{2665}.Perl.invalid")}, 'I.xn--g6h.Perl.invalid', 'mixed case');
is(eval{domain_to_ascii("I.xn--g6h.Perl.invalid")}, 'I.xn--g6h.Perl.invalid', 'mixed case');
is(eval{domain_to_ascii('www.xn--garbage')}, 'www.xn--garbage', 'Invalid A-label');
is(eval{domain_to_ascii('_test._srv.müller.example.com')}, '_test._srv.xn--mller-kva.example.com', 'SRV record');
|
|
@ -0,0 +1,31 @@
|
|||
use utf8;
|
||||
use strict;
|
||||
|
||||
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
|
||||
|
||||
use Test::More tests => 1 + 15;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::Encode qw(:all);
|
||||
|
||||
# Single labels: the ACE prefix is recognised in either case.
is(eval{domain_to_unicode('xn--mller-kva')} || $@, 'müller', 'single label (to_unicode)');
is(eval{domain_to_unicode('XN--MLLER-KVA')} || $@, 'müller', 'single uppercase label (to_unicode)');

# Complete domain names; the non-ASCII dots are expected back unchanged.
is(eval{domain_to_unicode('www.jürg.xn--mller-kva.com', )} || $@, 'www.jürg.müller.com', 'mixed utf8/ace/ascii (to_unicode)');
is(eval{domain_to_unicode('www.a.b。c.d。com', )} || $@, 'www.a.b。c.d。com', 'mixed dots (to_unicode)');

is(eval{domain_to_unicode("www.xn--4s9h.example", AllowUnassigned => 1)} || $@, "www.\x{1F985}.example", 'Unicode 9.0 emoji');

# Labels containing a blank.  The first pair feeds Unicode input (already in
# the target form, hence "pass-through"), the second pair feeds ACE input
# (a real conversion).  With STD3 rules both must fail, i.e. eval yields undef.
# Each description now matches the option and the input of its own test.
is(eval{domain_to_unicode('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 0)}, 'www.ä ö ü ß.example', 'blank (without STD3 rules) (to_unicode pass-through)') or diag $@;
is(eval{domain_to_unicode('www.ä ö ü ß.example', 'UseSTD3ASCIIRules' => 1)}, undef, 'blank (with STD3 rules) (to_unicode pass-through)') or diag $@;
is(eval{domain_to_unicode('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 0)}, 'www.ä ö ü ß.example', 'blank (without STD3 rules) (to_unicode)') or diag $@;
is(eval{domain_to_unicode('www.xn-- -7kav3ivb.example', 'UseSTD3ASCIIRules' => 1)}, undef, 'blank (with STD3 rules) (to_unicode)') or diag $@;

# The first input spells the prefix "XN--", the second "xn--".
is(eval{domain_to_unicode("EXAMPLE.XN--11B5BS3A9AJ6G")}, 'EXAMPLE.परीक्षा', 'uppercase IDNA prefix') or diag $@;
is(eval{domain_to_unicode("EXAMPLE.xn--11B5BS3A9AJ6G")}, 'EXAMPLE.परीक्षा', 'lowercase IDNA prefix') or diag $@;

# The case of ASCII labels is expected to be preserved.
is(eval{domain_to_unicode("I.\x{2665}.Perl.invalid")}, "I.\x{2665}.Perl.invalid", 'mixed case');
is(eval{domain_to_unicode('I.xn--g6h.Perl.invalid')}, "I.\x{2665}.Perl.invalid", 'mixed case');
is(eval{domain_to_unicode('_test._srv.xn--mller-kva.example.com')}, '_test._srv.müller.example.com', 'SRV record');

is(eval{domain_to_unicode('xn--zcaa.de')}, 'ßß.de', 'bare ßß');
|
|
@ -0,0 +1,36 @@
|
|||
use bytes;
|
||||
use strict;
|
||||
|
||||
use Test::More tests => 24;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::Encode qw(:all);
|
||||
|
||||
# Table of checks, one row per test: [ converter, input, expected result ].
my @byte_cases = (
    [ \&to_ascii, 'mueller',       'mueller' ],
    [ \&to_ascii, 'xn--mller-kva', 'xn--mller-kva' ],
    [ \&to_ascii, 'müller',        'xn--mller-kva' ],

    [ \&to_unicode, 'mueller',       'mueller' ],
    [ \&to_unicode, 'xn--mller-kva', 'müller' ],
    [ \&to_unicode, 'müller',        'müller' ],

    [ \&domain_to_ascii, 'mueller.example.com',       'mueller.example.com' ],
    [ \&domain_to_ascii, 'xn--mller-kva.example.com', 'xn--mller-kva.example.com' ],
    [ \&domain_to_ascii, 'müller.example.com',        'xn--mller-kva.example.com' ],

    [ \&domain_to_unicode, 'mueller.example.com',       'mueller.example.com' ],
    [ \&domain_to_unicode, 'xn--mller-kva.example.com', 'müller.example.com' ],
    [ \&domain_to_unicode, 'müller.example.com',        'müller.example.com' ],

    [ \&email_to_ascii, 'hans@mueller.example.com',       'hans@mueller.example.com' ],
    [ \&email_to_ascii, 'hans@xn--mller-kva.example.com', 'hans@xn--mller-kva.example.com' ],
    [ \&email_to_ascii, 'hans@müller.example.com',        'hans@xn--mller-kva.example.com' ],
    [ \&email_to_ascii, '',                               '' ],
    [ \&email_to_ascii, undef,                            undef ],
    [ \&email_to_ascii, 'test',                           'test' ],

    [ \&email_to_unicode, 'hans@mueller.example.com',       'hans@mueller.example.com' ],
    [ \&email_to_unicode, 'hans@xn--mller-kva.example.com', 'hans@müller.example.com' ],
    [ \&email_to_unicode, '',                               '' ],
    [ \&email_to_unicode, undef,                            undef ],
    [ \&email_to_unicode, 'test',                           'test' ],
);

for my $case (@byte_cases) {
    my ($convert, $input, $expected) = @{$case};

    # The call stays inside is()'s argument list so that the converter runs
    # in list context, exactly as in a hand-written is(f(...), ...) line.
    is($convert->($input), $expected);
}
|
|
@ -0,0 +1,46 @@
|
|||
use utf8;
|
||||
use strict;
|
||||
|
||||
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
|
||||
|
||||
use Test::More tests => 32;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::Encode qw(:all);
|
||||
|
||||
# Table of checks, one row per test: [ converter, input, expected result ].
# Rows that appear twice are intentional duplicates of the original
# assertions; the test plan counts every row.
my @utf8_cases = (
    [ \&to_ascii, 'mueller',       'mueller' ],
    [ \&to_ascii, 'xn--mller-kva', 'xn--mller-kva' ],
    [ \&to_ascii, 'müller',        'xn--mller-kva' ],
    [ \&to_ascii, '中央大学',      'xn--fiq80yua78t' ],

    [ \&to_unicode, 'mueller',         'mueller' ],
    [ \&to_unicode, 'xn--mller-kva',   'müller' ],
    [ \&to_unicode, 'müller',          'müller' ],
    [ \&to_unicode, 'xn--fiq80yua78t', '中央大学' ],

    [ \&domain_to_ascii, 'mueller.example.com',       'mueller.example.com' ],
    [ \&domain_to_ascii, 'xn--mller-kva.example.com', 'xn--mller-kva.example.com' ],
    [ \&domain_to_ascii, 'müller.example.com',        'xn--mller-kva.example.com' ],
    [ \&domain_to_ascii, '中央大学.tw',               'xn--fiq80yua78t.tw' ],

    [ \&domain_to_unicode, 'mueller.example.com',       'mueller.example.com' ],
    [ \&domain_to_unicode, 'xn--mller-kva.example.com', 'müller.example.com' ],
    [ \&domain_to_unicode, 'müller.example.com',        'müller.example.com' ],
    [ \&domain_to_unicode, 'xn--fiq80yua78t.tw',        '中央大学.tw' ],

    [ \&email_to_ascii, 'hans@mueller.example.com',       'hans@mueller.example.com' ],
    [ \&email_to_ascii, 'hans@xn--mller-kva.example.com', 'hans@xn--mller-kva.example.com' ],
    [ \&email_to_ascii, 'hans@müller.example.com',        'hans@xn--mller-kva.example.com' ],
    [ \&email_to_ascii, 'test@中央大学.tw',               'test@xn--fiq80yua78t.tw' ],
    [ \&email_to_ascii, '',                               '' ],
    [ \&email_to_ascii, undef,                            undef ],
    [ \&email_to_ascii, 'test',                           'test' ],

    [ \&email_to_unicode, 'hans@mueller.example.com',       'hans@mueller.example.com' ],
    [ \&email_to_unicode, 'hans@mueller.example.com',       'hans@mueller.example.com' ],
    [ \&email_to_unicode, 'hans@xn--mller-kva.example.com', 'hans@müller.example.com' ],
    [ \&email_to_unicode, 'hans@xn--mller-kva.example.com', 'hans@müller.example.com' ],
    [ \&email_to_unicode, 'test@xn--fiq80yua78t.tw',        'test@中央大学.tw' ],
    [ \&email_to_unicode, '',                               '' ],
    [ \&email_to_unicode, undef,                            undef ],
    [ \&email_to_unicode, 'test',                           'test' ],
);

for my $case (@utf8_cases) {
    my ($convert, $input, $expected) = @{$case};

    # The call stays inside is()'s argument list so that the converter runs
    # in list context, exactly as in a hand-written is(f(...), ...) line.
    is($convert->($input), $expected);
}
|
|
@ -0,0 +1,144 @@
|
|||
use strict;
|
||||
use utf8;
|
||||
|
||||
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
|
||||
|
||||
use Test::More;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::Punycode::PP ':all';
|
||||
|
||||
our @idna = (
|
||||
["Arabic (Egyptian)",
|
||||
"\x{0644}\x{064A}\x{0647}\x{0645}\x{0627}\x{0628}\x{062A}\x{0643}".
|
||||
"\x{0644}\x{0645}\x{0648}\x{0634}\x{0639}\x{0631}\x{0628}\x{064A}\x{061F}",
|
||||
"egbpdaj6bu4bxfgehfvwxn", 0, 0, 1, 1 ],
|
||||
["Chinese (simplified)",
|
||||
"\x{4ED6}\x{4EEC}\x{4E3A}\x{4EC0}\x{4E48}\x{4E0D}\x{8BF4}\x{4E2D}".
|
||||
"\x{6587}",
|
||||
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, 1, 1 ],
|
||||
["Chinese (traditional)",
|
||||
"\x{4ED6}\x{5011}\x{7232}\x{4EC0}\x{9EBD}\x{4E0D}\x{8AAA}\x{4E2D}".
|
||||
"\x{6587}",
|
||||
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, 1, 1 ],
|
||||
["Czech",
|
||||
"\x{0050}\x{0072}\x{006F}\x{010D}\x{0070}\x{0072}\x{006F}\x{0073}".
|
||||
"\x{0074}\x{011B}\x{006E}\x{0065}\x{006D}\x{006C}\x{0075}\x{0076}\x{00ED}".
|
||||
"\x{010D}\x{0065}\x{0073}\x{006B}\x{0079}",
|
||||
"Proprostnemluvesky-uyb24dma41a", 0, 0, 1, 1 ],
|
||||
["Hebrew",
|
||||
"\x{05DC}\x{05DE}\x{05D4}\x{05D4}\x{05DD}\x{05E4}\x{05E9}\x{05D5}".
|
||||
"\x{05D8}\x{05DC}\x{05D0}\x{05DE}\x{05D3}\x{05D1}\x{05E8}\x{05D9}\x{05DD}".
|
||||
"\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}",
|
||||
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, 1, 1 ],
|
||||
["Hindi (Devanagari)",
|
||||
"\x{092F}\x{0939}\x{0932}\x{094B}\x{0917}\x{0939}\x{093F}\x{0928}".
|
||||
"\x{094D}\x{0926}\x{0940}\x{0915}\x{094D}\x{092F}\x{094B}\x{0902}\x{0928}".
|
||||
"\x{0939}\x{0940}\x{0902}\x{092C}\x{094B}\x{0932}\x{0938}\x{0915}\x{0924}".
|
||||
"\x{0947}\x{0939}\x{0948}\x{0902}",
|
||||
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0, 1 ],
|
||||
["Japanese (kanji and hiragana)",
|
||||
"\x{306A}\x{305C}\x{307F}\x{3093}\x{306A}\x{65E5}\x{672C}\x{8A9E}".
|
||||
"\x{3092}\x{8A71}\x{3057}\x{3066}\x{304F}\x{308C}\x{306A}\x{3044}\x{306E}".
|
||||
"\x{304B}",
|
||||
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0, 1 ],
|
||||
["Russian (Cyrillic)",
|
||||
"\x{043F}\x{043E}\x{0447}\x{0435}\x{043C}\x{0443}\x{0436}\x{0435}".
|
||||
"\x{043E}\x{043D}\x{0438}\x{043D}\x{0435}\x{0433}\x{043E}\x{0432}\x{043E}".
|
||||
"\x{0440}\x{044F}\x{0442}\x{043F}\x{043E}\x{0440}\x{0443}\x{0441}\x{0441}".
|
||||
"\x{043A}\x{0438}",
|
||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
|
||||
["Spanish",
|
||||
"\x{0050}\x{006F}\x{0072}\x{0071}\x{0075}\x{00E9}\x{006E}\x{006F}".
|
||||
"\x{0070}\x{0075}\x{0065}\x{0064}\x{0065}\x{006E}\x{0073}\x{0069}\x{006D}".
|
||||
"\x{0070}\x{006C}\x{0065}\x{006D}\x{0065}\x{006E}\x{0074}\x{0065}\x{0068}".
|
||||
"\x{0061}\x{0062}\x{006C}\x{0061}\x{0072}\x{0065}\x{006E}\x{0045}\x{0073}".
|
||||
"\x{0070}\x{0061}\x{00F1}\x{006F}\x{006C}",
|
||||
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0, 1 ],
|
||||
["Vietnamese",
|
||||
"\x{0054}\x{1EA1}\x{0069}\x{0073}\x{0061}\x{006F}\x{0068}\x{1ECD}".
|
||||
"\x{006B}\x{0068}\x{00F4}\x{006E}\x{0067}\x{0074}\x{0068}\x{1EC3}\x{0063}".
|
||||
"\x{0068}\x{1EC9}\x{006E}\x{00F3}\x{0069}\x{0074}\x{0069}\x{1EBF}\x{006E}".
|
||||
"\x{0067}\x{0056}\x{0069}\x{1EC7}\x{0074}",
|
||||
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0, 1 ],
|
||||
["Japanese",
|
||||
"\x{0033}\x{5E74}\x{0042}\x{7D44}\x{91D1}\x{516B}\x{5148}\x{751F}",
|
||||
"3B-ww4c5e180e575a65lsy2b", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{5B89}\x{5BA4}\x{5948}\x{7F8E}\x{6075}\x{002D}\x{0077}\x{0069}".
|
||||
"\x{0074}\x{0068}\x{002D}\x{0053}\x{0055}\x{0050}\x{0045}\x{0052}\x{002D}".
|
||||
"\x{004D}\x{004F}\x{004E}\x{004B}\x{0045}\x{0059}\x{0053}",
|
||||
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0, 1 ],
|
||||
["Japanese",
|
||||
"\x{0048}\x{0065}\x{006C}\x{006C}\x{006F}\x{002D}\x{0041}\x{006E}".
|
||||
"\x{006F}\x{0074}\x{0068}\x{0065}\x{0072}\x{002D}\x{0057}\x{0061}\x{0079}".
|
||||
"\x{002D}\x{305D}\x{308C}\x{305E}\x{308C}\x{306E}\x{5834}\x{6240}",
|
||||
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0, 1 ],
|
||||
["Japanese",
|
||||
"\x{3072}\x{3068}\x{3064}\x{5C4B}\x{6839}\x{306E}\x{4E0B}\x{0032}",
|
||||
"2-u9tlzr9756bt3uc0v", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{004D}\x{0061}\x{006A}\x{0069}\x{3067}\x{004B}\x{006F}\x{0069}".
|
||||
"\x{3059}\x{308B}\x{0035}\x{79D2}\x{524D}",
|
||||
"MajiKoi5-783gue6qz075azm5e", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{30D1}\x{30D5}\x{30A3}\x{30FC}\x{0064}\x{0065}\x{30EB}\x{30F3}".
|
||||
"\x{30D0}",
|
||||
"de-jg4avhby1noc0d", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{305D}\x{306E}\x{30B9}\x{30D4}\x{30FC}\x{30C9}\x{3067}",
|
||||
"d9juau41awczczp", 0, 0, 1, 1 ],
|
||||
["Greek",
|
||||
"\x{03b5}\x{03bb}\x{03bb}\x{03b7}\x{03bd}\x{03b9}\x{03ba}\x{03ac}",
|
||||
"hxargifdar", 0, 0, 1, 1 ],
|
||||
["Maltese (Malti)",
|
||||
"\x{0062}\x{006f}\x{006e}\x{0121}\x{0075}\x{0073}\x{0061}\x{0127}".
|
||||
"\x{0127}\x{0061}",
|
||||
"bonusaa-5bb1da", 0, 0, 1, 1 ],
|
||||
["Russian (Cyrillic)",
|
||||
"\x{043f}\x{043e}\x{0447}\x{0435}\x{043c}\x{0443}\x{0436}\x{0435}".
|
||||
"\x{043e}\x{043d}\x{0438}\x{043d}\x{0435}\x{0433}\x{043e}\x{0432}\x{043e}".
|
||||
"\x{0440}\x{044f}\x{0442}\x{043f}\x{043e}\x{0440}\x{0443}\x{0441}\x{0441}".
|
||||
"\x{043a}\x{0438}",
|
||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
|
||||
|
||||
## Test vector from IdnaTest.txt
|
||||
|
||||
["Hebrew (combining)",
|
||||
"\x{05D0}\x{0308}",
|
||||
"ssa73l",
|
||||
]
|
||||
|
||||
);
|
||||
|
||||
plan tests => ($#idna+1)*2 + 1;
|
||||
|
||||
|
||||
# Round-trip every vector through both directions of the Punycode codec.
# Only the first three fields of an entry (description, Unicode string,
# Punycode string) are read; the remaining fields some entries carry were
# unpacked into locals that were never used, so they are no longer unpacked.
foreach my $test (@idna)
{
  my ($comment, $in, $out) = @{$test};

  is(encode_punycode($in), $out, $comment.' (encode_punycode)');
  is(decode_punycode($out), $in, $comment.' (decode_punycode)');
}
|
||||
|
||||
# Test vectors extracted from:
|
||||
#
|
||||
# Nameprep and IDNA Test Vectors
|
||||
# draft-josefsson-idn-test-vectors
|
||||
#
|
||||
# Copyright (C) The Internet Society (2003). All Rights Reserved.
|
||||
#
|
||||
# This document and translations of it may be copied and furnished
|
||||
# to others, and derivative works that comment on or otherwise
|
||||
# explain it or assist in its implementation may be prepared,
|
||||
# copied, published and distributed, in whole or in part, without
|
||||
# restriction of any kind, provided that the above copyright
|
||||
# notice and this paragraph are included on all such copies and
|
||||
# derivative works. However, this document itself may not be
|
||||
# modified in any way, such as by removing the copyright notice or
|
||||
# references to the Internet Society or other Internet
|
||||
# organizations, except as needed for the purpose of developing
|
||||
# Internet standards in which case the procedures for copyrights
|
||||
# defined in the Internet Standards process must be followed, or
|
||||
# as required to translate it into languages other than English.
|
|
@ -0,0 +1,153 @@
|
|||
use strict;
|
||||
use utf8;
|
||||
|
||||
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
|
||||
|
||||
use Test::More;
|
||||
use Net::IDN::Punycode ':all';
|
||||
|
||||
BEGIN {
|
||||
plan skip_all => 'no XS version' if eval {
|
||||
\&Net::IDN::Punycode::encode_punycode ==
|
||||
\&Net::IDN::Punycode::PP::encode_punycode; }
|
||||
}
|
||||
|
||||
use Test::NoWarnings;
|
||||
|
||||
our @idna = (
|
||||
["Arabic (Egyptian)",
|
||||
"\x{0644}\x{064A}\x{0647}\x{0645}\x{0627}\x{0628}\x{062A}\x{0643}".
|
||||
"\x{0644}\x{0645}\x{0648}\x{0634}\x{0639}\x{0631}\x{0628}\x{064A}\x{061F}",
|
||||
"egbpdaj6bu4bxfgehfvwxn", 0, 0, 1, 1 ],
|
||||
["Chinese (simplified)",
|
||||
"\x{4ED6}\x{4EEC}\x{4E3A}\x{4EC0}\x{4E48}\x{4E0D}\x{8BF4}\x{4E2D}".
|
||||
"\x{6587}",
|
||||
"ihqwcrb4cv8a8dqg056pqjye", 0, 0, 1, 1 ],
|
||||
["Chinese (traditional)",
|
||||
"\x{4ED6}\x{5011}\x{7232}\x{4EC0}\x{9EBD}\x{4E0D}\x{8AAA}\x{4E2D}".
|
||||
"\x{6587}",
|
||||
"ihqwctvzc91f659drss3x8bo0yb", 0, 0, 1, 1 ],
|
||||
["Czech",
|
||||
"\x{0050}\x{0072}\x{006F}\x{010D}\x{0070}\x{0072}\x{006F}\x{0073}".
|
||||
"\x{0074}\x{011B}\x{006E}\x{0065}\x{006D}\x{006C}\x{0075}\x{0076}\x{00ED}".
|
||||
"\x{010D}\x{0065}\x{0073}\x{006B}\x{0079}",
|
||||
"Proprostnemluvesky-uyb24dma41a", 0, 0, 1, 1 ],
|
||||
["Hebrew",
|
||||
"\x{05DC}\x{05DE}\x{05D4}\x{05D4}\x{05DD}\x{05E4}\x{05E9}\x{05D5}".
|
||||
"\x{05D8}\x{05DC}\x{05D0}\x{05DE}\x{05D3}\x{05D1}\x{05E8}\x{05D9}\x{05DD}".
|
||||
"\x{05E2}\x{05D1}\x{05E8}\x{05D9}\x{05EA}",
|
||||
"4dbcagdahymbxekheh6e0a7fei0b", 0, 0, 1, 1 ],
|
||||
["Hindi (Devanagari)",
|
||||
"\x{092F}\x{0939}\x{0932}\x{094B}\x{0917}\x{0939}\x{093F}\x{0928}".
|
||||
"\x{094D}\x{0926}\x{0940}\x{0915}\x{094D}\x{092F}\x{094B}\x{0902}\x{0928}".
|
||||
"\x{0939}\x{0940}\x{0902}\x{092C}\x{094B}\x{0932}\x{0938}\x{0915}\x{0924}".
|
||||
"\x{0947}\x{0939}\x{0948}\x{0902}",
|
||||
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, 0, 1 ],
|
||||
["Japanese (kanji and hiragana)",
|
||||
"\x{306A}\x{305C}\x{307F}\x{3093}\x{306A}\x{65E5}\x{672C}\x{8A9E}".
|
||||
"\x{3092}\x{8A71}\x{3057}\x{3066}\x{304F}\x{308C}\x{306A}\x{3044}\x{306E}".
|
||||
"\x{304B}",
|
||||
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, 0, 1 ],
|
||||
["Russian (Cyrillic)",
|
||||
"\x{043F}\x{043E}\x{0447}\x{0435}\x{043C}\x{0443}\x{0436}\x{0435}".
|
||||
"\x{043E}\x{043D}\x{0438}\x{043D}\x{0435}\x{0433}\x{043E}\x{0432}\x{043E}".
|
||||
"\x{0440}\x{044F}\x{0442}\x{043F}\x{043E}\x{0440}\x{0443}\x{0441}\x{0441}".
|
||||
"\x{043A}\x{0438}",
|
||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
|
||||
["Spanish",
|
||||
"\x{0050}\x{006F}\x{0072}\x{0071}\x{0075}\x{00E9}\x{006E}\x{006F}".
|
||||
"\x{0070}\x{0075}\x{0065}\x{0064}\x{0065}\x{006E}\x{0073}\x{0069}\x{006D}".
|
||||
"\x{0070}\x{006C}\x{0065}\x{006D}\x{0065}\x{006E}\x{0074}\x{0065}\x{0068}".
|
||||
"\x{0061}\x{0062}\x{006C}\x{0061}\x{0072}\x{0065}\x{006E}\x{0045}\x{0073}".
|
||||
"\x{0070}\x{0061}\x{00F1}\x{006F}\x{006C}",
|
||||
"PorqunopuedensimplementehablarenEspaol-fmd56a", 0, 0, 1 ],
|
||||
["Vietnamese",
|
||||
"\x{0054}\x{1EA1}\x{0069}\x{0073}\x{0061}\x{006F}\x{0068}\x{1ECD}".
|
||||
"\x{006B}\x{0068}\x{00F4}\x{006E}\x{0067}\x{0074}\x{0068}\x{1EC3}\x{0063}".
|
||||
"\x{0068}\x{1EC9}\x{006E}\x{00F3}\x{0069}\x{0074}\x{0069}\x{1EBF}\x{006E}".
|
||||
"\x{0067}\x{0056}\x{0069}\x{1EC7}\x{0074}",
|
||||
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, 0, 1 ],
|
||||
["Japanese",
|
||||
"\x{0033}\x{5E74}\x{0042}\x{7D44}\x{91D1}\x{516B}\x{5148}\x{751F}",
|
||||
"3B-ww4c5e180e575a65lsy2b", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{5B89}\x{5BA4}\x{5948}\x{7F8E}\x{6075}\x{002D}\x{0077}\x{0069}".
|
||||
"\x{0074}\x{0068}\x{002D}\x{0053}\x{0055}\x{0050}\x{0045}\x{0052}\x{002D}".
|
||||
"\x{004D}\x{004F}\x{004E}\x{004B}\x{0045}\x{0059}\x{0053}",
|
||||
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, 0, 1 ],
|
||||
["Japanese",
|
||||
"\x{0048}\x{0065}\x{006C}\x{006C}\x{006F}\x{002D}\x{0041}\x{006E}".
|
||||
"\x{006F}\x{0074}\x{0068}\x{0065}\x{0072}\x{002D}\x{0057}\x{0061}\x{0079}".
|
||||
"\x{002D}\x{305D}\x{308C}\x{305E}\x{308C}\x{306E}\x{5834}\x{6240}",
|
||||
"Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, 0, 1 ],
|
||||
["Japanese",
|
||||
"\x{3072}\x{3068}\x{3064}\x{5C4B}\x{6839}\x{306E}\x{4E0B}\x{0032}",
|
||||
"2-u9tlzr9756bt3uc0v", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{004D}\x{0061}\x{006A}\x{0069}\x{3067}\x{004B}\x{006F}\x{0069}".
|
||||
"\x{3059}\x{308B}\x{0035}\x{79D2}\x{524D}",
|
||||
"MajiKoi5-783gue6qz075azm5e", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{30D1}\x{30D5}\x{30A3}\x{30FC}\x{0064}\x{0065}\x{30EB}\x{30F3}".
|
||||
"\x{30D0}",
|
||||
"de-jg4avhby1noc0d", 0, 0, 1, 1 ],
|
||||
["Japanese",
|
||||
"\x{305D}\x{306E}\x{30B9}\x{30D4}\x{30FC}\x{30C9}\x{3067}",
|
||||
"d9juau41awczczp", 0, 0, 1, 1 ],
|
||||
["Greek",
|
||||
"\x{03b5}\x{03bb}\x{03bb}\x{03b7}\x{03bd}\x{03b9}\x{03ba}\x{03ac}",
|
||||
"hxargifdar", 0, 0, 1, 1 ],
|
||||
["Maltese (Malti)",
|
||||
"\x{0062}\x{006f}\x{006e}\x{0121}\x{0075}\x{0073}\x{0061}\x{0127}".
|
||||
"\x{0127}\x{0061}",
|
||||
"bonusaa-5bb1da", 0, 0, 1, 1 ],
|
||||
["Russian (Cyrillic)",
|
||||
"\x{043f}\x{043e}\x{0447}\x{0435}\x{043c}\x{0443}\x{0436}\x{0435}".
|
||||
"\x{043e}\x{043d}\x{0438}\x{043d}\x{0435}\x{0433}\x{043e}\x{0432}\x{043e}".
|
||||
"\x{0440}\x{044f}\x{0442}\x{043f}\x{043e}\x{0440}\x{0443}\x{0441}\x{0441}".
|
||||
"\x{043a}\x{0438}",
|
||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, 0, 1, 1 ],
|
||||
|
||||
## Test vector from IdnaTest.txt
|
||||
|
||||
["Hebrew (combining)",
|
||||
"\x{05D0}\x{0308}",
|
||||
"ssa73l",
|
||||
],
|
||||
['U+094D',
|
||||
"a\x{094D}b",
|
||||
"ab-fsf",
|
||||
],
|
||||
);
|
||||
|
||||
my $tests = 2 * (scalar @idna);
|
||||
plan tests => 1 + $tests;
|
||||
|
||||
# Round-trip every vector through both directions of the Punycode codec.
# Only the first three fields of an entry (description, Unicode string,
# Punycode string) are read; the remaining fields some entries carry were
# unpacked into locals that were never used, so they are no longer unpacked.
foreach my $test (@idna)
{
  my ($comment, $in, $out) = @{$test};

  is(encode_punycode($in), $out, $comment.' (encode_punycode)');
  is(decode_punycode($out), $in, $comment.' (decode_punycode)');
}
|
||||
|
||||
# Test vectors extracted from:
|
||||
#
|
||||
# Nameprep and IDNA Test Vectors
|
||||
# draft-josefsson-idn-test-vectors
|
||||
#
|
||||
# Copyright (C) The Internet Society (2003). All Rights Reserved.
|
||||
#
|
||||
# This document and translations of it may be copied and furnished
|
||||
# to others, and derivative works that comment on or otherwise
|
||||
# explain it or assist in its implementation may be prepared,
|
||||
# copied, published and distributed, in whole or in part, without
|
||||
# restriction of any kind, provided that the above copyright
|
||||
# notice and this paragraph are included on all such copies and
|
||||
# derivative works. However, this document itself may not be
|
||||
# modified in any way, such as by removing the copyright notice or
|
||||
# references to the Internet Society or other Internet
|
||||
# organizations, except as needed for the purpose of developing
|
||||
# Internet standards in which case the procedures for copyrights
|
||||
# defined in the Internet Standards process must be followed, or
|
||||
# as required to translate it into languages other than English.
|
|
@ -0,0 +1,15 @@
|
|||
use bytes;
|
||||
use strict;
|
||||
|
||||
use Test::More tests => 1+6;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::UTS46 qw(:all);
|
||||
|
||||
# The imported name, the unprefixed qualified name and the prefixed qualified
# name are each called with the same input and must give the same result.
for my $to_ascii (
    \&uts46_to_ascii,
    \&Net::IDN::UTS46::to_ascii,
    \&Net::IDN::UTS46::uts46_to_ascii,
) {
    is($to_ascii->('müller'), 'xn--mller-kva');
}

for my $to_unicode (
    \&uts46_to_unicode,
    \&Net::IDN::UTS46::to_unicode,
    \&Net::IDN::UTS46::uts46_to_unicode,
) {
    is($to_unicode->('xn--mller-kva'), 'müller');
}
|
|
@ -0,0 +1,16 @@
|
|||
use bytes;
|
||||
use strict;
|
||||
|
||||
use Test::More tests => 1 + 6;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::UTS46 qw(:all);
|
||||
|
||||
# Table of checks, one row per test: [ converter, input, expected result ].
my @byte_cases = (
    [ \&uts46_to_ascii, 'mueller',       'mueller' ],
    [ \&uts46_to_ascii, 'xn--mller-kva', 'xn--mller-kva' ],
    [ \&uts46_to_ascii, 'müller',        'xn--mller-kva' ],

    [ \&uts46_to_unicode, 'mueller',       'mueller' ],
    [ \&uts46_to_unicode, 'xn--mller-kva', 'müller' ],
    [ \&uts46_to_unicode, 'müller',        'müller' ],
);

for my $case (@byte_cases) {
    my ($convert, $input, $expected) = @{$case};

    # Keep the call inside is()'s argument list (list context, as before).
    is($convert->($input), $expected);
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
use utf8;
|
||||
use strict;
|
||||
|
||||
BEGIN { binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; }
|
||||
|
||||
use Test::More tests => 1+8;
|
||||
use Test::NoWarnings;
|
||||
|
||||
use Net::IDN::UTS46 qw(:all);
|
||||
|
||||
# Table of checks, one row per test: [ converter, input, expected result ].
my @utf8_cases = (
    [ \&uts46_to_ascii, 'mueller',       'mueller' ],
    [ \&uts46_to_ascii, 'xn--mller-kva', 'xn--mller-kva' ],
    [ \&uts46_to_ascii, 'müller',        'xn--mller-kva' ],
    [ \&uts46_to_ascii, '中央大学',      'xn--fiq80yua78t' ],

    [ \&uts46_to_unicode, 'mueller',         'mueller' ],
    [ \&uts46_to_unicode, 'xn--mller-kva',   'müller' ],
    [ \&uts46_to_unicode, 'müller',          'müller' ],
    [ \&uts46_to_unicode, 'xn--fiq80yua78t', '中央大学' ],
);

for my $case (@utf8_cases) {
    my ($convert, $input, $expected) = @{$case};

    # Keep the call inside is()'s argument list (list context, as before).
    is($convert->($input), $expected);
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,29 @@
|
|||
use strict;
|
||||
use utf8;
|
||||
use warnings;
|
||||
|
||||
BEGIN {
|
||||
binmode STDOUT, ':utf8';
|
||||
binmode STDERR, ':utf8';
|
||||
}
|
||||
|
||||
use Test::More tests => 3 + 1;
|
||||
use Test::NoWarnings;
|
||||
|
||||
# Set the flag in a BEGIN block.  "use" statements are executed at compile
# time, so a plain run-time assignment placed here would only happen after
# the "use Net::IDN::UTS46" below had already loaded its modules.
# NOTE(review): presumably Net::IDN::Punycode consults $_NO_XS while it is
# being loaded, to select the pure-Perl implementation -- confirm against
# that module.
BEGIN {
    $Net::IDN::Punycode::_NO_XS = 1;
}
|
||||
use Net::IDN::UTS46 (':all');
|
||||
|
||||
no warnings 'utf8';
|
||||
|
||||
my %p = ("TransitionalProcessing" => "0");
|
||||
|
||||
is(eval{uts46_to_ascii("xn--0.pt", %p)}, undef, "to_ascii\(\'xn\-\-0\.pt\'\)\ throws\ error\ A3\ \[data\/IdnaTest\.txt\:256\]") or ($@ and diag($@));
|
||||
is(eval{uts46_to_unicode("xn--0.pt", %p)}, undef, "to_unicode\(\'xn\-\-0\.pt\'\)\ throws\ error\ A3\ \[data\/IdnaTest\.txt\:256\]") or ($@ and diag($@));
|
||||
is(eval{Net::IDN::Punycode::decode_punycode(0)}, undef, "decode_punycode(0) throws error") or ($@ and diag($@));
|
||||
|
||||
# Ignore warnings generated by perl core modules on old perl
|
||||
Test::NoWarnings->clear_warnings if $^V lt v5.8.7;
|
||||
|
||||
exit(0);
|
Loading…
Reference in New Issue