Import Upstream version 0.009
This commit is contained in:
commit
21094f86cd
|
@ -0,0 +1,33 @@
|
||||||
|
Revision history for PerlIO-utf8_strict
|
||||||
|
|
||||||
|
0.009 2022-01-08 17:26:23+01:00 Europe/Amsterdam
|
||||||
|
- Mark functions that croak as noreturn
|
||||||
|
|
||||||
|
0.008 2020-09-19 00:11:59+02:00 Europe/Amsterdam
|
||||||
|
- Make unread by :crlf on top of :utf8_strict reliable
|
||||||
|
|
||||||
|
0.007 2017-04-06 14:58:37+02:00 Europe/Amsterdam
|
||||||
|
- Adapt to dot no longer being in @INC
|
||||||
|
- Adapt to unicode syswrite being deprecated
|
||||||
|
|
||||||
|
0.006 2015-05-01 11:36:21+02:00 Europe/Amsterdam
|
||||||
|
Move to MakeMaker
|
||||||
|
|
||||||
|
0.005 2014-11-06 00:56:54+01:00 Europe/Amsterdam
|
||||||
|
Add PPPort to compile on perl <5.8.9
|
||||||
|
|
||||||
|
0.004 2013-02-24 10:59:59 Europe/Amsterdam
|
||||||
|
Fix compilation on 5.8
|
||||||
|
|
||||||
|
0.003 2013-02-21 01:30:44 Europe/Amsterdam
|
||||||
|
Fixed conversion to code points in noncharacter error reporting
|
||||||
|
PerlIOBase_flush_linebuf is unresolved on some platforms, pull in a copy
|
||||||
|
|
||||||
|
0.002 2012-05-30 20:43:21 Europe/Amsterdam
|
||||||
|
Fix bug with non-buffered lower layers
|
||||||
|
Fix C89 conformance
|
||||||
|
Documentation update
|
||||||
|
|
||||||
|
|
||||||
|
0.001 2012-04-08 22:08:08 Europe/Amsterdam
|
||||||
|
Initial release
|
|
@ -0,0 +1,72 @@
|
||||||
|
This is the Perl distribution PerlIO-utf8_strict.
|
||||||
|
|
||||||
|
Installing PerlIO-utf8_strict is straightforward.
|
||||||
|
|
||||||
|
## Installation with cpanm
|
||||||
|
|
||||||
|
If you have cpanm, you only need one line:
|
||||||
|
|
||||||
|
% cpanm PerlIO::utf8_strict
|
||||||
|
|
||||||
|
If cpanm does not have permission to install modules to the current perl, it
|
||||||
|
will automatically set up and install to a local::lib in your home directory.
|
||||||
|
See the local::lib documentation (https://metacpan.org/pod/local::lib) for
|
||||||
|
details on enabling it in your environment.
|
||||||
|
|
||||||
|
## Installing with the CPAN shell
|
||||||
|
|
||||||
|
Alternatively, if your CPAN shell is set up, you should just be able to do:
|
||||||
|
|
||||||
|
% cpan PerlIO::utf8_strict
|
||||||
|
|
||||||
|
## Manual installation
|
||||||
|
|
||||||
|
As a last resort, you can manually install it. Download the tarball, untar it,
|
||||||
|
install configure prerequisites (see below), then build it:
|
||||||
|
|
||||||
|
% perl Makefile.PL
|
||||||
|
% make && make test
|
||||||
|
|
||||||
|
Then install it:
|
||||||
|
|
||||||
|
% make install
|
||||||
|
|
||||||
|
On Windows platforms, you should use `dmake` or `nmake` instead of `make`.
|
||||||
|
|
||||||
|
If your perl is system-managed, you can create a local::lib in your home
|
||||||
|
directory to install modules to. For details, see the local::lib documentation:
|
||||||
|
https://metacpan.org/pod/local::lib
|
||||||
|
|
||||||
|
The prerequisites of this distribution will also have to be installed manually. The
|
||||||
|
prerequisites are listed in one of the files: `MYMETA.yml` or `MYMETA.json` generated
|
||||||
|
by running the manual build process described above.
|
||||||
|
|
||||||
|
## Configure Prerequisites
|
||||||
|
|
||||||
|
This distribution requires other modules to be installed before this
|
||||||
|
distribution's installer can be run. They can be found under the
|
||||||
|
"configure_requires" key of META.yml or the
|
||||||
|
"{prereqs}{configure}{requires}" key of META.json.
|
||||||
|
|
||||||
|
## Other Prerequisites
|
||||||
|
|
||||||
|
This distribution may require additional modules to be installed after running
|
||||||
|
Makefile.PL.
|
||||||
|
Look for prerequisites in the following phases:
|
||||||
|
|
||||||
|
* to run make, PHASE = build
|
||||||
|
* to use the module code itself, PHASE = runtime
|
||||||
|
* to run tests, PHASE = test
|
||||||
|
|
||||||
|
They can all be found in the "PHASE_requires" key of MYMETA.yml or the
|
||||||
|
"{prereqs}{PHASE}{requires}" key of MYMETA.json.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
PerlIO-utf8_strict documentation is available as POD.
|
||||||
|
You can run `perldoc` from a shell to read the documentation:
|
||||||
|
|
||||||
|
% perldoc PerlIO::utf8_strict
|
||||||
|
|
||||||
|
For more information on installing Perl modules via CPAN, please see:
|
||||||
|
https://www.cpan.org/modules/INSTALL.html
|
|
@ -0,0 +1,379 @@
|
||||||
|
This software is copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||||
|
|
||||||
|
This is free software; you can redistribute it and/or modify it under
|
||||||
|
the same terms as the Perl 5 programming language system itself.
|
||||||
|
|
||||||
|
Terms of the Perl programming language system itself
|
||||||
|
|
||||||
|
a) the GNU General Public License as published by the Free
|
||||||
|
Software Foundation; either version 1, or (at your option) any
|
||||||
|
later version, or
|
||||||
|
b) the "Artistic License"
|
||||||
|
|
||||||
|
--- The GNU General Public License, Version 1, February 1989 ---
|
||||||
|
|
||||||
|
This software is Copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||||
|
|
||||||
|
This is free software, licensed under:
|
||||||
|
|
||||||
|
The GNU General Public License, Version 1, February 1989
|
||||||
|
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
Version 1, February 1989
|
||||||
|
|
||||||
|
Copyright (C) 1989 Free Software Foundation, Inc.
|
||||||
|
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The license agreements of most software companies try to keep users
|
||||||
|
at the mercy of those companies. By contrast, our General Public
|
||||||
|
License is intended to guarantee your freedom to share and change free
|
||||||
|
software--to make sure the software is free for all its users. The
|
||||||
|
General Public License applies to the Free Software Foundation's
|
||||||
|
software and to any other program whose authors commit to using it.
|
||||||
|
You can use it for your programs, too.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Specifically, the General Public License is designed to make
|
||||||
|
sure that you have the freedom to give away or sell copies of free
|
||||||
|
software, that you receive source code or can get it if you want it,
|
||||||
|
that you can change the software or use pieces of it in new free
|
||||||
|
programs; and that you know you can do these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
anyone to deny you these rights or to ask you to surrender the rights.
|
||||||
|
These restrictions translate to certain responsibilities for you if you
|
||||||
|
distribute copies of the software, or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of a such a program, whether
|
||||||
|
gratis or for a fee, you must give the recipients all the rights that
|
||||||
|
you have. You must make sure that they, too, receive or can get the
|
||||||
|
source code. And you must tell them their rights.
|
||||||
|
|
||||||
|
We protect your rights with two steps: (1) copyright the software, and
|
||||||
|
(2) offer you this license which gives you legal permission to copy,
|
||||||
|
distribute and/or modify the software.
|
||||||
|
|
||||||
|
Also, for each author's protection and ours, we want to make certain
|
||||||
|
that everyone understands that there is no warranty for this free
|
||||||
|
software. If the software is modified by someone else and passed on, we
|
||||||
|
want its recipients to know that what they have is not the original, so
|
||||||
|
that any problems introduced by others will not reflect on the original
|
||||||
|
authors' reputations.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License Agreement applies to any program or other work which
|
||||||
|
contains a notice placed by the copyright holder saying it may be
|
||||||
|
distributed under the terms of this General Public License. The
|
||||||
|
"Program", below, refers to any such program or work, and a "work based
|
||||||
|
on the Program" means either the Program or any work containing the
|
||||||
|
Program or a portion of it, either verbatim or with modifications. Each
|
||||||
|
licensee is addressed as "you".
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Program's source
|
||||||
|
code as you receive it, in any medium, provided that you conspicuously and
|
||||||
|
appropriately publish on each copy an appropriate copyright notice and
|
||||||
|
disclaimer of warranty; keep intact all the notices that refer to this
|
||||||
|
General Public License and to the absence of any warranty; and give any
|
||||||
|
other recipients of the Program a copy of this General Public License
|
||||||
|
along with the Program. You may charge a fee for the physical act of
|
||||||
|
transferring a copy.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Program or any portion of
|
||||||
|
it, and copy and distribute such modifications under the terms of Paragraph
|
||||||
|
1 above, provided that you also do the following:
|
||||||
|
|
||||||
|
a) cause the modified files to carry prominent notices stating that
|
||||||
|
you changed the files and the date of any change; and
|
||||||
|
|
||||||
|
b) cause the whole of any work that you distribute or publish, that
|
||||||
|
in whole or in part contains the Program or any part thereof, either
|
||||||
|
with or without modifications, to be licensed at no charge to all
|
||||||
|
third parties under the terms of this General Public License (except
|
||||||
|
that you may choose to grant warranty protection to some or all
|
||||||
|
third parties, at your option).
|
||||||
|
|
||||||
|
c) If the modified program normally reads commands interactively when
|
||||||
|
run, you must cause it, when started running for such interactive use
|
||||||
|
in the simplest and most usual way, to print or display an
|
||||||
|
announcement including an appropriate copyright notice and a notice
|
||||||
|
that there is no warranty (or else, saying that you provide a
|
||||||
|
warranty) and that users may redistribute the program under these
|
||||||
|
conditions, and telling the user how to view a copy of this General
|
||||||
|
Public License.
|
||||||
|
|
||||||
|
d) You may charge a fee for the physical act of transferring a
|
||||||
|
copy, and you may at your option offer warranty protection in
|
||||||
|
exchange for a fee.
|
||||||
|
|
||||||
|
Mere aggregation of another independent work with the Program (or its
|
||||||
|
derivative) on a volume of a storage or distribution medium does not bring
|
||||||
|
the other work under the scope of these terms.
|
||||||
|
|
||||||
|
3. You may copy and distribute the Program (or a portion or derivative of
|
||||||
|
it, under Paragraph 2) in object code or executable form under the terms of
|
||||||
|
Paragraphs 1 and 2 above provided that you also do one of the following:
|
||||||
|
|
||||||
|
a) accompany it with the complete corresponding machine-readable
|
||||||
|
source code, which must be distributed under the terms of
|
||||||
|
Paragraphs 1 and 2 above; or,
|
||||||
|
|
||||||
|
b) accompany it with a written offer, valid for at least three
|
||||||
|
years, to give any third party free (except for a nominal charge
|
||||||
|
for the cost of distribution) a complete machine-readable copy of the
|
||||||
|
corresponding source code, to be distributed under the terms of
|
||||||
|
Paragraphs 1 and 2 above; or,
|
||||||
|
|
||||||
|
c) accompany it with the information you received as to where the
|
||||||
|
corresponding source code may be obtained. (This alternative is
|
||||||
|
allowed only for noncommercial distribution and only if you
|
||||||
|
received the program in object code or executable form alone.)
|
||||||
|
|
||||||
|
Source code for a work means the preferred form of the work for making
|
||||||
|
modifications to it. For an executable file, complete source code means
|
||||||
|
all the source code for all modules it contains; but, as a special
|
||||||
|
exception, it need not include source code for modules which are standard
|
||||||
|
libraries that accompany the operating system on which the executable
|
||||||
|
file runs, or for standard header files or definitions files that
|
||||||
|
accompany that operating system.
|
||||||
|
|
||||||
|
4. You may not copy, modify, sublicense, distribute or transfer the
|
||||||
|
Program except as expressly provided under this General Public License.
|
||||||
|
Any attempt otherwise to copy, modify, sublicense, distribute or transfer
|
||||||
|
the Program is void, and will automatically terminate your rights to use
|
||||||
|
the Program under this License. However, parties who have received
|
||||||
|
copies, or rights to use copies, from you under this General Public
|
||||||
|
License will not have their licenses terminated so long as such parties
|
||||||
|
remain in full compliance.
|
||||||
|
|
||||||
|
5. By copying, distributing or modifying the Program (or any work based
|
||||||
|
on the Program) you indicate your acceptance of this license to do so,
|
||||||
|
and all its terms and conditions.
|
||||||
|
|
||||||
|
6. Each time you redistribute the Program (or any work based on the
|
||||||
|
Program), the recipient automatically receives a license from the original
|
||||||
|
licensor to copy, distribute or modify the Program subject to these
|
||||||
|
terms and conditions. You may not impose any further restrictions on the
|
||||||
|
recipients' exercise of the rights granted herein.
|
||||||
|
|
||||||
|
7. The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the General Public License from time to time. Such new versions will
|
||||||
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Program
|
||||||
|
specifies a version number of the license which applies to it and "any
|
||||||
|
later version", you have the option of following the terms and conditions
|
||||||
|
either of that version or of any later version published by the Free
|
||||||
|
Software Foundation. If the Program does not specify a version number of
|
||||||
|
the license, you may choose any version ever published by the Free Software
|
||||||
|
Foundation.
|
||||||
|
|
||||||
|
8. If you wish to incorporate parts of the Program into other free
|
||||||
|
programs whose distribution conditions are different, write to the author
|
||||||
|
to ask for permission. For software which is copyrighted by the Free
|
||||||
|
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||||
|
make exceptions for this. Our decision will be guided by the two goals
|
||||||
|
of preserving the free status of all derivatives of our free software and
|
||||||
|
of promoting the sharing and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||||
|
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||||
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||||
|
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||||
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||||
|
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||||
|
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||||
|
REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||||
|
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||||
|
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||||
|
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||||
|
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||||
|
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
Appendix: How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to humanity, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these
|
||||||
|
terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest to
|
||||||
|
attach them to the start of each source file to most effectively convey
|
||||||
|
the exclusion of warranty; and each file should have at least the
|
||||||
|
"copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) 19yy <name of author>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 1, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
|
||||||
|
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If the program is interactive, make it output a short notice like this
|
||||||
|
when it starts in an interactive mode:
|
||||||
|
|
||||||
|
Gnomovision version 69, Copyright (C) 19xx name of author
|
||||||
|
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
|
This is free software, and you are welcome to redistribute it
|
||||||
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
|
The hypothetical commands `show w' and `show c' should show the
|
||||||
|
appropriate parts of the General Public License. Of course, the
|
||||||
|
commands you use may be called something other than `show w' and `show
|
||||||
|
c'; they could even be mouse-clicks or menu items--whatever suits your
|
||||||
|
program.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||||
|
necessary. Here a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||||
|
program `Gnomovision' (a program to direct compilers to make passes
|
||||||
|
at assemblers) written by James Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1989
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
That's all there is to it!
|
||||||
|
|
||||||
|
|
||||||
|
--- The Artistic License 1.0 ---
|
||||||
|
|
||||||
|
This software is Copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||||
|
|
||||||
|
This is free software, licensed under:
|
||||||
|
|
||||||
|
The Artistic License 1.0
|
||||||
|
|
||||||
|
The Artistic License
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The intent of this document is to state the conditions under which a Package
|
||||||
|
may be copied, such that the Copyright Holder maintains some semblance of
|
||||||
|
artistic control over the development of the package, while giving the users of
|
||||||
|
the package the right to use and distribute the Package in a more-or-less
|
||||||
|
customary fashion, plus the right to make reasonable modifications.
|
||||||
|
|
||||||
|
Definitions:
|
||||||
|
|
||||||
|
- "Package" refers to the collection of files distributed by the Copyright
|
||||||
|
Holder, and derivatives of that collection of files created through
|
||||||
|
textual modification.
|
||||||
|
- "Standard Version" refers to such a Package if it has not been modified,
|
||||||
|
or has been modified in accordance with the wishes of the Copyright
|
||||||
|
Holder.
|
||||||
|
- "Copyright Holder" is whoever is named in the copyright or copyrights for
|
||||||
|
the package.
|
||||||
|
- "You" is you, if you're thinking about copying or distributing this Package.
|
||||||
|
- "Reasonable copying fee" is whatever you can justify on the basis of media
|
||||||
|
cost, duplication charges, time of people involved, and so on. (You will
|
||||||
|
not be required to justify it to the Copyright Holder, but only to the
|
||||||
|
computing community at large as a market that must bear the fee.)
|
||||||
|
- "Freely Available" means that no fee is charged for the item itself, though
|
||||||
|
there may be fees involved in handling the item. It also means that
|
||||||
|
recipients of the item may redistribute it under the same conditions they
|
||||||
|
received it.
|
||||||
|
|
||||||
|
1. You may make and give away verbatim copies of the source form of the
|
||||||
|
Standard Version of this Package without restriction, provided that you
|
||||||
|
duplicate all of the original copyright notices and associated disclaimers.
|
||||||
|
|
||||||
|
2. You may apply bug fixes, portability fixes and other modifications derived
|
||||||
|
from the Public Domain or from the Copyright Holder. A Package modified in such
|
||||||
|
a way shall still be considered the Standard Version.
|
||||||
|
|
||||||
|
3. You may otherwise modify your copy of this Package in any way, provided that
|
||||||
|
you insert a prominent notice in each changed file stating how and when you
|
||||||
|
changed that file, and provided that you do at least ONE of the following:
|
||||||
|
|
||||||
|
a) place your modifications in the Public Domain or otherwise make them
|
||||||
|
Freely Available, such as by posting said modifications to Usenet or an
|
||||||
|
equivalent medium, or placing the modifications on a major archive site
|
||||||
|
such as ftp.uu.net, or by allowing the Copyright Holder to include your
|
||||||
|
modifications in the Standard Version of the Package.
|
||||||
|
|
||||||
|
b) use the modified Package only within your corporation or organization.
|
||||||
|
|
||||||
|
c) rename any non-standard executables so the names do not conflict with
|
||||||
|
standard executables, which must also be provided, and provide a separate
|
||||||
|
manual page for each non-standard executable that clearly documents how it
|
||||||
|
differs from the Standard Version.
|
||||||
|
|
||||||
|
d) make other distribution arrangements with the Copyright Holder.
|
||||||
|
|
||||||
|
4. You may distribute the programs of this Package in object code or executable
|
||||||
|
form, provided that you do at least ONE of the following:
|
||||||
|
|
||||||
|
a) distribute a Standard Version of the executables and library files,
|
||||||
|
together with instructions (in the manual page or equivalent) on where to
|
||||||
|
get the Standard Version.
|
||||||
|
|
||||||
|
b) accompany the distribution with the machine-readable source of the Package
|
||||||
|
with your modifications.
|
||||||
|
|
||||||
|
c) accompany any non-standard executables with their corresponding Standard
|
||||||
|
Version executables, giving the non-standard executables non-standard
|
||||||
|
names, and clearly documenting the differences in manual pages (or
|
||||||
|
equivalent), together with instructions on where to get the Standard
|
||||||
|
Version.
|
||||||
|
|
||||||
|
d) make other distribution arrangements with the Copyright Holder.
|
||||||
|
|
||||||
|
5. You may charge a reasonable copying fee for any distribution of this
|
||||||
|
Package. You may charge any fee you choose for support of this Package. You
|
||||||
|
may not charge a fee for this Package itself. However, you may distribute this
|
||||||
|
Package in aggregate with other (possibly commercial) programs as part of a
|
||||||
|
larger (possibly commercial) software distribution provided that you do not
|
||||||
|
advertise this Package as a product of your own.
|
||||||
|
|
||||||
|
6. The scripts and library files supplied as input to or produced as output
|
||||||
|
from the programs of this Package do not automatically fall under the copyright
|
||||||
|
of this Package, but belong to whomever generated them, and may be sold
|
||||||
|
commercially, and may be aggregated with this Package.
|
||||||
|
|
||||||
|
7. C or perl subroutines supplied by you and linked into this Package shall not
|
||||||
|
be considered part of this Package.
|
||||||
|
|
||||||
|
8. The name of the Copyright Holder may not be used to endorse or promote
|
||||||
|
products derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
The End
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.024.
|
||||||
|
Changes
|
||||||
|
INSTALL
|
||||||
|
LICENSE
|
||||||
|
MANIFEST
|
||||||
|
META.json
|
||||||
|
META.yml
|
||||||
|
Makefile.PL
|
||||||
|
README
|
||||||
|
corpus/quickbrown.txt
|
||||||
|
corpus/test1-latin1.txt
|
||||||
|
corpus/test1.txt
|
||||||
|
dist.ini
|
||||||
|
lib/PerlIO/utf8_strict.pm
|
||||||
|
ppport.h
|
||||||
|
t/basics.t
|
||||||
|
t/bug.t
|
||||||
|
t/incomplete.t
|
||||||
|
t/lib/Util.pm
|
||||||
|
t/non-shortest-form.t
|
||||||
|
t/noncharacters.t
|
||||||
|
t/super.t
|
||||||
|
t/surrogates.t
|
||||||
|
t/unicode.t
|
||||||
|
utf8_strict.xs
|
||||||
|
xt/author/pod-syntax.t
|
|
@ -0,0 +1,84 @@
|
||||||
|
{
|
||||||
|
"abstract" : "Fast and correct UTF-8 IO",
|
||||||
|
"author" : [
|
||||||
|
"Leon Timmermans <leont@cpan.org>",
|
||||||
|
"Christian Hansen <chansen@cpan.org>"
|
||||||
|
],
|
||||||
|
"dynamic_config" : 0,
|
||||||
|
"generated_by" : "Dist::Zilla version 6.024, CPAN::Meta::Converter version 2.150010",
|
||||||
|
"license" : [
|
||||||
|
"perl_5"
|
||||||
|
],
|
||||||
|
"meta-spec" : {
|
||||||
|
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
|
||||||
|
"version" : 2
|
||||||
|
},
|
||||||
|
"name" : "PerlIO-utf8_strict",
|
||||||
|
"prereqs" : {
|
||||||
|
"configure" : {
|
||||||
|
"requires" : {
|
||||||
|
"ExtUtils::MakeMaker" : "0",
|
||||||
|
"perl" : "5.006"
|
||||||
|
},
|
||||||
|
"suggests" : {
|
||||||
|
"JSON::PP" : "2.27300"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"develop" : {
|
||||||
|
"requires" : {
|
||||||
|
"Devel::PPPort" : "3.23",
|
||||||
|
"Test::More" : "0",
|
||||||
|
"Test::Pod" : "1.41"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"runtime" : {
|
||||||
|
"requires" : {
|
||||||
|
"XSLoader" : "0",
|
||||||
|
"perl" : "5.008",
|
||||||
|
"strict" : "0",
|
||||||
|
"warnings" : "0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"test" : {
|
||||||
|
"requires" : {
|
||||||
|
"Carp" : "0",
|
||||||
|
"Exporter" : "0",
|
||||||
|
"File::Spec::Functions" : "0",
|
||||||
|
"IO::File" : "0",
|
||||||
|
"IO::Handle" : "0",
|
||||||
|
"Test::Exception" : "0",
|
||||||
|
"Test::More" : "0.88",
|
||||||
|
"lib" : "0",
|
||||||
|
"perl" : "5.008",
|
||||||
|
"utf8" : "0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"provides" : {
|
||||||
|
"PerlIO::utf8_strict" : {
|
||||||
|
"file" : "lib/PerlIO/utf8_strict.pm",
|
||||||
|
"version" : "0.009"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"release_status" : "stable",
|
||||||
|
"resources" : {
|
||||||
|
"bugtracker" : {
|
||||||
|
"mailto" : "bug-perlio-utf8_strict at rt.cpan.org",
|
||||||
|
"web" : "http://rt.cpan.org/Public/Dist/Display.html?Name=PerlIO-utf8_strict"
|
||||||
|
},
|
||||||
|
"repository" : {
|
||||||
|
"type" : "git",
|
||||||
|
"url" : "git://github.com/Leont/perlio-utf8_strict.git",
|
||||||
|
"web" : "https://github.com/Leont/perlio-utf8_strict"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"version" : "0.009",
|
||||||
|
"x_contributors" : [
|
||||||
|
"Andreas V\u00f6gele <voegelas@cpan.org>",
|
||||||
|
"Leon Timmermans <fawaka@gmail.com>"
|
||||||
|
],
|
||||||
|
"x_generated_by_perl" : "v5.32.0",
|
||||||
|
"x_serialization_backend" : "Cpanel::JSON::XS version 4.19",
|
||||||
|
"x_spdx_expression" : "Artistic-1.0-Perl OR GPL-1.0-or-later"
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
---
|
||||||
|
abstract: 'Fast and correct UTF-8 IO'
|
||||||
|
author:
|
||||||
|
- 'Leon Timmermans <leont@cpan.org>'
|
||||||
|
- 'Christian Hansen <chansen@cpan.org>'
|
||||||
|
build_requires:
|
||||||
|
Carp: '0'
|
||||||
|
Exporter: '0'
|
||||||
|
File::Spec::Functions: '0'
|
||||||
|
IO::File: '0'
|
||||||
|
IO::Handle: '0'
|
||||||
|
Test::Exception: '0'
|
||||||
|
Test::More: '0.88'
|
||||||
|
lib: '0'
|
||||||
|
perl: '5.008'
|
||||||
|
utf8: '0'
|
||||||
|
configure_requires:
|
||||||
|
ExtUtils::MakeMaker: '0'
|
||||||
|
perl: '5.006'
|
||||||
|
dynamic_config: 0
|
||||||
|
generated_by: 'Dist::Zilla version 6.024, CPAN::Meta::Converter version 2.150010'
|
||||||
|
license: perl
|
||||||
|
meta-spec:
|
||||||
|
url: http://module-build.sourceforge.net/META-spec-v1.4.html
|
||||||
|
version: '1.4'
|
||||||
|
name: PerlIO-utf8_strict
|
||||||
|
provides:
|
||||||
|
PerlIO::utf8_strict:
|
||||||
|
file: lib/PerlIO/utf8_strict.pm
|
||||||
|
version: '0.009'
|
||||||
|
requires:
|
||||||
|
XSLoader: '0'
|
||||||
|
perl: '5.008'
|
||||||
|
strict: '0'
|
||||||
|
warnings: '0'
|
||||||
|
resources:
|
||||||
|
bugtracker: http://rt.cpan.org/Public/Dist/Display.html?Name=PerlIO-utf8_strict
|
||||||
|
repository: git://github.com/Leont/perlio-utf8_strict.git
|
||||||
|
version: '0.009'
|
||||||
|
x_contributors:
|
||||||
|
- 'Andreas Vögele <voegelas@cpan.org>'
|
||||||
|
- 'Leon Timmermans <fawaka@gmail.com>'
|
||||||
|
x_generated_by_perl: v5.32.0
|
||||||
|
x_serialization_backend: 'YAML::Tiny version 1.73'
|
||||||
|
x_spdx_expression: 'Artistic-1.0-Perl OR GPL-1.0-or-later'
|
|
@ -0,0 +1,67 @@
|
||||||
|
# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.024.
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use 5.008;
|
||||||
|
|
||||||
|
use ExtUtils::MakeMaker;
|
||||||
|
|
||||||
|
my %WriteMakefileArgs = (
|
||||||
|
"ABSTRACT" => "Fast and correct UTF-8 IO",
|
||||||
|
"AUTHOR" => "Leon Timmermans <leont\@cpan.org>, Christian Hansen <chansen\@cpan.org>",
|
||||||
|
"CONFIGURE_REQUIRES" => {
|
||||||
|
"ExtUtils::MakeMaker" => 0
|
||||||
|
},
|
||||||
|
"DISTNAME" => "PerlIO-utf8_strict",
|
||||||
|
"LICENSE" => "perl",
|
||||||
|
"MIN_PERL_VERSION" => "5.008",
|
||||||
|
"NAME" => "PerlIO::utf8_strict",
|
||||||
|
"PREREQ_PM" => {
|
||||||
|
"XSLoader" => 0,
|
||||||
|
"strict" => 0,
|
||||||
|
"warnings" => 0
|
||||||
|
},
|
||||||
|
"TEST_REQUIRES" => {
|
||||||
|
"Carp" => 0,
|
||||||
|
"Exporter" => 0,
|
||||||
|
"File::Spec::Functions" => 0,
|
||||||
|
"IO::File" => 0,
|
||||||
|
"IO::Handle" => 0,
|
||||||
|
"Test::Exception" => 0,
|
||||||
|
"Test::More" => "0.88",
|
||||||
|
"lib" => 0,
|
||||||
|
"utf8" => 0
|
||||||
|
},
|
||||||
|
"VERSION" => "0.009",
|
||||||
|
"test" => {
|
||||||
|
"TESTS" => "t/*.t"
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
my %FallbackPrereqs = (
|
||||||
|
"Carp" => 0,
|
||||||
|
"Exporter" => 0,
|
||||||
|
"File::Spec::Functions" => 0,
|
||||||
|
"IO::File" => 0,
|
||||||
|
"IO::Handle" => 0,
|
||||||
|
"Test::Exception" => 0,
|
||||||
|
"Test::More" => "0.88",
|
||||||
|
"XSLoader" => 0,
|
||||||
|
"lib" => 0,
|
||||||
|
"strict" => 0,
|
||||||
|
"utf8" => 0,
|
||||||
|
"warnings" => 0
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) {
|
||||||
|
delete $WriteMakefileArgs{TEST_REQUIRES};
|
||||||
|
delete $WriteMakefileArgs{BUILD_REQUIRES};
|
||||||
|
$WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
|
||||||
|
unless eval { ExtUtils::MakeMaker->VERSION(6.52) };
|
||||||
|
|
||||||
|
WriteMakefile(%WriteMakefileArgs);
|
|
@ -0,0 +1,12 @@
|
||||||
|
This archive contains the distribution PerlIO-utf8_strict,
|
||||||
|
version 0.009:
|
||||||
|
|
||||||
|
Fast and correct UTF-8 IO
|
||||||
|
|
||||||
|
This software is copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||||
|
|
||||||
|
This is free software; you can redistribute it and/or modify it under
|
||||||
|
the same terms as the Perl 5 programming language system itself.
|
||||||
|
|
||||||
|
|
||||||
|
This README file was generated by Dist::Zilla::Plugin::Readme v6.024.
|
|
@ -0,0 +1,138 @@
|
||||||
|
Sentences that contain all letters commonly used in a language
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2010-09-20
|
||||||
|
|
||||||
|
This is an example of a plain-text file encoded in UTF-8.
|
||||||
|
|
||||||
|
|
||||||
|
Danish (da)
|
||||||
|
---------
|
||||||
|
|
||||||
|
Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
|
||||||
|
Wolther spillede på xylofon.
|
||||||
|
(= Quiz contestants were eating strawbery with cream while Wolther
|
||||||
|
the circus clown played on xylophone.)
|
||||||
|
|
||||||
|
German (de)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
|
||||||
|
(= Wrongful practicing of xylophone music tortures every larger dwarf)
|
||||||
|
|
||||||
|
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
|
||||||
|
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
|
||||||
|
|
||||||
|
Heizölrückstoßabdämpfung
|
||||||
|
(= fuel oil recoil absorber)
|
||||||
|
(jqvwxy missing, but all non-ASCII letters in one word)
|
||||||
|
|
||||||
|
Greek (el)
|
||||||
|
----------
|
||||||
|
|
||||||
|
Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο
|
||||||
|
(= No more shall I see acacias or myrtles in the golden clearing)
|
||||||
|
|
||||||
|
Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
|
||||||
|
(= I uncover the soul-destroying abhorrence)
|
||||||
|
|
||||||
|
English (en)
|
||||||
|
------------
|
||||||
|
|
||||||
|
The quick brown fox jumps over the lazy dog
|
||||||
|
|
||||||
|
Spanish (es)
|
||||||
|
------------
|
||||||
|
|
||||||
|
El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y
|
||||||
|
frío, añoraba a su querido cachorro.
|
||||||
|
(Contains every letter and every accent, but not every combination
|
||||||
|
of vowel + acute.)
|
||||||
|
|
||||||
|
French (fr)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
|
||||||
|
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
|
||||||
|
qui lui permet de penser à la cænogenèse de l'être dont il est question
|
||||||
|
dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
|
||||||
|
pense-t-il, diminue çà et là la qualité de son œuvre.
|
||||||
|
|
||||||
|
l'île exiguë
|
||||||
|
Où l'obèse jury mûr
|
||||||
|
Fête l'haï volapük,
|
||||||
|
Âne ex aéquo au whist,
|
||||||
|
Ôtez ce vœu déçu.
|
||||||
|
|
||||||
|
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
|
||||||
|
canoë au delà des îles, près du mälström où brûlent les novæ.
|
||||||
|
|
||||||
|
Irish Gaelic (ga)
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
|
||||||
|
|
||||||
|
Hungarian (hu)
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Árvíztűrő tükörfúrógép
|
||||||
|
(= flood-proof mirror-drilling machine, only all non-ASCII letters)
|
||||||
|
|
||||||
|
Icelandic (is)
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
|
||||||
|
|
||||||
|
Sævör grét áðan því úlpan var ónýt
|
||||||
|
(some ASCII letters missing)
|
||||||
|
|
||||||
|
Japanese (jp)
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Hiragana: (Iroha)
|
||||||
|
|
||||||
|
いろはにほへとちりぬるを
|
||||||
|
わかよたれそつねならむ
|
||||||
|
うゐのおくやまけふこえて
|
||||||
|
あさきゆめみしゑひもせす
|
||||||
|
|
||||||
|
Katakana:
|
||||||
|
|
||||||
|
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
|
||||||
|
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
|
||||||
|
|
||||||
|
Hebrew (iw)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
|
||||||
|
|
||||||
|
Polish (pl)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Pchnąć w tę łódź jeża lub ośm skrzyń fig
|
||||||
|
(= To push a hedgehog or eight bins of figs in this boat)
|
||||||
|
|
||||||
|
Russian (ru)
|
||||||
|
------------
|
||||||
|
|
||||||
|
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
|
||||||
|
(= Would a citrus live in the bushes of south? Yes, but only a fake one!)
|
||||||
|
|
||||||
|
Съешь же ещё этих мягких французских булок да выпей чаю
|
||||||
|
(= Eat some more of these fresh French loafs and have some tea)
|
||||||
|
|
||||||
|
Thai (th)
|
||||||
|
---------
|
||||||
|
|
||||||
|
[--------------------------|------------------------]
|
||||||
|
๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน
|
||||||
|
จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
|
||||||
|
ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย
|
||||||
|
ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
|
||||||
|
|
||||||
|
[The copyright for the Thai example is owned by The Computer
|
||||||
|
Association of Thailand under the Royal Patronage of His Majesty the
|
||||||
|
King.]
|
||||||
|
|
||||||
|
Special thanks to the people from all over the world who contributed
|
||||||
|
these sentences.
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fo<EFBFBD> B<>rtololol
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Foö-Báŗ
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
name = PerlIO-utf8_strict
|
||||||
|
author = Leon Timmermans <leont@cpan.org>
|
||||||
|
author = Christian Hansen <chansen@cpan.org>
|
||||||
|
license = Perl_5
|
||||||
|
copyright_holder = Leon Timmermans, Christian Hansen
|
||||||
|
copyright_year = 2012
|
||||||
|
|
||||||
|
[Git::GatherDir]
|
||||||
|
[PruneCruft]
|
||||||
|
[MetaYAML]
|
||||||
|
[MetaJSON]
|
||||||
|
[Readme]
|
||||||
|
[Manifest]
|
||||||
|
[License]
|
||||||
|
|
||||||
|
[AutoPrereqs]
|
||||||
|
[Repository]
|
||||||
|
[Bugtracker]
|
||||||
|
[MetaProvides::Package]
|
||||||
|
[NextRelease]
|
||||||
|
|
||||||
|
[Git::Contributors]
|
||||||
|
[MinimumPerl]
|
||||||
|
[MakeMaker]
|
||||||
|
|
||||||
|
[Git::NextVersion]
|
||||||
|
[@Git]
|
||||||
|
|
||||||
|
[CheckChangesHasContent]
|
||||||
|
[RunExtraTests]
|
||||||
|
[TestRelease]
|
||||||
|
[ConfirmRelease]
|
||||||
|
[UploadToCPAN]
|
||||||
|
[PodWeaver]
|
||||||
|
[PkgVersion]
|
||||||
|
[PodSyntaxTests]
|
||||||
|
[InstallGuide]
|
||||||
|
|
||||||
|
[PPPort]
|
||||||
|
style = MakeMaker
|
|
@ -0,0 +1,88 @@
|
||||||
|
package PerlIO::utf8_strict;
|
||||||
|
$PerlIO::utf8_strict::VERSION = '0.009';
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use XSLoader;
|
||||||
|
|
||||||
|
XSLoader::load(__PACKAGE__, __PACKAGE__->VERSION);
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
|
#ABSTRACT: Fast and correct UTF-8 IO
|
||||||
|
|
||||||
|
__END__
|
||||||
|
|
||||||
|
=pod
|
||||||
|
|
||||||
|
=encoding UTF-8
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
PerlIO::utf8_strict - Fast and correct UTF-8 IO
|
||||||
|
|
||||||
|
=head1 VERSION
|
||||||
|
|
||||||
|
version 0.009
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
open my $fh, '<:utf8_strict', $filename;
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
This module provides a fast and correct UTF-8 PerlIO layer. Unlike perl's default C<:utf8> layer it checks the input for correctness.
|
||||||
|
|
||||||
|
=head1 LAYER ARGUMENTS
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item allow_noncharacters
|
||||||
|
|
||||||
|
=item allow_surrogates
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 EXPORT
|
||||||
|
|
||||||
|
PerlIO::utf8_strict exports no subroutines or symbols; it only provides the PerlIO layer C<utf8_strict>.
|
||||||
|
|
||||||
|
=head1 DIAGNOSTICS
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item Can't decode ill-formed UTF-8 octet sequence <%s>
|
||||||
|
|
||||||
|
(F) Encountered an ill-formed UTF-8 octet sequence. <%s> contains a hexadecimal
|
||||||
|
representation of the maximal subpart of the ill-formed subsequence.
|
||||||
|
|
||||||
|
=item Can't interchange noncharacter code point U+%.4X
|
||||||
|
|
||||||
|
(F) Noncharacters are code points that are permanently reserved for internal use and should
|
||||||
|
never be interchanged. Noncharacters consist of the values U+nFFFE and U+nFFFF
|
||||||
|
(where n is from 0x0 to 0x10) and the values U+FDD0..U+FDEF.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 AUTHORS
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item *
|
||||||
|
|
||||||
|
Leon Timmermans <leont@cpan.org>
|
||||||
|
|
||||||
|
=item *
|
||||||
|
|
||||||
|
Christian Hansen <chansen@cpan.org>
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 COPYRIGHT AND LICENSE
|
||||||
|
|
||||||
|
This software is copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||||
|
|
||||||
|
This is free software; you can redistribute it and/or modify it under
|
||||||
|
the same terms as the Perl 5 programming language system itself.
|
||||||
|
|
||||||
|
=cut
|
|
@ -0,0 +1,41 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
|
||||||
|
use File::Spec::Functions qw/catfile/;
|
||||||
|
|
||||||
|
my $builder = Test::More->builder;
|
||||||
|
binmode $builder->output, ":utf8";
|
||||||
|
binmode $builder->failure_output, ":utf8";
|
||||||
|
binmode $builder->todo_output, ":utf8";
|
||||||
|
|
||||||
|
{
|
||||||
|
my $filename = catfile(qw/corpus test1.txt/);
|
||||||
|
open my $fh, '<:utf8_strict', $filename or die "Couldn't open file $filename";
|
||||||
|
|
||||||
|
my $line = <$fh>;
|
||||||
|
|
||||||
|
is($line, "Foö-Báŗ\n", 'Content is Foö-Báŗ');
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
my $filename = catfile(qw/corpus quickbrown.txt/);
|
||||||
|
open my $fh, '<:utf8_strict', $filename or die "Couldn't open file $filename";
|
||||||
|
|
||||||
|
lives_ok { my $data = do { local $/; <$fh> } } 'successfull reading quickbrown.txt'
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
my $filename = catfile(qw/corpus test1-latin1.txt/);
|
||||||
|
open my $fh, '<:utf8_strict', $filename or die "Couldn't open file $filename";
|
||||||
|
|
||||||
|
my $line;
|
||||||
|
throws_ok { $line = <$fh> } qr/^Can't decode ill-formed UTF-8 octet sequence/, 'Trying to read ill-formed encoded UTF-8 fails' or diag "Just read '$line'";
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
|
@ -0,0 +1,34 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use IO::Handle;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets slurp];
|
||||||
|
|
||||||
|
|
||||||
|
my $fh = fh_with_octets("\xE2\x98\xBA" x 8092);
|
||||||
|
|
||||||
|
lives_ok {
|
||||||
|
my $data = do { local $/; <$fh> }
|
||||||
|
} q[successfull reading 8092 WHITE SMILING FACE's];
|
||||||
|
|
||||||
|
{
|
||||||
|
my $line = 'ascii';
|
||||||
|
my ( $in, $out );
|
||||||
|
pipe $in, $out;
|
||||||
|
binmode $out, ':utf8_strict';
|
||||||
|
binmode $in, ':utf8_strict';
|
||||||
|
print $out "...\n";
|
||||||
|
$out->flush;
|
||||||
|
$line .= readline $in;
|
||||||
|
|
||||||
|
is($line, "ascii...\n", 'Appending from utf8 to ascii');
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
done_testing;
|
|
@ -0,0 +1,27 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||||
|
|
||||||
|
|
||||||
|
for (my $cp = 0x80; $cp < 0x10FFFF; $cp += 0x1000) {
|
||||||
|
my $sequence = substr(pack_utf8($cp), 0, -1);
|
||||||
|
|
||||||
|
my $name = sprintf 'reading incomplete UTF-8 sequence <%s> throws an exception',
|
||||||
|
join ' ', map { sprintf '%.2X', ord $_ } split //, $sequence;
|
||||||
|
|
||||||
|
my $fh = fh_with_octets($sequence);
|
||||||
|
|
||||||
|
throws_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
package Util;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use Carp qw[];
|
||||||
|
use IO::File qw[SEEK_SET];
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
our @EXPORT_OK = qw[ fh_with_codepoints fh_with_octets
|
||||||
|
pack_utf8 pack_overlong_utf8
|
||||||
|
slurp rewind
|
||||||
|
tmpfile ];
|
||||||
|
our %EXPORT_TAGS = (
|
||||||
|
all => [ @EXPORT_OK ],
|
||||||
|
);
|
||||||
|
|
||||||
|
require Exporter;
|
||||||
|
*import = \&Exporter::import;
|
||||||
|
}
|
||||||
|
|
||||||
|
my @UTF8_MIN = (0x80, 0x800, 0x10000, 0x200000, 0x4000000, 0x80000000);
|
||||||
|
# Encode code point $cp as a UTF-8 style octet string.
#
# With one argument the shortest sequence length (1..6 octets) is chosen.
# With an explicit $len the code point is packed into exactly that many
# octets, which is how the tests build non-shortest ("overlong") forms.
# Confesses when $cp is out of range or does not fit in $len octets.
sub pack_utf8 ($;$) {
    my ($cp, $len) = @_;

    Carp::confess(qq/Cannot pack '$cp'/)
      unless $cp >= 0 && $cp < 0x80000000;

    Carp::confess(qq/Cannot pack '$cp' to sequence length '$len'/)
      unless @_ == 1
          || ($len > 0 && $len <= 6 && $cp < $UTF8_MIN[$len - 1]);

    # Number of octets to emit: the caller's choice, or the shortest form.
    my $size = $len;
    unless ($size) {
        if    ($cp < 0x80)      { $size = 1 }
        elsif ($cp < 0x800)     { $size = 2 }
        elsif ($cp < 0x10000)   { $size = 3 }
        elsif ($cp < 0x200000)  { $size = 4 }
        elsif ($cp < 0x4000000) { $size = 5 }
        else                    { $size = 6 }
    }

    # Lead-octet marker bits, indexed by (sequence length - 1).
    my @lead_bits = (0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC);

    my @octets = (0) x $size;

    # Fill continuation octets from the last one backwards, 6 bits each.
    for my $i (reverse 1 .. $size - 1) {
        $octets[$i] = ($cp & 0x3F) | 0x80;
        $cp >>= 6;
    }

    # Whatever is left of the code point goes into the lead octet.
    $octets[0] = $cp | $lead_bits[$size - 1];

    return pack('C*', @octets);
}
|
||||||
|
|
||||||
|
# Build every non-shortest ("overlong") encoding of $cp, from 2 up to 6
# octets, keeping only the lengths that are longer than necessary.
# Returns the whole list in list context and the first entry otherwise.
sub pack_overlong_utf8 ($) {
    my ($cp) = @_;

    Carp::confess(qq/Cannot pack '$cp'/)
      unless $cp >= 0 && $cp < 0x4000000;

    # $UTF8_MIN[$i] is the first code point that needs more than $i + 1
    # octets, so anything below it is overlong when packed into $i + 2.
    my @enc = map  { pack_utf8($cp, $_ + 2) }
              grep { $cp < $UTF8_MIN[$_] } 0 .. 4;

    return wantarray ? @enc : $enc[0];
}
|
||||||
|
|
||||||
|
# Seek the given file handle back to its beginning; dies on failure.
sub rewind (*) {
    my $ok = seek($_[0], 0, SEEK_SET);
    die(qq/Couldn't rewind file handle: '$!'/) unless $ok;
    return $ok;
}
|
||||||
|
|
||||||
|
# Create an anonymous temporary file in binary mode.  When arguments are
# given they are written to the file and the handle is rewound, so the
# caller can read the data straight back.  Dies on any failure.
sub tmpfile (;$) {
    my $fh = IO::File->new_tmpfile;
    die(qq/Couldn't create a new temporary file: '$!'/)
      unless $fh;

    die(qq/Couldn't binmode temporary file handle: '$!'/)
      unless binmode($fh);

    return $fh unless @_;

    die(qq/Couldn't write to temporary file handle: '$!'/)
      unless print({$fh} @_);

    die(qq/Couldn't rewind temporary file handle: '$!'/)
      unless seek($fh, 0, SEEK_SET);

    return $fh;
}
|
||||||
|
|
||||||
|
# Read everything that is left on the handle in a single readline call.
sub slurp (*) {
    my ($handle) = @_;

    # An undefined input record separator makes readline return the rest
    # of the stream as one record.
    local $/ = undef;
    return <$handle>;
}
|
||||||
|
|
||||||
|
# Return a read handle over $octets with the :utf8_strict layer applied.
# Any extra arguments are passed to the layer as a comma separated
# argument list, e.g. fh_with_octets($data, 'allow_surrogates').
#
# The data is staged in a temporary file.  The previous version also
# carried an in-memory (scalar reference) variant behind "if (0)", which
# could never run; that unreachable branch has been removed.
sub fh_with_octets ($;@) {
    my ($octets, @args) = @_;

    my $args = @args ? sprintf('(%s)', join ',', @args) : '';

    my $fh = tmpfile($octets);
    binmode($fh, ":utf8_strict${args}")
      or die(qq/Couldn't binmode :utf8_strict${args} '$!'/);
    return $fh;
}
|
||||||
|
|
||||||
|
# Encode each code point with pack_utf8() and return a :utf8_strict read
# handle over the concatenated octets.
sub fh_with_codepoints ($;@) {
    my $octets = '';
    $octets .= pack_utf8($_) for @_;
    return fh_with_octets($octets);
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets pack_overlong_utf8 slurp];
|
||||||
|
|
||||||
|
my @tests = (
|
||||||
|
0x00,
|
||||||
|
0x80,
|
||||||
|
0x800,
|
||||||
|
0x1000,
|
||||||
|
);
|
||||||
|
|
||||||
|
foreach my $cp (@tests) {
|
||||||
|
foreach my $sequence (pack_overlong_utf8($cp)) {
|
||||||
|
my $name = sprintf 'reading non-shortest form representation of U+%.4X <%s> throws an exception',
|
||||||
|
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $sequence;
|
||||||
|
|
||||||
|
my $fh = fh_with_octets($sequence);
|
||||||
|
|
||||||
|
throws_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||||
|
|
||||||
|
my @NONCHARACTERS = (0xFDD0 .. 0xFDEF);
|
||||||
|
{
|
||||||
|
for (my $i = 0; $i < 0x10FFFF; $i += 0x10000) {
|
||||||
|
push @NONCHARACTERS, $i ^ 0xFFFE, $i ^ 0xFFFF;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach my $cp (@NONCHARACTERS) {
|
||||||
|
my $octets = pack_utf8($cp);
|
||||||
|
my $name = sprintf 'reading noncharacter U+%.4X <%s> throws an exception when using strict',
|
||||||
|
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $octets;
|
||||||
|
|
||||||
|
my $fh = fh_with_octets($octets);
|
||||||
|
my $hex = sprintf '%.4X', $cp;
|
||||||
|
throws_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} qr/^Can't interchange noncharacter code point U\+$hex/, $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach my $cp (@NONCHARACTERS) {
|
||||||
|
my $octets = pack_utf8($cp);
|
||||||
|
my $name = sprintf 'reading noncharacter U+%.4X <%s> succeeds when allow_noncharacters is set',
|
||||||
|
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $octets;
|
||||||
|
|
||||||
|
my $fh = fh_with_octets($octets, 'allow_noncharacters');
|
||||||
|
|
||||||
|
lives_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||||
|
|
||||||
|
for (my $cp = 0x0011_0000; $cp < 0x7FFF_FFFF; $cp += 0x200000) {
|
||||||
|
my $name = sprintf 'reading encoded super codepoint U-%.8X throws an exception',
|
||||||
|
$cp;
|
||||||
|
|
||||||
|
my $fh = fh_with_octets(pack_utf8($cp));
|
||||||
|
|
||||||
|
throws_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||||
|
|
||||||
|
my @SURROGATES = (0xD800 .. 0xDFFF);
|
||||||
|
|
||||||
|
foreach my $cp (@SURROGATES) {
|
||||||
|
my $fh = fh_with_octets(pack_utf8($cp));
|
||||||
|
|
||||||
|
my $name = sprintf 'reading encoded surrogate U+%.4X throws an exception when using strict', $cp;
|
||||||
|
|
||||||
|
throws_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach my $cp (@SURROGATES) {
|
||||||
|
my $fh = fh_with_octets(pack_utf8($cp), 'allow_surrogates');
|
||||||
|
|
||||||
|
my $name = sprintf 'reading encoded surrogate U+%.4X succeeds when allow_surrogates is set', $cp;
|
||||||
|
|
||||||
|
lives_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
#! perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
use Test::More 0.88;
|
||||||
|
use Test::Exception;
|
||||||
|
use lib 't/lib';
|
||||||
|
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||||
|
|
||||||
|
for (my $cp = 0x00; $cp < 0x10FFFF; $cp += 0x1000) {
|
||||||
|
my $octets = pack_utf8($cp);
|
||||||
|
my $name = sprintf 'successfull reading U+%.4X <%s>',
|
||||||
|
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $octets;
|
||||||
|
|
||||||
|
my $fh = fh_with_octets($octets);
|
||||||
|
|
||||||
|
lives_ok {
|
||||||
|
slurp($fh);
|
||||||
|
} $name;
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing;
|
||||||
|
|
|
@ -0,0 +1,381 @@
|
||||||
|
#include "EXTERN.h"
|
||||||
|
#include "perl.h"
|
||||||
|
#include "XSUB.h"
|
||||||
|
#include "perliol.h"
|
||||||
|
#include "ppport.h"
|
||||||
|
|
||||||
|
#define UTF8_MAX_BYTES 4
|
||||||
|
|
||||||
|
static const U8 xs_utf8_sequence_len[0x100] = {
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x0F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10-0x1F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x2F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30-0x3F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50-0x5F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6F */
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70-0x7F */
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8F */
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9F */
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xAF */
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xB0-0xBF */
|
||||||
|
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xC0-0xCF */
|
||||||
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xD0-0xDF */
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xE0-0xEF */
|
||||||
|
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xF0-0xFF */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum { STRICT_UTF8=0, ALLOW_SURROGATES=1, ALLOW_NONCHARACTERS=2, ALLOW_NONSHORTEST=4 } utf8_flags;
|
||||||
|
|
||||||
|
|
||||||
|
static STRLEN skip_sequence(const U8 *cur, const STRLEN len) {
|
||||||
|
STRLEN i, n = xs_utf8_sequence_len[*cur];
|
||||||
|
|
||||||
|
if (n < 1 || len < 2)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
switch (cur[0]) {
|
||||||
|
case 0xE0: if ((cur[1] & 0xE0) != 0xA0) return 1; break;
|
||||||
|
case 0xED: if ((cur[1] & 0xE0) != 0x80) return 1; break;
|
||||||
|
case 0xF4: if ((cur[1] & 0xF0) != 0x80) return 1; break;
|
||||||
|
case 0xF0: if ((cur[1] & 0xF0) == 0x80) return 1; /* FALLTROUGH */
|
||||||
|
default: if ((cur[1] & 0xC0) != 0x80) return 1; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n > len)
|
||||||
|
n = len;
|
||||||
|
for (i = 2; i < n; i++)
|
||||||
|
if ((cur[i] & 0xC0) != 0x80)
|
||||||
|
break;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(PERL_STATIC_NO_RET) && defined(__attribute__noreturn__)
|
||||||
|
PERL_STATIC_NO_RET void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) __attribute__noreturn__;
|
||||||
|
#elif defined(__attribute__noreturn__)
|
||||||
|
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) __attribute__noreturn__;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) {
|
||||||
|
static const char *hex = "0123456789ABCDEF";
|
||||||
|
const char *fmt;
|
||||||
|
char seq[UTF8_MAX_BYTES * 3];
|
||||||
|
char *d = seq;
|
||||||
|
|
||||||
|
if (eof)
|
||||||
|
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s> at end of file";
|
||||||
|
else
|
||||||
|
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s>";
|
||||||
|
|
||||||
|
while (len-- > 0) {
|
||||||
|
const U8 c = *cur++;
|
||||||
|
*d++ = hex[c >> 4];
|
||||||
|
*d++ = hex[c & 15];
|
||||||
|
if (len)
|
||||||
|
*d++ = ' ';
|
||||||
|
}
|
||||||
|
*d = 0;
|
||||||
|
Perl_croak(aTHX_ fmt, seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(PERL_STATIC_NO_RET) && defined(__attribute__noreturn__)
|
||||||
|
PERL_STATIC_NO_RET void report_noncharacter(pTHX_ UV usv) __attribute__noreturn__;
|
||||||
|
#elif defined(__attribute__noreturn__)
|
||||||
|
static void report_noncharacter(pTHX_ UV usv) __attribute__noreturn__;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void report_noncharacter(pTHX_ UV usv) {
|
||||||
|
static const char *fmt = "Can't interchange noncharacter code point U+%"UVXf;
|
||||||
|
Perl_croak(aTHX_ fmt, usv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Check that the octets in [buf, end) are well-formed UTF-8.
 *
 *   flags   bit set of utf8_flags; ALLOW_SURROGATES and ALLOW_NONCHARACTERS
 *           relax the corresponding checks
 *   handle  consulted for its EOF state; gets PERLIO_F_ERROR set on failure
 *
 * Returns the number of leading octets that form complete, valid sequences.
 * A truncated sequence at the very end of the buffer is not counted when the
 * handle is not at EOF (presumably so the caller can retry it once more data
 * has arrived).  On any violation the function croaks and does not return.
 */
static STRLEN validate(pTHX_ const U8 *buf, const U8 *end, const int flags, PerlIO* handle) {
	const bool eof = PerlIO_eof(handle);
	const U8 *cur = buf;
	/*
	 * Up to end4 a full UTF8_MAX_BYTES window can be read without bounds
	 * checks.  Clamp to buf for short buffers so that no pointer before the
	 * start of the buffer is ever formed.
	 */
	const U8 *end4 = (end - buf > UTF8_MAX_BYTES) ? end - UTF8_MAX_BYTES : buf;
	STRLEN skip = 0;
	U32 v;

	while (cur < end4) {
		/* Fast path over ASCII. */
		while (cur < end4 && *cur < 0x80)
			cur++;

	  check:
		switch (xs_utf8_sequence_len[*cur]) {
			case 0:
				goto illformed;
			case 1:
				cur += 1;
				break;
			case 2:
				/* 110xxxxx 10xxxxxx */
				if ((cur[1] & 0xC0) != 0x80)
					goto illformed;
				cur += 2;
				break;
			case 3:
				v = ((U32)cur[0] << 16)
				  | ((U32)cur[1] <<  8)
				  | ((U32)cur[2]);
				/* 1110xxxx 10xxxxxx 10xxxxxx */
				if ((v & 0x00F0C0C0) != 0x00E08080 ||
					/* Non-shortest form */
					v < 0x00E0A080)
					goto illformed;
				/* Surrogates U+D800..U+DFFF */
				if (!(flags & ALLOW_SURROGATES) && (v & 0x00EFA080) == 0x00EDA080)
					goto illformed;
				/* Non-characters U+FDD0..U+FDEF, U+FFFE..U+FFFF */
				if (!(flags & ALLOW_NONCHARACTERS) && v >= 0x00EFB790 && (v <= 0x00EFB7AF || v >= 0x00EFBFBE))
					goto noncharacter;
				cur += 3;
				break;
			case 4:
				v = ((U32)cur[0] << 24)
				  | ((U32)cur[1] << 16)
				  | ((U32)cur[2] <<  8)
				  | ((U32)cur[3]);
				/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
				if ((v & 0xF8C0C0C0) != 0xF0808080 ||
					/* Non-shortest form */
					v < 0xF0908080 ||
					/* Greater than U+10FFFF */
					v > 0xF48FBFBF)
					goto illformed;
				/* Non-characters U+nFFFE..U+nFFFF on plane 1-16 */
				if (!(flags & ALLOW_NONCHARACTERS) && (v & 0x000FBFBE) == 0x000FBFBE)
					goto noncharacter;
				cur += 4;
				break;
		}
	}

	/*
	 * Tail: fewer than UTF8_MAX_BYTES octets may remain.  A sequence that
	 * still fits completely is validated by jumping back into the switch
	 * above; the loop condition then brings control back here.
	 */
	if (cur < end) {
		/* Compare lengths rather than pointers so nothing points past end. */
		if ((STRLEN)xs_utf8_sequence_len[*cur] <= (STRLEN)(end - cur))
			goto check;
		skip = skip_sequence(cur, end - cur);
		/* A truncated sequence is an error at EOF or if it is already broken. */
		if (eof || cur + skip < end)
			goto illformed;
	}
	return cur - buf;

  illformed:
	if (!skip)
		skip = skip_sequence(cur, end - cur);
	PerlIOBase(handle)->flags |= PERLIO_F_ERROR;
	report_illformed(aTHX_ cur, skip, eof);

  noncharacter:
	/* v holds the raw octets; strip the marker bits to get the code point. */
	if (v < 0xF0808080)
		v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x0F0000) >> 4;
	else
		v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x3F0000) >> 4 | (v & 0x07000000) >> 6;
	PerlIOBase(handle)->flags |= PERLIO_F_ERROR;
	report_noncharacter(aTHX_ v);
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PerlIOBuf buf;
|
||||||
|
STDCHAR leftovers[UTF8_MAX_BYTES];
|
||||||
|
size_t leftover_length;
|
||||||
|
utf8_flags flags;
|
||||||
|
} PerlIOUnicode;
|
||||||
|
|
||||||
|
static struct {
|
||||||
|
const char* name;
|
||||||
|
size_t length;
|
||||||
|
utf8_flags value;
|
||||||
|
} map[] = {
|
||||||
|
{ STR_WITH_LEN("allow_surrogates"), ALLOW_SURROGATES },
|
||||||
|
{ STR_WITH_LEN("allow_noncharacters"), ALLOW_NONCHARACTERS },
|
||||||
|
{ STR_WITH_LEN("allow_nonshortest"), ALLOW_NONSHORTEST },
|
||||||
|
{ STR_WITH_LEN("strict"), 0 },
|
||||||
|
{ STR_WITH_LEN("loose"), ALLOW_SURROGATES | ALLOW_NONCHARACTERS | ALLOW_NONSHORTEST },
|
||||||
|
};
|
||||||
|
|
||||||
|
static utf8_flags lookup_parameter(pTHX_ const char* ptr, size_t len) {
|
||||||
|
unsigned i;
|
||||||
|
for (i = 0; i < sizeof map / sizeof *map; ++i) {
|
||||||
|
if (map[i].length == len && memcmp(ptr, map[i].name, len) == 0)
|
||||||
|
return map[i].value;
|
||||||
|
}
|
||||||
|
Perl_croak(aTHX_ "Unknown argument to :utf8_strict: %*s", (int)len, ptr);
|
||||||
|
}
|
||||||
|
/*
 * Turn the layer argument SV into a utf8_flags bit set.  The argument is
 * either a single name or a comma separated list of names; a missing or
 * undefined argument yields 0.  Unknown names croak in lookup_parameter().
 */
static utf8_flags parse_parameters(pTHX_ SV* param) {
	STRLEN total;
	const char *tok, *comma, *stop;
	utf8_flags result = 0;

	if (!param || !SvOK(param))
		return 0;

	tok = SvPV(param, total);
	comma = strchr(tok, ',');

	/* Single name: look it up as a whole. */
	if (!comma)
		return lookup_parameter(aTHX_ tok, total);

	stop = tok + total;
	for (; comma; comma = strchr(tok, ',')) {
		result |= lookup_parameter(aTHX_ tok, comma - tok);
		tok = comma + 1;
	}

	/* Whatever follows the last comma, unless the list ends with one. */
	if (tok < stop)
		result |= lookup_parameter(aTHX_ tok, stop - tok);

	return result;
}
|
||||||
|
|
||||||
|
#define line_buffered(flags) ((flags & (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE)) == (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE))
|
||||||
|
|
||||||
|
void PerlIOBase_flush_linebuf(pTHX) {
|
||||||
|
#ifdef dVAR
|
||||||
|
dVAR;
|
||||||
|
#endif
|
||||||
|
PerlIOl **table = &PL_perlio;
|
||||||
|
PerlIOl *f;
|
||||||
|
while ((f = *table)) {
|
||||||
|
int i;
|
||||||
|
table = (PerlIOl **) (f++);
|
||||||
|
for (i = 1; i < 64; i++) {
|
||||||
|
if (f->next && line_buffered(PerlIOBase(&(f->next))->flags))
|
||||||
|
PerlIO_flush(&(f->next));
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * "Pushed" callback of the layer.
 *
 * The layer argument is parsed first, so an unknown option croaks before
 * anything else is set up. If PerlIOBuf_pushed() succeeds, the handle is
 * marked PERLIO_F_UTF8 and the parsed flags are stored in the layer's own
 * data.
 *
 * Returns 0 on success and -1 when PerlIOBuf_pushed() reports a failure.
 */
static IV PerlIOUnicode_pushed(pTHX_ PerlIO* f, const char* mode, SV* arg, PerlIO_funcs* tab) {
	const utf8_flags parsed = parse_parameters(aTHX_ arg);

	if (PerlIOBuf_pushed(aTHX_ f, mode, arg, tab) != 0)
		return -1;

	PerlIOBase(f)->flags |= PERLIO_F_UTF8;
	PerlIOSelf(f, PerlIOUnicode)->flags = parsed;
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * "Fill" callback of the layer: refill the read buffer from the layer below
 * and run the new contents through validate().
 *
 * Bytes that a previous call stashed in leftovers are put at the start of the
 * buffer before new data is appended. After validation, whatever lies beyond
 * the end reported by validate() is stashed in leftovers again for the next
 * call (presumably a sequence cut off at the end of the chunk -- confirm
 * against validate()).
 *
 * Returns 0 when the buffer was refilled, -1 on flush failure, on error, or
 * at end of file (PERLIO_F_ERROR / PERLIO_F_EOF is set on f for the latter
 * two).
 */
static IV PerlIOUnicode_fill(pTHX_ PerlIO* f) {
	PerlIOUnicode * const self = PerlIOSelf(f, PerlIOUnicode);
	PerlIOBuf * const buffer = &self->buf;
	PerlIO *below = PerlIONext(f);
	SSize_t got;
	Size_t filled = 0;
	STDCHAR *raw_end;
	SSize_t room;

	if (PerlIO_flush(f) != 0)
		return -1;
	if (PerlIOBase(f)->flags & PERLIO_F_TTY)
		PerlIOBase_flush_linebuf(aTHX);

	if (!buffer->buf)
		PerlIO_get_base(f);

	assert(buffer->buf);

	if (!self->leftover_length) {
		buffer->ptr = buffer->end = buffer->buf;
	}
	else {
		/* Start the buffer with the bytes kept back by the previous call. */
		filled = self->leftover_length;
		Copy(self->leftovers, buffer->buf, filled, STDCHAR);
		buffer->end = buffer->buf + filled;
		self->leftover_length = 0;
	}
	room = (SSize_t)buffer->bufsiz - (buffer->end - buffer->buf);

	if (!PerlIOValid(below)) {
		PerlIOBase(f)->flags |= PERLIO_F_EOF;
		return -1;
	}

	if (!PerlIO_fast_gets(below)) {
		got = PerlIO_read(below, buffer->end, room);
		if (got > 0)
			filled += got;
	}
	else {
		/*
		 * Layer below is also buffered. We do _NOT_ want to call its
		 * ->Read() because that will loop till it gets what we asked for
		 * which may hang on a pipe etc. Instead take anything it has to
		 * hand, or ask it to fill _once_.
		 */
		got = PerlIO_get_cnt(below);
		if (got <= 0) {
			got = PerlIO_fill(below);
			if (got == 0)
				got = PerlIO_get_cnt(below);
			else if (!PerlIO_error(below) && PerlIO_eof(below))
				got = 0;
		}
		if (got > 0) {
			STDCHAR *src = PerlIO_get_ptr(below);
			const SSize_t pending = got;
			/* Never take more than still fits into our own buffer. */
			if (got > room)
				got = room;
			Copy(src, buffer->end, got, STDCHAR);
			PerlIO_set_ptrcnt(below, src + got, pending - got);
			filled += got;
		}
	}

	/* A negative count is an error; zero is end of file only when nothing
	 * at all (not even leftovers) is in the buffer and the layer below
	 * reports eof. */
	if (got < 0 || (got == 0 && filled == 0 && PerlIO_eof(below))) {
		PerlIOBase(f)->flags |= (got == 0) ? PERLIO_F_EOF : PERLIO_F_ERROR;
		return -1;
	}

	raw_end = buffer->buf + filled;
	buffer->end = buffer->buf + validate(aTHX_ (const U8 *)buffer->buf, (const U8 *)raw_end, self->flags, below);
	if (buffer->end < raw_end) {
		/* Keep the bytes past the validated end for the next fill. */
		size_t tail = raw_end - buffer->end;
		Copy(buffer->end, self->leftovers, tail, char);
		self->leftover_length = tail;
	}
	PerlIOBase(f)->flags |= PERLIO_F_RDBUF;

	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Function table describing the "utf8_strict" layer.
 *
 * Only two entries are specific to this layer: PerlIOUnicode_pushed and
 * PerlIOUnicode_fill. Every other callback is a stock PerlIOBuf_ or
 * PerlIOBase_ implementation.
 *
 * NOTE(review): the slot names in the trailing comments follow the
 * PerlIO_funcs layout as documented in perliol -- confirm against the perl
 * headers in use.
 */
PERLIO_FUNCS_DECL(PerlIO_utf8_strict) = {
	sizeof(PerlIO_funcs),
	"utf8_strict",                      /* layer name */
	sizeof(PerlIOUnicode),              /* size of the per-handle layer data */
	PERLIO_K_BUFFERED|PERLIO_K_UTF8,
	PerlIOUnicode_pushed,               /* parses the layer argument, sets PERLIO_F_UTF8 */
	PerlIOBuf_popped,
	PerlIOBuf_open,
	PerlIOBase_binmode,
	NULL,                               /* no callback supplied; presumably the Getarg slot */
	PerlIOBase_fileno,
	PerlIOBuf_dup,
	PerlIOBuf_read,
	PerlIOBase_unread,
	PerlIOBuf_write,
	PerlIOBuf_seek,
	PerlIOBuf_tell,
	PerlIOBuf_close,
	PerlIOBuf_flush,
	PerlIOUnicode_fill,                 /* refills the buffer and validates it */
	PerlIOBase_eof,
	PerlIOBase_error,
	PerlIOBase_clearerr,
	PerlIOBase_setlinebuf,
	PerlIOBuf_get_base,
	PerlIOBuf_bufsiz,
	PerlIOBuf_get_ptr,
	PerlIOBuf_get_cnt,
	PerlIOBuf_set_ptrcnt,
};
|
||||||
|
|
||||||
|
/* XS section: the module defines no XSUBs of its own. Its BOOT block passes
 * the PerlIO_utf8_strict function table to PerlIO_define_layer(). */

MODULE = PerlIO::utf8_strict

PROTOTYPES: DISABLE

BOOT:
	PerlIO_define_layer(aTHX_ (PerlIO_funcs*)&PerlIO_utf8_strict);
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!perl
# This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests.
# Generated file: change the Dist::Zilla configuration rather than editing it
# by hand.
use strict; use warnings;
use Test::More;
# Test::Pod older than 1.41 is rejected by this version requirement.
use Test::Pod 1.41;

# No file list is passed, so the set of POD files to check is left to
# Test::Pod's default.
all_pod_files_ok();
|
Loading…
Reference in New Issue