Import Upstream version 0.009
This commit is contained in:
commit
21094f86cd
|
@ -0,0 +1,33 @@
|
|||
Revision history for PerlIO-utf8_strict
|
||||
|
||||
0.009 2022-01-08 17:26:23+01:00 Europe/Amsterdam
|
||||
- Mark functions that croak as noreturn
|
||||
|
||||
0.008 2020-09-19 00:11:59+02:00 Europe/Amsterdam
|
||||
- Make unread by :crlf on top of :utf8_strict reliable
|
||||
|
||||
0.007 2017-04-06 14:58:37+02:00 Europe/Amsterdam
|
||||
- Adapt to dot no longer being in @INC
|
||||
- Adapt to unicode syswrite being deprecated
|
||||
|
||||
0.006 2015-05-01 11:36:21+02:00 Europe/Amsterdam
|
||||
Move to MakeMaker
|
||||
|
||||
0.005 2014-11-06 00:56:54+01:00 Europe/Amsterdam
|
||||
Add PPPort to compile on perl <5.8.9
|
||||
|
||||
0.004 2013-02-24 10:59:59 Europe/Amsterdam
|
||||
Fix compilation on 5.8
|
||||
|
||||
0.003 2013-02-21 01:30:44 Europe/Amsterdam
|
||||
Fixed conversion to code points in noncharacter error reporting
|
||||
PerlIOBase_flush_linebuf is unresolved on some platforms, pull in a copy
|
||||
|
||||
0.002 2012-05-30 20:43:21 Europe/Amsterdam
|
||||
Fix bug with non-buffered lower layers
|
||||
Fix C89 conformance
|
||||
Documentation update
|
||||
|
||||
|
||||
0.001 2012-04-08 22:08:08 Europe/Amsterdam
|
||||
Initial release
|
|
@ -0,0 +1,72 @@
|
|||
This is the Perl distribution PerlIO-utf8_strict.
|
||||
|
||||
Installing PerlIO-utf8_strict is straightforward.
|
||||
|
||||
## Installation with cpanm
|
||||
|
||||
If you have cpanm, you only need one line:
|
||||
|
||||
% cpanm PerlIO::utf8_strict
|
||||
|
||||
If it does not have permission to install modules to the current perl, cpanm
|
||||
will automatically set up and install to a local::lib in your home directory.
|
||||
See the local::lib documentation (https://metacpan.org/pod/local::lib) for
|
||||
details on enabling it in your environment.
|
||||
|
||||
## Installing with the CPAN shell
|
||||
|
||||
Alternatively, if your CPAN shell is set up, you should just be able to do:
|
||||
|
||||
% cpan PerlIO::utf8_strict
|
||||
|
||||
## Manual installation
|
||||
|
||||
As a last resort, you can manually install it. Download the tarball, untar it,
|
||||
install configure prerequisites (see below), then build it:
|
||||
|
||||
% perl Makefile.PL
|
||||
% make && make test
|
||||
|
||||
Then install it:
|
||||
|
||||
% make install
|
||||
|
||||
On Windows platforms, you should use `dmake` or `nmake`, instead of `make`.
|
||||
|
||||
If your perl is system-managed, you can create a local::lib in your home
|
||||
directory to install modules to. For details, see the local::lib documentation:
|
||||
https://metacpan.org/pod/local::lib
|
||||
|
||||
The prerequisites of this distribution will also have to be installed manually. The
|
||||
prerequisites are listed in one of the files: `MYMETA.yml` or `MYMETA.json` generated
|
||||
by running the manual build process described above.
|
||||
|
||||
## Configure Prerequisites
|
||||
|
||||
This distribution requires other modules to be installed before this
|
||||
distribution's installer can be run. They can be found under the
|
||||
"configure_requires" key of META.yml or the
|
||||
"{prereqs}{configure}{requires}" key of META.json.
|
||||
|
||||
## Other Prerequisites
|
||||
|
||||
This distribution may require additional modules to be installed after running
|
||||
Makefile.PL.
|
||||
Look for prerequisites in the following phases:
|
||||
|
||||
* to run make, PHASE = build
|
||||
* to use the module code itself, PHASE = runtime
|
||||
* to run tests, PHASE = test
|
||||
|
||||
They can all be found in the "PHASE_requires" key of MYMETA.yml or the
|
||||
"{prereqs}{PHASE}{requires}" key of MYMETA.json.
|
||||
|
||||
## Documentation
|
||||
|
||||
PerlIO-utf8_strict documentation is available as POD.
|
||||
You can run `perldoc` from a shell to read the documentation:
|
||||
|
||||
% perldoc PerlIO::utf8_strict
|
||||
|
||||
For more information on installing Perl modules via CPAN, please see:
|
||||
https://www.cpan.org/modules/INSTALL.html
|
|
@ -0,0 +1,379 @@
|
|||
This software is copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||
|
||||
This is free software; you can redistribute it and/or modify it under
|
||||
the same terms as the Perl 5 programming language system itself.
|
||||
|
||||
Terms of the Perl programming language system itself
|
||||
|
||||
a) the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 1, or (at your option) any
|
||||
later version, or
|
||||
b) the "Artistic License"
|
||||
|
||||
--- The GNU General Public License, Version 1, February 1989 ---
|
||||
|
||||
This software is Copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||
|
||||
This is free software, licensed under:
|
||||
|
||||
The GNU General Public License, Version 1, February 1989
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 1, February 1989
|
||||
|
||||
Copyright (C) 1989 Free Software Foundation, Inc.
|
||||
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The license agreements of most software companies try to keep users
|
||||
at the mercy of those companies. By contrast, our General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. The
|
||||
General Public License applies to the Free Software Foundation's
|
||||
software and to any other program whose authors commit to using it.
|
||||
You can use it for your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Specifically, the General Public License is designed to make
|
||||
sure that you have the freedom to give away or sell copies of free
|
||||
software, that you receive source code or can get it if you want it,
|
||||
that you can change the software or use pieces of it in new free
|
||||
programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of a such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must tell them their rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License Agreement applies to any program or other work which
|
||||
contains a notice placed by the copyright holder saying it may be
|
||||
distributed under the terms of this General Public License. The
|
||||
"Program", below, refers to any such program or work, and a "work based
|
||||
on the Program" means either the Program or any work containing the
|
||||
Program or a portion of it, either verbatim or with modifications. Each
|
||||
licensee is addressed as "you".
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's source
|
||||
code as you receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice and
|
||||
disclaimer of warranty; keep intact all the notices that refer to this
|
||||
General Public License and to the absence of any warranty; and give any
|
||||
other recipients of the Program a copy of this General Public License
|
||||
along with the Program. You may charge a fee for the physical act of
|
||||
transferring a copy.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion of
|
||||
it, and copy and distribute such modifications under the terms of Paragraph
|
||||
1 above, provided that you also do the following:
|
||||
|
||||
a) cause the modified files to carry prominent notices stating that
|
||||
you changed the files and the date of any change; and
|
||||
|
||||
b) cause the whole of any work that you distribute or publish, that
|
||||
in whole or in part contains the Program or any part thereof, either
|
||||
with or without modifications, to be licensed at no charge to all
|
||||
third parties under the terms of this General Public License (except
|
||||
that you may choose to grant warranty protection to some or all
|
||||
third parties, at your option).
|
||||
|
||||
c) If the modified program normally reads commands interactively when
|
||||
run, you must cause it, when started running for such interactive use
|
||||
in the simplest and most usual way, to print or display an
|
||||
announcement including an appropriate copyright notice and a notice
|
||||
that there is no warranty (or else, saying that you provide a
|
||||
warranty) and that users may redistribute the program under these
|
||||
conditions, and telling the user how to view a copy of this General
|
||||
Public License.
|
||||
|
||||
d) You may charge a fee for the physical act of transferring a
|
||||
copy, and you may at your option offer warranty protection in
|
||||
exchange for a fee.
|
||||
|
||||
Mere aggregation of another independent work with the Program (or its
|
||||
derivative) on a volume of a storage or distribution medium does not bring
|
||||
the other work under the scope of these terms.
|
||||
|
||||
3. You may copy and distribute the Program (or a portion or derivative of
|
||||
it, under Paragraph 2) in object code or executable form under the terms of
|
||||
Paragraphs 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of
|
||||
Paragraphs 1 and 2 above; or,
|
||||
|
||||
b) accompany it with a written offer, valid for at least three
|
||||
years, to give any third party free (except for a nominal charge
|
||||
for the cost of distribution) a complete machine-readable copy of the
|
||||
corresponding source code, to be distributed under the terms of
|
||||
Paragraphs 1 and 2 above; or,
|
||||
|
||||
c) accompany it with the information you received as to where the
|
||||
corresponding source code may be obtained. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form alone.)
|
||||
|
||||
Source code for a work means the preferred form of the work for making
|
||||
modifications to it. For an executable file, complete source code means
|
||||
all the source code for all modules it contains; but, as a special
|
||||
exception, it need not include source code for modules which are standard
|
||||
libraries that accompany the operating system on which the executable
|
||||
file runs, or for standard header files or definitions files that
|
||||
accompany that operating system.
|
||||
|
||||
4. You may not copy, modify, sublicense, distribute or transfer the
|
||||
Program except as expressly provided under this General Public License.
|
||||
Any attempt otherwise to copy, modify, sublicense, distribute or transfer
|
||||
the Program is void, and will automatically terminate your rights to use
|
||||
the Program under this License. However, parties who have received
|
||||
copies, or rights to use copies, from you under this General Public
|
||||
License will not have their licenses terminated so long as such parties
|
||||
remain in full compliance.
|
||||
|
||||
5. By copying, distributing or modifying the Program (or any work based
|
||||
on the Program) you indicate your acceptance of this license to do so,
|
||||
and all its terms and conditions.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the original
|
||||
licensor to copy, distribute or modify the Program subject to these
|
||||
terms and conditions. You may not impose any further restrictions on the
|
||||
recipients' exercise of the rights granted herein.
|
||||
|
||||
7. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of the license which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
the license, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
8. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Appendix: How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to humanity, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these
|
||||
terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest to
|
||||
attach them to the start of each source file to most effectively convey
|
||||
the exclusion of warranty; and each file should have at least the
|
||||
"copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) 19yy <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
|
||||
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) 19xx name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the
|
||||
appropriate parts of the General Public License. Of course, the
|
||||
commands you use may be called something other than `show w' and `show
|
||||
c'; they could even be mouse-clicks or menu items--whatever suits your
|
||||
program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||
program `Gnomovision' (a program to direct compilers to make passes
|
||||
at assemblers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
That's all there is to it!
|
||||
|
||||
|
||||
--- The Artistic License 1.0 ---
|
||||
|
||||
This software is Copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||
|
||||
This is free software, licensed under:
|
||||
|
||||
The Artistic License 1.0
|
||||
|
||||
The Artistic License
|
||||
|
||||
Preamble
|
||||
|
||||
The intent of this document is to state the conditions under which a Package
|
||||
may be copied, such that the Copyright Holder maintains some semblance of
|
||||
artistic control over the development of the package, while giving the users of
|
||||
the package the right to use and distribute the Package in a more-or-less
|
||||
customary fashion, plus the right to make reasonable modifications.
|
||||
|
||||
Definitions:
|
||||
|
||||
- "Package" refers to the collection of files distributed by the Copyright
|
||||
Holder, and derivatives of that collection of files created through
|
||||
textual modification.
|
||||
- "Standard Version" refers to such a Package if it has not been modified,
|
||||
or has been modified in accordance with the wishes of the Copyright
|
||||
Holder.
|
||||
- "Copyright Holder" is whoever is named in the copyright or copyrights for
|
||||
the package.
|
||||
- "You" is you, if you're thinking about copying or distributing this Package.
|
||||
- "Reasonable copying fee" is whatever you can justify on the basis of media
|
||||
cost, duplication charges, time of people involved, and so on. (You will
|
||||
not be required to justify it to the Copyright Holder, but only to the
|
||||
computing community at large as a market that must bear the fee.)
|
||||
- "Freely Available" means that no fee is charged for the item itself, though
|
||||
there may be fees involved in handling the item. It also means that
|
||||
recipients of the item may redistribute it under the same conditions they
|
||||
received it.
|
||||
|
||||
1. You may make and give away verbatim copies of the source form of the
|
||||
Standard Version of this Package without restriction, provided that you
|
||||
duplicate all of the original copyright notices and associated disclaimers.
|
||||
|
||||
2. You may apply bug fixes, portability fixes and other modifications derived
|
||||
from the Public Domain or from the Copyright Holder. A Package modified in such
|
||||
a way shall still be considered the Standard Version.
|
||||
|
||||
3. You may otherwise modify your copy of this Package in any way, provided that
|
||||
you insert a prominent notice in each changed file stating how and when you
|
||||
changed that file, and provided that you do at least ONE of the following:
|
||||
|
||||
a) place your modifications in the Public Domain or otherwise make them
|
||||
Freely Available, such as by posting said modifications to Usenet or an
|
||||
equivalent medium, or placing the modifications on a major archive site
|
||||
such as ftp.uu.net, or by allowing the Copyright Holder to include your
|
||||
modifications in the Standard Version of the Package.
|
||||
|
||||
b) use the modified Package only within your corporation or organization.
|
||||
|
||||
c) rename any non-standard executables so the names do not conflict with
|
||||
standard executables, which must also be provided, and provide a separate
|
||||
manual page for each non-standard executable that clearly documents how it
|
||||
differs from the Standard Version.
|
||||
|
||||
d) make other distribution arrangements with the Copyright Holder.
|
||||
|
||||
4. You may distribute the programs of this Package in object code or executable
|
||||
form, provided that you do at least ONE of the following:
|
||||
|
||||
a) distribute a Standard Version of the executables and library files,
|
||||
together with instructions (in the manual page or equivalent) on where to
|
||||
get the Standard Version.
|
||||
|
||||
b) accompany the distribution with the machine-readable source of the Package
|
||||
with your modifications.
|
||||
|
||||
c) accompany any non-standard executables with their corresponding Standard
|
||||
Version executables, giving the non-standard executables non-standard
|
||||
names, and clearly documenting the differences in manual pages (or
|
||||
equivalent), together with instructions on where to get the Standard
|
||||
Version.
|
||||
|
||||
d) make other distribution arrangements with the Copyright Holder.
|
||||
|
||||
5. You may charge a reasonable copying fee for any distribution of this
|
||||
Package. You may charge any fee you choose for support of this Package. You
|
||||
may not charge a fee for this Package itself. However, you may distribute this
|
||||
Package in aggregate with other (possibly commercial) programs as part of a
|
||||
larger (possibly commercial) software distribution provided that you do not
|
||||
advertise this Package as a product of your own.
|
||||
|
||||
6. The scripts and library files supplied as input to or produced as output
|
||||
from the programs of this Package do not automatically fall under the copyright
|
||||
of this Package, but belong to whomever generated them, and may be sold
|
||||
commercially, and may be aggregated with this Package.
|
||||
|
||||
7. C or perl subroutines supplied by you and linked into this Package shall not
|
||||
be considered part of this Package.
|
||||
|
||||
8. The name of the Copyright Holder may not be used to endorse or promote
|
||||
products derived from this software without specific prior written permission.
|
||||
|
||||
9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
The End
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.024.
|
||||
Changes
|
||||
INSTALL
|
||||
LICENSE
|
||||
MANIFEST
|
||||
META.json
|
||||
META.yml
|
||||
Makefile.PL
|
||||
README
|
||||
corpus/quickbrown.txt
|
||||
corpus/test1-latin1.txt
|
||||
corpus/test1.txt
|
||||
dist.ini
|
||||
lib/PerlIO/utf8_strict.pm
|
||||
ppport.h
|
||||
t/basics.t
|
||||
t/bug.t
|
||||
t/incomplete.t
|
||||
t/lib/Util.pm
|
||||
t/non-shortest-form.t
|
||||
t/noncharacters.t
|
||||
t/super.t
|
||||
t/surrogates.t
|
||||
t/unicode.t
|
||||
utf8_strict.xs
|
||||
xt/author/pod-syntax.t
|
|
@ -0,0 +1,84 @@
|
|||
{
|
||||
"abstract" : "Fast and correct UTF-8 IO",
|
||||
"author" : [
|
||||
"Leon Timmermans <leont@cpan.org>",
|
||||
"Christian Hansen <chansen@cpan.org>"
|
||||
],
|
||||
"dynamic_config" : 0,
|
||||
"generated_by" : "Dist::Zilla version 6.024, CPAN::Meta::Converter version 2.150010",
|
||||
"license" : [
|
||||
"perl_5"
|
||||
],
|
||||
"meta-spec" : {
|
||||
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
|
||||
"version" : 2
|
||||
},
|
||||
"name" : "PerlIO-utf8_strict",
|
||||
"prereqs" : {
|
||||
"configure" : {
|
||||
"requires" : {
|
||||
"ExtUtils::MakeMaker" : "0",
|
||||
"perl" : "5.006"
|
||||
},
|
||||
"suggests" : {
|
||||
"JSON::PP" : "2.27300"
|
||||
}
|
||||
},
|
||||
"develop" : {
|
||||
"requires" : {
|
||||
"Devel::PPPort" : "3.23",
|
||||
"Test::More" : "0",
|
||||
"Test::Pod" : "1.41"
|
||||
}
|
||||
},
|
||||
"runtime" : {
|
||||
"requires" : {
|
||||
"XSLoader" : "0",
|
||||
"perl" : "5.008",
|
||||
"strict" : "0",
|
||||
"warnings" : "0"
|
||||
}
|
||||
},
|
||||
"test" : {
|
||||
"requires" : {
|
||||
"Carp" : "0",
|
||||
"Exporter" : "0",
|
||||
"File::Spec::Functions" : "0",
|
||||
"IO::File" : "0",
|
||||
"IO::Handle" : "0",
|
||||
"Test::Exception" : "0",
|
||||
"Test::More" : "0.88",
|
||||
"lib" : "0",
|
||||
"perl" : "5.008",
|
||||
"utf8" : "0"
|
||||
}
|
||||
}
|
||||
},
|
||||
"provides" : {
|
||||
"PerlIO::utf8_strict" : {
|
||||
"file" : "lib/PerlIO/utf8_strict.pm",
|
||||
"version" : "0.009"
|
||||
}
|
||||
},
|
||||
"release_status" : "stable",
|
||||
"resources" : {
|
||||
"bugtracker" : {
|
||||
"mailto" : "bug-perlio-utf8_strict at rt.cpan.org",
|
||||
"web" : "http://rt.cpan.org/Public/Dist/Display.html?Name=PerlIO-utf8_strict"
|
||||
},
|
||||
"repository" : {
|
||||
"type" : "git",
|
||||
"url" : "git://github.com/Leont/perlio-utf8_strict.git",
|
||||
"web" : "https://github.com/Leont/perlio-utf8_strict"
|
||||
}
|
||||
},
|
||||
"version" : "0.009",
|
||||
"x_contributors" : [
|
||||
"Andreas V\u00f6gele <voegelas@cpan.org>",
|
||||
"Leon Timmermans <fawaka@gmail.com>"
|
||||
],
|
||||
"x_generated_by_perl" : "v5.32.0",
|
||||
"x_serialization_backend" : "Cpanel::JSON::XS version 4.19",
|
||||
"x_spdx_expression" : "Artistic-1.0-Perl OR GPL-1.0-or-later"
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
---
|
||||
abstract: 'Fast and correct UTF-8 IO'
|
||||
author:
|
||||
- 'Leon Timmermans <leont@cpan.org>'
|
||||
- 'Christian Hansen <chansen@cpan.org>'
|
||||
build_requires:
|
||||
Carp: '0'
|
||||
Exporter: '0'
|
||||
File::Spec::Functions: '0'
|
||||
IO::File: '0'
|
||||
IO::Handle: '0'
|
||||
Test::Exception: '0'
|
||||
Test::More: '0.88'
|
||||
lib: '0'
|
||||
perl: '5.008'
|
||||
utf8: '0'
|
||||
configure_requires:
|
||||
ExtUtils::MakeMaker: '0'
|
||||
perl: '5.006'
|
||||
dynamic_config: 0
|
||||
generated_by: 'Dist::Zilla version 6.024, CPAN::Meta::Converter version 2.150010'
|
||||
license: perl
|
||||
meta-spec:
|
||||
url: http://module-build.sourceforge.net/META-spec-v1.4.html
|
||||
version: '1.4'
|
||||
name: PerlIO-utf8_strict
|
||||
provides:
|
||||
PerlIO::utf8_strict:
|
||||
file: lib/PerlIO/utf8_strict.pm
|
||||
version: '0.009'
|
||||
requires:
|
||||
XSLoader: '0'
|
||||
perl: '5.008'
|
||||
strict: '0'
|
||||
warnings: '0'
|
||||
resources:
|
||||
bugtracker: http://rt.cpan.org/Public/Dist/Display.html?Name=PerlIO-utf8_strict
|
||||
repository: git://github.com/Leont/perlio-utf8_strict.git
|
||||
version: '0.009'
|
||||
x_contributors:
|
||||
- 'Andreas Vögele <voegelas@cpan.org>'
|
||||
- 'Leon Timmermans <fawaka@gmail.com>'
|
||||
x_generated_by_perl: v5.32.0
|
||||
x_serialization_backend: 'YAML::Tiny version 1.73'
|
||||
x_spdx_expression: 'Artistic-1.0-Perl OR GPL-1.0-or-later'
|
|
@ -0,0 +1,67 @@
|
|||
# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.024.
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use 5.008;
|
||||
|
||||
use ExtUtils::MakeMaker;
|
||||
|
||||
# Full argument set for WriteMakefile(). Some keys (TEST_REQUIRES,
# CONFIGURE_REQUIRES, PREREQ_PM) are removed or replaced further down when
# the loaded ExtUtils::MakeMaker is older than the versions checked there.
my %WriteMakefileArgs = (
|
||||
"ABSTRACT" => "Fast and correct UTF-8 IO",
|
||||
"AUTHOR" => "Leon Timmermans <leont\@cpan.org>, Christian Hansen <chansen\@cpan.org>",
|
||||
"CONFIGURE_REQUIRES" => {
|
||||
"ExtUtils::MakeMaker" => 0
|
||||
},
|
||||
"DISTNAME" => "PerlIO-utf8_strict",
|
||||
"LICENSE" => "perl",
|
||||
"MIN_PERL_VERSION" => "5.008",
|
||||
"NAME" => "PerlIO::utf8_strict",
|
||||
# Runtime prerequisites only; test-time modules are listed under TEST_REQUIRES.
"PREREQ_PM" => {
|
||||
"XSLoader" => 0,
|
||||
"strict" => 0,
|
||||
"warnings" => 0
|
||||
},
|
||||
"TEST_REQUIRES" => {
|
||||
"Carp" => 0,
|
||||
"Exporter" => 0,
|
||||
"File::Spec::Functions" => 0,
|
||||
"IO::File" => 0,
|
||||
"IO::Handle" => 0,
|
||||
"Test::Exception" => 0,
|
||||
"Test::More" => "0.88",
|
||||
"lib" => 0,
|
||||
"utf8" => 0
|
||||
},
|
||||
"VERSION" => "0.009",
|
||||
"test" => {
|
||||
"TESTS" => "t/*.t"
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
# Union of the PREREQ_PM and TEST_REQUIRES entries from %WriteMakefileArgs.
# It replaces PREREQ_PM when the loaded ExtUtils::MakeMaker fails the
# 6.63_03 version check below.
my %FallbackPrereqs = (
|
||||
"Carp" => 0,
|
||||
"Exporter" => 0,
|
||||
"File::Spec::Functions" => 0,
|
||||
"IO::File" => 0,
|
||||
"IO::Handle" => 0,
|
||||
"Test::Exception" => 0,
|
||||
"Test::More" => "0.88",
|
||||
"XSLoader" => 0,
|
||||
"lib" => 0,
|
||||
"strict" => 0,
|
||||
"utf8" => 0,
|
||||
"warnings" => 0
|
||||
);
|
||||
|
||||
|
||||
# Fallback for old toolchains: VERSION() dies when the loaded
# ExtUtils::MakeMaker is older than 6.63_03, so the eval yields false and
# this branch runs.
unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) {
|
||||
# Presumably such old releases do not understand these keys -- drop them.
# BUILD_REQUIRES is never set in %WriteMakefileArgs, so that delete is a no-op here.
delete $WriteMakefileArgs{TEST_REQUIRES};
|
||||
delete $WriteMakefileArgs{BUILD_REQUIRES};
|
||||
# Declare runtime and test prerequisites together through PREREQ_PM instead.
$WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs;
|
||||
}
|
||||
|
||||
# Keep CONFIGURE_REQUIRES only when the loaded ExtUtils::MakeMaker is at
# least 6.52; VERSION() dies otherwise, which makes the eval false.
delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
|
||||
unless eval { ExtUtils::MakeMaker->VERSION(6.52) };
|
||||
|
||||
# Hand the (possibly trimmed) argument set to ExtUtils::MakeMaker.
WriteMakefile(%WriteMakefileArgs);
|
|
@ -0,0 +1,12 @@
|
|||
This archive contains the distribution PerlIO-utf8_strict,
|
||||
version 0.009:
|
||||
|
||||
Fast and correct UTF-8 IO
|
||||
|
||||
This software is copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||
|
||||
This is free software; you can redistribute it and/or modify it under
|
||||
the same terms as the Perl 5 programming language system itself.
|
||||
|
||||
|
||||
This README file was generated by Dist::Zilla::Plugin::Readme v6.024.
|
|
@ -0,0 +1,138 @@
|
|||
Sentences that contain all letters commonly used in a language
|
||||
--------------------------------------------------------------
|
||||
|
||||
Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2010-09-20
|
||||
|
||||
This is an example of a plain-text file encoded in UTF-8.
|
||||
|
||||
|
||||
Danish (da)
|
||||
---------
|
||||
|
||||
Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
|
||||
Wolther spillede på xylofon.
|
||||
(= Quiz contestants were eating strawbery with cream while Wolther
|
||||
the circus clown played on xylophone.)
|
||||
|
||||
German (de)
|
||||
-----------
|
||||
|
||||
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
|
||||
(= Wrongful practicing of xylophone music tortures every larger dwarf)
|
||||
|
||||
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
|
||||
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
|
||||
|
||||
Heizölrückstoßabdämpfung
|
||||
(= fuel oil recoil absorber)
|
||||
(jqvwxy missing, but all non-ASCII letters in one word)
|
||||
|
||||
Greek (el)
|
||||
----------
|
||||
|
||||
Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο
|
||||
(= No more shall I see acacias or myrtles in the golden clearing)
|
||||
|
||||
Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
|
||||
(= I uncover the soul-destroying abhorrence)
|
||||
|
||||
English (en)
|
||||
------------
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
|
||||
Spanish (es)
|
||||
------------
|
||||
|
||||
El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y
|
||||
frío, añoraba a su querido cachorro.
|
||||
(Contains every letter and every accent, but not every combination
|
||||
of vowel + acute.)
|
||||
|
||||
French (fr)
|
||||
-----------
|
||||
|
||||
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
|
||||
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
|
||||
qui lui permet de penser à la cænogenèse de l'être dont il est question
|
||||
dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
|
||||
pense-t-il, diminue çà et là la qualité de son œuvre.
|
||||
|
||||
l'île exiguë
|
||||
Où l'obèse jury mûr
|
||||
Fête l'haï volapük,
|
||||
Âne ex aéquo au whist,
|
||||
Ôtez ce vœu déçu.
|
||||
|
||||
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
|
||||
canoë au delà des îles, près du mälström où brûlent les novæ.
|
||||
|
||||
Irish Gaelic (ga)
|
||||
-----------------
|
||||
|
||||
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
|
||||
|
||||
Hungarian (hu)
|
||||
--------------
|
||||
|
||||
Árvíztűrő tükörfúrógép
|
||||
(= flood-proof mirror-drilling machine, only all non-ASCII letters)
|
||||
|
||||
Icelandic (is)
|
||||
--------------
|
||||
|
||||
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
|
||||
|
||||
Sævör grét áðan því úlpan var ónýt
|
||||
(some ASCII letters missing)
|
||||
|
||||
Japanese (jp)
|
||||
-------------
|
||||
|
||||
Hiragana: (Iroha)
|
||||
|
||||
いろはにほへとちりぬるを
|
||||
わかよたれそつねならむ
|
||||
うゐのおくやまけふこえて
|
||||
あさきゆめみしゑひもせす
|
||||
|
||||
Katakana:
|
||||
|
||||
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
|
||||
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
|
||||
|
||||
Hebrew (iw)
|
||||
-----------
|
||||
|
||||
? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
|
||||
|
||||
Polish (pl)
|
||||
-----------
|
||||
|
||||
Pchnąć w tę łódź jeża lub ośm skrzyń fig
|
||||
(= To push a hedgehog or eight bins of figs in this boat)
|
||||
|
||||
Russian (ru)
|
||||
------------
|
||||
|
||||
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
|
||||
(= Would a citrus live in the bushes of south? Yes, but only a fake one!)
|
||||
|
||||
Съешь же ещё этих мягких французских булок да выпей чаю
|
||||
(= Eat some more of these fresh French loafs and have some tea)
|
||||
|
||||
Thai (th)
|
||||
---------
|
||||
|
||||
[--------------------------|------------------------]
|
||||
๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน
|
||||
จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
|
||||
ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย
|
||||
ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
|
||||
|
||||
[The copyright for the Thai example is owned by The Computer
|
||||
Association of Thailand under the Royal Patronage of His Majesty the
|
||||
King.]
|
||||
|
||||
Special thanks to the people from all over the world who contributed
|
||||
these sentences.
|
|
@ -0,0 +1,2 @@
|
|||
Fo<EFBFBD> B<>rtololol
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
Foö-Báŗ
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
name = PerlIO-utf8_strict
|
||||
author = Leon Timmermans <leont@cpan.org>
|
||||
author = Christian Hansen <chansen@cpan.org>
|
||||
license = Perl_5
|
||||
copyright_holder = Leon Timmermans, Christian Hansen
|
||||
copyright_year = 2012
|
||||
|
||||
[Git::GatherDir]
|
||||
[PruneCruft]
|
||||
[MetaYAML]
|
||||
[MetaJSON]
|
||||
[Readme]
|
||||
[Manifest]
|
||||
[License]
|
||||
|
||||
[AutoPrereqs]
|
||||
[Repository]
|
||||
[Bugtracker]
|
||||
[MetaProvides::Package]
|
||||
[NextRelease]
|
||||
|
||||
[Git::Contributors]
|
||||
[MinimumPerl]
|
||||
[MakeMaker]
|
||||
|
||||
[Git::NextVersion]
|
||||
[@Git]
|
||||
|
||||
[CheckChangesHasContent]
|
||||
[RunExtraTests]
|
||||
[TestRelease]
|
||||
[ConfirmRelease]
|
||||
[UploadToCPAN]
|
||||
[PodWeaver]
|
||||
[PkgVersion]
|
||||
[PodSyntaxTests]
|
||||
[InstallGuide]
|
||||
|
||||
[PPPort]
|
||||
style = MakeMaker
|
|
@ -0,0 +1,88 @@
|
|||
package PerlIO::utf8_strict;
|
||||
$PerlIO::utf8_strict::VERSION = '0.009';
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use XSLoader;
|
||||
|
||||
XSLoader::load(__PACKAGE__, __PACKAGE__->VERSION);
|
||||
|
||||
1;
|
||||
|
||||
#ABSTRACT: Fast and correct UTF-8 IO
|
||||
|
||||
__END__
|
||||
|
||||
=pod
|
||||
|
||||
=encoding UTF-8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
PerlIO::utf8_strict - Fast and correct UTF-8 IO
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
version 0.009
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
open my $fh, '<:utf8_strict', $filename;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides a fast and correct UTF-8 PerlIO layer. Unlike perl's default C<:utf8> layer it checks the input for correctness.
|
||||
|
||||
=head1 LAYER ARGUMENTS
|
||||
|
||||
=over 4
|
||||
|
||||
=item allow_noncharacters
|
||||
|
||||
=item allow_surrogates
|
||||
|
||||
=back
|
||||
|
||||
=head1 EXPORT
|
||||
|
||||
PerlIO::utf8_strict exports no subroutines or symbols, just a perl layer C<utf8_strict>
|
||||
|
||||
=head1 DIAGNOSTICS
|
||||
|
||||
=over 4
|
||||
|
||||
=item Can't decode ill-formed UTF-8 octet sequence <%s>
|
||||
|
||||
(F) Encountered an ill-formed UTF-8 octet sequence. <%s> contains a hexadecimal
|
||||
representation of the maximal subpart of the ill-formed subsequence.
|
||||
|
||||
=item Can't interchange noncharacter code point U+%.4X
|
||||
|
||||
(F) Noncharacters are permanently reserved for internal use and should
|
||||
never be interchanged. Noncharacters consist of the values U+nFFFE and U+nFFFF
|
||||
(where n is from 0x0 to 0x10) and the values U+FDD0..U+FDEF.
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
=over 4
|
||||
|
||||
=item *
|
||||
|
||||
Leon Timmermans <leont@cpan.org>
|
||||
|
||||
=item *
|
||||
|
||||
Christian Hansen <chansen@cpan.org>
|
||||
|
||||
=back
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
This software is copyright (c) 2012 by Leon Timmermans, Christian Hansen.
|
||||
|
||||
This is free software; you can redistribute it and/or modify it under
|
||||
the same terms as the Perl 5 programming language system itself.
|
||||
|
||||
=cut
|
|
@ -0,0 +1,41 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
|
||||
use File::Spec::Functions qw/catfile/;
|
||||
|
||||
my $builder = Test::More->builder;
|
||||
binmode $builder->output, ":utf8";
|
||||
binmode $builder->failure_output, ":utf8";
|
||||
binmode $builder->todo_output, ":utf8";
|
||||
|
||||
{
|
||||
my $filename = catfile(qw/corpus test1.txt/);
|
||||
open my $fh, '<:utf8_strict', $filename or die "Couldn't open file $filename";
|
||||
|
||||
my $line = <$fh>;
|
||||
|
||||
is($line, "Foö-Báŗ\n", 'Content is Foö-Báŗ');
|
||||
}
|
||||
|
||||
{
|
||||
my $filename = catfile(qw/corpus quickbrown.txt/);
|
||||
open my $fh, '<:utf8_strict', $filename or die "Couldn't open file $filename";
|
||||
|
||||
lives_ok { my $data = do { local $/; <$fh> } } 'successfull reading quickbrown.txt'
|
||||
}
|
||||
|
||||
{
|
||||
my $filename = catfile(qw/corpus test1-latin1.txt/);
|
||||
open my $fh, '<:utf8_strict', $filename or die "Couldn't open file $filename";
|
||||
|
||||
my $line;
|
||||
throws_ok { $line = <$fh> } qr/^Can't decode ill-formed UTF-8 octet sequence/, 'Trying to read ill-formed encoded UTF-8 fails' or diag "Just read '$line'";
|
||||
}
|
||||
|
||||
done_testing;
|
|
@ -0,0 +1,34 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use IO::Handle;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets slurp];
|
||||
|
||||
|
||||
my $fh = fh_with_octets("\xE2\x98\xBA" x 8092);
|
||||
|
||||
lives_ok {
|
||||
my $data = do { local $/; <$fh> }
|
||||
} q[successfull reading 8092 WHITE SMILING FACE's];
|
||||
|
||||
{
|
||||
my $line = 'ascii';
|
||||
my ( $in, $out );
|
||||
pipe $in, $out;
|
||||
binmode $out, ':utf8_strict';
|
||||
binmode $in, ':utf8_strict';
|
||||
print $out "...\n";
|
||||
$out->flush;
|
||||
$line .= readline $in;
|
||||
|
||||
is($line, "ascii...\n", 'Appending from utf8 to ascii');
|
||||
}
|
||||
|
||||
|
||||
done_testing;
|
|
@ -0,0 +1,27 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||
|
||||
|
||||
for (my $cp = 0x80; $cp < 0x10FFFF; $cp += 0x1000) {
|
||||
my $sequence = substr(pack_utf8($cp), 0, -1);
|
||||
|
||||
my $name = sprintf 'reading incomplete UTF-8 sequence <%s> throws an exception',
|
||||
join ' ', map { sprintf '%.2X', ord $_ } split //, $sequence;
|
||||
|
||||
my $fh = fh_with_octets($sequence);
|
||||
|
||||
throws_ok {
|
||||
slurp($fh);
|
||||
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
package Util;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Carp qw[];
|
||||
use IO::File qw[SEEK_SET];
|
||||
|
||||
BEGIN {
|
||||
our @EXPORT_OK = qw[ fh_with_codepoints fh_with_octets
|
||||
pack_utf8 pack_overlong_utf8
|
||||
slurp rewind
|
||||
tmpfile ];
|
||||
our %EXPORT_TAGS = (
|
||||
all => [ @EXPORT_OK ],
|
||||
);
|
||||
|
||||
require Exporter;
|
||||
*import = \&Exporter::import;
|
||||
}
|
||||
|
||||
my @UTF8_MIN = (0x80, 0x800, 0x10000, 0x200000, 0x4000000, 0x80000000);
|
||||
sub pack_utf8 ($;$) {
|
||||
my ($cp, $len) = @_;
|
||||
($cp >= 0 && $cp < 0x80000000)
|
||||
|| Carp::confess(qq/Cannot pack '$cp'/);
|
||||
(@_ == 1 || ($len > 0 && $len <= 6 && $cp < $UTF8_MIN[$len - 1]))
|
||||
|| Carp::confess(qq/Cannot pack '$cp' to sequence length '$len'/);
|
||||
my @c = (0) x ($len || ($cp < 0x80 ? 1 : $cp < 0x800 ? 2
|
||||
: $cp < 0x10000 ? 3 : $cp < 0x200000 ? 4
|
||||
: $cp < 0x4000000 ? 5 : 6));
|
||||
for (reverse @c[1..$#c]) {
|
||||
$_ = ($cp & 0x3F) | 0x80;
|
||||
$cp >>= 6;
|
||||
}
|
||||
$c[0] = $cp | (0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC)[$#c];
|
||||
return pack('C*', @c);
|
||||
}
|
||||
|
||||
sub pack_overlong_utf8 ($) {
|
||||
my ($cp) = @_;
|
||||
($cp >= 0 && $cp < 0x4000000)
|
||||
|| Carp::confess(qq/Cannot pack '$cp'/);
|
||||
my @enc;
|
||||
for (my $i = 0; $i < 5; $i++) {
|
||||
next unless $cp < $UTF8_MIN[$i];
|
||||
push @enc, pack_utf8($cp, $i + 2);
|
||||
}
|
||||
return wantarray ? @enc : $enc[0];
|
||||
}
|
||||
|
||||
sub rewind (*) {
|
||||
seek($_[0], 0, SEEK_SET)
|
||||
|| die(qq/Couldn't rewind file handle: '$!'/);
|
||||
}
|
||||
|
||||
sub tmpfile (;$) {
|
||||
my $fh = IO::File->new_tmpfile
|
||||
|| die(qq/Couldn't create a new temporary file: '$!'/);
|
||||
|
||||
binmode($fh)
|
||||
|| die(qq/Couldn't binmode temporary file handle: '$!'/);
|
||||
|
||||
if (@_) {
|
||||
print({$fh} @_)
|
||||
|| die(qq/Couldn't write to temporary file handle: '$!'/);
|
||||
|
||||
seek($fh, 0, SEEK_SET)
|
||||
|| die(qq/Couldn't rewind temporary file handle: '$!'/);
|
||||
}
|
||||
|
||||
return $fh;
|
||||
}
|
||||
|
||||
sub slurp (*) {
|
||||
my ($fh) = @_;
|
||||
return do { local $/; <$fh> };
|
||||
}
|
||||
|
||||
sub fh_with_octets ($;@) {
|
||||
my ($octets, @args) = @_;
|
||||
|
||||
my $args = @args ? sprintf('(%s)', join ',', @args) : '';
|
||||
|
||||
if (0) {
|
||||
open(my $fh, "<:utf8_strict${args}", \$octets)
|
||||
or die(qq/Couldn't open scalar fh: '$!'/);
|
||||
return $fh;
|
||||
}
|
||||
else {
|
||||
my $fh = tmpfile($octets);
|
||||
binmode($fh, ":utf8_strict${args}")
|
||||
or die(qq/Couldn't binmode :utf8_strict${args} '$!'/);
|
||||
return $fh;
|
||||
}
|
||||
}
|
||||
|
||||
sub fh_with_codepoints ($;@) {
|
||||
my (@cp) = @_;
|
||||
return fh_with_octets(join '', map { pack_utf8($_) } @cp);
|
||||
}
|
||||
|
||||
1;
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets pack_overlong_utf8 slurp];
|
||||
|
||||
my @tests = (
|
||||
0x00,
|
||||
0x80,
|
||||
0x800,
|
||||
0x1000,
|
||||
);
|
||||
|
||||
foreach my $cp (@tests) {
|
||||
foreach my $sequence (pack_overlong_utf8($cp)) {
|
||||
my $name = sprintf 'reading non-shortest form representation of U+%.4X <%s> throws an exception',
|
||||
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $sequence;
|
||||
|
||||
my $fh = fh_with_octets($sequence);
|
||||
|
||||
throws_ok {
|
||||
slurp($fh);
|
||||
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||
}
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||
|
||||
my @NONCHARACTERS = (0xFDD0 .. 0xFDEF);
|
||||
{
|
||||
for (my $i = 0; $i < 0x10FFFF; $i += 0x10000) {
|
||||
push @NONCHARACTERS, $i ^ 0xFFFE, $i ^ 0xFFFF;
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $cp (@NONCHARACTERS) {
|
||||
my $octets = pack_utf8($cp);
|
||||
my $name = sprintf 'reading noncharacter U+%.4X <%s> throws an exception when using strict',
|
||||
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $octets;
|
||||
|
||||
my $fh = fh_with_octets($octets);
|
||||
my $hex = sprintf '%.4X', $cp;
|
||||
throws_ok {
|
||||
slurp($fh);
|
||||
} qr/^Can't interchange noncharacter code point U\+$hex/, $name;
|
||||
}
|
||||
|
||||
foreach my $cp (@NONCHARACTERS) {
|
||||
my $octets = pack_utf8($cp);
|
||||
my $name = sprintf 'reading noncharacter U+%.4X <%s> succeeds when allow_noncharacters is set',
|
||||
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $octets;
|
||||
|
||||
my $fh = fh_with_octets($octets, 'allow_noncharacters');
|
||||
|
||||
lives_ok {
|
||||
slurp($fh);
|
||||
} $name;
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||
|
||||
for (my $cp = 0x0011_0000; $cp < 0x7FFF_FFFF; $cp += 0x200000) {
|
||||
my $name = sprintf 'reading encoded super codepoint U-%.8X throws an exception',
|
||||
$cp;
|
||||
|
||||
my $fh = fh_with_octets(pack_utf8($cp));
|
||||
|
||||
throws_ok {
|
||||
slurp($fh);
|
||||
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||
|
||||
my @SURROGATES = (0xD800 .. 0xDFFF);
|
||||
|
||||
foreach my $cp (@SURROGATES) {
|
||||
my $fh = fh_with_octets(pack_utf8($cp));
|
||||
|
||||
my $name = sprintf 'reading encoded surrogate U+%.4X throws an exception when using strict', $cp;
|
||||
|
||||
throws_ok {
|
||||
slurp($fh);
|
||||
} qr/^Can't decode ill-formed UTF-8 octet sequence/, $name;
|
||||
}
|
||||
|
||||
foreach my $cp (@SURROGATES) {
|
||||
my $fh = fh_with_octets(pack_utf8($cp), 'allow_surrogates');
|
||||
|
||||
my $name = sprintf 'reading encoded surrogate U+%.4X succeeds when allow_surrogates is set', $cp;
|
||||
|
||||
lives_ok {
|
||||
slurp($fh);
|
||||
} $name;
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#! perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use Test::More 0.88;
|
||||
use Test::Exception;
|
||||
use lib 't/lib';
|
||||
use Util qw[fh_with_octets pack_utf8 slurp];
|
||||
|
||||
for (my $cp = 0x00; $cp < 0x10FFFF; $cp += 0x1000) {
|
||||
my $octets = pack_utf8($cp);
|
||||
my $name = sprintf 'successfull reading U+%.4X <%s>',
|
||||
$cp, join ' ', map { sprintf '%.2X', ord $_ } split //, $octets;
|
||||
|
||||
my $fh = fh_with_octets($octets);
|
||||
|
||||
lives_ok {
|
||||
slurp($fh);
|
||||
} $name;
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
@ -0,0 +1,381 @@
|
|||
#include "EXTERN.h"
|
||||
#include "perl.h"
|
||||
#include "XSUB.h"
|
||||
#include "perliol.h"
|
||||
#include "ppport.h"
|
||||
|
||||
#define UTF8_MAX_BYTES 4
|
||||
|
||||
static const U8 xs_utf8_sequence_len[0x100] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x0F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10-0x1F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x2F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30-0x3F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50-0x5F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6F */
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70-0x7F */
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8F */
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9F */
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xAF */
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xB0-0xBF */
|
||||
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xC0-0xCF */
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xD0-0xDF */
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xE0-0xEF */
|
||||
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xF0-0xFF */
|
||||
};
|
||||
|
||||
|
||||
typedef enum { STRICT_UTF8=0, ALLOW_SURROGATES=1, ALLOW_NONCHARACTERS=2, ALLOW_NONSHORTEST=4 } utf8_flags;
|
||||
|
||||
|
||||
static STRLEN skip_sequence(const U8 *cur, const STRLEN len) {
|
||||
STRLEN i, n = xs_utf8_sequence_len[*cur];
|
||||
|
||||
if (n < 1 || len < 2)
|
||||
return 1;
|
||||
|
||||
switch (cur[0]) {
|
||||
case 0xE0: if ((cur[1] & 0xE0) != 0xA0) return 1; break;
|
||||
case 0xED: if ((cur[1] & 0xE0) != 0x80) return 1; break;
|
||||
case 0xF4: if ((cur[1] & 0xF0) != 0x80) return 1; break;
|
||||
case 0xF0: if ((cur[1] & 0xF0) == 0x80) return 1; /* FALLTROUGH */
|
||||
default: if ((cur[1] & 0xC0) != 0x80) return 1; break;
|
||||
}
|
||||
|
||||
if (n > len)
|
||||
n = len;
|
||||
for (i = 2; i < n; i++)
|
||||
if ((cur[i] & 0xC0) != 0x80)
|
||||
break;
|
||||
return i;
|
||||
}
|
||||
|
||||
#if defined(PERL_STATIC_NO_RET) && defined(__attribute__noreturn__)
|
||||
PERL_STATIC_NO_RET void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) __attribute__noreturn__;
|
||||
#elif defined(__attribute__noreturn__)
|
||||
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) __attribute__noreturn__;
|
||||
#endif
|
||||
|
||||
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) {
|
||||
static const char *hex = "0123456789ABCDEF";
|
||||
const char *fmt;
|
||||
char seq[UTF8_MAX_BYTES * 3];
|
||||
char *d = seq;
|
||||
|
||||
if (eof)
|
||||
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s> at end of file";
|
||||
else
|
||||
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s>";
|
||||
|
||||
while (len-- > 0) {
|
||||
const U8 c = *cur++;
|
||||
*d++ = hex[c >> 4];
|
||||
*d++ = hex[c & 15];
|
||||
if (len)
|
||||
*d++ = ' ';
|
||||
}
|
||||
*d = 0;
|
||||
Perl_croak(aTHX_ fmt, seq);
|
||||
}
|
||||
|
||||
#if defined(PERL_STATIC_NO_RET) && defined(__attribute__noreturn__)
|
||||
PERL_STATIC_NO_RET void report_noncharacter(pTHX_ UV usv) __attribute__noreturn__;
|
||||
#elif defined(__attribute__noreturn__)
|
||||
static void report_noncharacter(pTHX_ UV usv) __attribute__noreturn__;
|
||||
#endif
|
||||
|
||||
static void report_noncharacter(pTHX_ UV usv) {
|
||||
static const char *fmt = "Can't interchange noncharacter code point U+%"UVXf;
|
||||
Perl_croak(aTHX_ fmt, usv);
|
||||
}
|
||||
|
||||
static STRLEN validate(pTHX_ const U8 *buf, const U8 *end, const int flags, PerlIO* handle) {
|
||||
const bool eof = PerlIO_eof(handle);
|
||||
const U8 *cur = buf;
|
||||
const U8 *end4 = end - UTF8_MAX_BYTES;
|
||||
STRLEN skip = 0;
|
||||
U32 v;
|
||||
|
||||
while (cur < end4) {
|
||||
while (cur < end4 && *cur < 0x80)
|
||||
cur++;
|
||||
|
||||
check:
|
||||
switch (xs_utf8_sequence_len[*cur]) {
|
||||
case 0:
|
||||
goto illformed;
|
||||
case 1:
|
||||
cur += 1;
|
||||
break;
|
||||
case 2:
|
||||
/* 110xxxxx 10xxxxxx */
|
||||
if ((cur[1] & 0xC0) != 0x80)
|
||||
goto illformed;
|
||||
cur += 2;
|
||||
break;
|
||||
case 3:
|
||||
v = ((U32)cur[0] << 16)
|
||||
| ((U32)cur[1] << 8)
|
||||
| ((U32)cur[2]);
|
||||
/* 1110xxxx 10xxxxxx 10xxxxxx */
|
||||
if ((v & 0x00F0C0C0) != 0x00E08080 ||
|
||||
/* Non-shortest form */
|
||||
v < 0x00E0A080)
|
||||
goto illformed;
|
||||
/* Surrogates U+D800..U+DFFF */
|
||||
if (!(flags & ALLOW_SURROGATES) && (v & 0x00EFA080) == 0x00EDA080)
|
||||
goto illformed;
|
||||
/* Non-characters U+FDD0..U+FDEF, U+FFFE..U+FFFF */
|
||||
if (!(flags & ALLOW_NONCHARACTERS) && v >= 0x00EFB790 && (v <= 0x00EFB7AF || v >= 0x00EFBFBE))
|
||||
goto noncharacter;
|
||||
cur += 3;
|
||||
break;
|
||||
case 4:
|
||||
v = ((U32)cur[0] << 24)
|
||||
| ((U32)cur[1] << 16)
|
||||
| ((U32)cur[2] << 8)
|
||||
| ((U32)cur[3]);
|
||||
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
if ((v & 0xF8C0C0C0) != 0xF0808080 ||
|
||||
/* Non-shortest form */
|
||||
v < 0xF0908080 ||
|
||||
/* Greater than U+10FFFF */
|
||||
v > 0xF48FBFBF)
|
||||
goto illformed;
|
||||
/* Non-characters U+nFFFE..U+nFFFF on plane 1-16 */
|
||||
if (!(flags & ALLOW_NONCHARACTERS) && (v & 0x000FBFBE) == 0x000FBFBE)
|
||||
goto noncharacter;
|
||||
cur += 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cur < end) {
|
||||
if (cur + xs_utf8_sequence_len[*cur] <= end)
|
||||
goto check;
|
||||
skip = skip_sequence(cur, end - cur);
|
||||
if (eof || cur + skip < end)
|
||||
goto illformed;
|
||||
}
|
||||
return cur - buf;
|
||||
|
||||
illformed:
|
||||
if (!skip)
|
||||
skip = skip_sequence(cur, end - cur);
|
||||
PerlIOBase(handle)->flags |= PERLIO_F_ERROR;
|
||||
report_illformed(aTHX_ cur, skip, eof);
|
||||
|
||||
noncharacter:
|
||||
if (v < 0xF0808080)
|
||||
v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x0F0000) >> 4;
|
||||
else
|
||||
v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x3F0000) >> 4 | (v & 0x07000000) >> 6;
|
||||
PerlIOBase(handle)->flags |= PERLIO_F_ERROR;
|
||||
report_noncharacter(aTHX_ v);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
PerlIOBuf buf;
|
||||
STDCHAR leftovers[UTF8_MAX_BYTES];
|
||||
size_t leftover_length;
|
||||
utf8_flags flags;
|
||||
} PerlIOUnicode;
|
||||
|
||||
static struct {
|
||||
const char* name;
|
||||
size_t length;
|
||||
utf8_flags value;
|
||||
} map[] = {
|
||||
{ STR_WITH_LEN("allow_surrogates"), ALLOW_SURROGATES },
|
||||
{ STR_WITH_LEN("allow_noncharacters"), ALLOW_NONCHARACTERS },
|
||||
{ STR_WITH_LEN("allow_nonshortest"), ALLOW_NONSHORTEST },
|
||||
{ STR_WITH_LEN("strict"), 0 },
|
||||
{ STR_WITH_LEN("loose"), ALLOW_SURROGATES | ALLOW_NONCHARACTERS | ALLOW_NONSHORTEST },
|
||||
};
|
||||
|
||||
static utf8_flags lookup_parameter(pTHX_ const char* ptr, size_t len) {
|
||||
unsigned i;
|
||||
for (i = 0; i < sizeof map / sizeof *map; ++i) {
|
||||
if (map[i].length == len && memcmp(ptr, map[i].name, len) == 0)
|
||||
return map[i].value;
|
||||
}
|
||||
Perl_croak(aTHX_ "Unknown argument to :utf8_strict: %*s", (int)len, ptr);
|
||||
}
|
||||
static utf8_flags parse_parameters(pTHX_ SV* param) {
|
||||
STRLEN len;
|
||||
const char *begin, *delim;
|
||||
if (!param || !SvOK(param))
|
||||
return 0;
|
||||
|
||||
begin = SvPV(param, len);
|
||||
delim = strchr(begin, ',');
|
||||
if(delim) {
|
||||
utf8_flags ret = 0;
|
||||
const char* end = begin + len;
|
||||
do {
|
||||
ret |= lookup_parameter(aTHX_ begin, delim - begin);
|
||||
begin = delim + 1;
|
||||
delim = strchr(begin, ',');
|
||||
} while (delim);
|
||||
if (begin < end)
|
||||
ret |= lookup_parameter(aTHX_ begin, end - begin);
|
||||
return ret;
|
||||
}
|
||||
else {
|
||||
return lookup_parameter(aTHX_ begin, len);
|
||||
}
|
||||
}
|
||||
|
||||
#define line_buffered(flags) ((flags & (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE)) == (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE))
|
||||
|
||||
void PerlIOBase_flush_linebuf(pTHX) {
|
||||
#ifdef dVAR
|
||||
dVAR;
|
||||
#endif
|
||||
PerlIOl **table = &PL_perlio;
|
||||
PerlIOl *f;
|
||||
while ((f = *table)) {
|
||||
int i;
|
||||
table = (PerlIOl **) (f++);
|
||||
for (i = 1; i < 64; i++) {
|
||||
if (f->next && line_buffered(PerlIOBase(&(f->next))->flags))
|
||||
PerlIO_flush(&(f->next));
|
||||
f++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static IV PerlIOUnicode_pushed(pTHX_ PerlIO* f, const char* mode, SV* arg, PerlIO_funcs* tab) {
|
||||
utf8_flags flags = parse_parameters(aTHX_ arg);
|
||||
if (PerlIOBuf_pushed(aTHX_ f, mode, arg, tab) == 0) {
|
||||
PerlIOBase(f)->flags |= PERLIO_F_UTF8;
|
||||
PerlIOSelf(f, PerlIOUnicode)->flags = flags;
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static IV PerlIOUnicode_fill(pTHX_ PerlIO* f) {
|
||||
PerlIOUnicode * const u = PerlIOSelf(f, PerlIOUnicode);
|
||||
PerlIOBuf * const b = &u->buf;
|
||||
PerlIO *n = PerlIONext(f);
|
||||
SSize_t avail;
|
||||
Size_t read_bytes = 0;
|
||||
STDCHAR *end;
|
||||
SSize_t fit;
|
||||
|
||||
if (PerlIO_flush(f) != 0)
|
||||
return -1;
|
||||
if (PerlIOBase(f)->flags & PERLIO_F_TTY)
|
||||
PerlIOBase_flush_linebuf(aTHX);
|
||||
|
||||
if (!b->buf)
|
||||
PerlIO_get_base(f);
|
||||
|
||||
assert(b->buf);
|
||||
|
||||
if (u->leftover_length) {
|
||||
Copy(u->leftovers, b->buf, u->leftover_length, STDCHAR);
|
||||
b->end = b->buf + u->leftover_length;
|
||||
read_bytes = u->leftover_length;
|
||||
u->leftover_length = 0;
|
||||
}
|
||||
else {
|
||||
b->ptr = b->end = b->buf;
|
||||
}
|
||||
fit = (SSize_t)b->bufsiz - (b->end - b->buf);
|
||||
|
||||
if (!PerlIOValid(n)) {
|
||||
PerlIOBase(f)->flags |= PERLIO_F_EOF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (PerlIO_fast_gets(n)) {
|
||||
/*
|
||||
* Layer below is also buffered. We do _NOT_ want to call its
|
||||
* ->Read() because that will loop till it gets what we asked for
|
||||
* which may hang on a pipe etc. Instead take anything it has to
|
||||
* hand, or ask it to fill _once_.
|
||||
*/
|
||||
avail = PerlIO_get_cnt(n);
|
||||
if (avail <= 0) {
|
||||
avail = PerlIO_fill(n);
|
||||
if (avail == 0)
|
||||
avail = PerlIO_get_cnt(n);
|
||||
else {
|
||||
if (!PerlIO_error(n) && PerlIO_eof(n))
|
||||
avail = 0;
|
||||
}
|
||||
}
|
||||
if (avail > 0) {
|
||||
STDCHAR *ptr = PerlIO_get_ptr(n);
|
||||
const SSize_t cnt = avail;
|
||||
if (avail > fit)
|
||||
avail = fit;
|
||||
Copy(ptr, b->end, avail, STDCHAR);
|
||||
PerlIO_set_ptrcnt(n, ptr + avail, cnt - avail);
|
||||
read_bytes += avail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
avail = PerlIO_read(n, b->end, fit);
|
||||
if (avail > 0)
|
||||
read_bytes += avail;
|
||||
}
|
||||
if (avail <= 0) {
|
||||
if (avail < 0 || (read_bytes == 0 && PerlIO_eof(n))) {
|
||||
PerlIOBase(f)->flags |= (avail == 0) ? PERLIO_F_EOF : PERLIO_F_ERROR;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
end = b->buf + read_bytes;
|
||||
b->end = b->buf + validate(aTHX_ (const U8 *)b->buf, (const U8 *)end, u->flags, n);
|
||||
if (b->end < end) {
|
||||
size_t len = b->buf + read_bytes - b->end;
|
||||
Copy(b->end, u->leftovers, len, char);
|
||||
u->leftover_length = len;
|
||||
}
|
||||
PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
PERLIO_FUNCS_DECL(PerlIO_utf8_strict) = {
|
||||
sizeof(PerlIO_funcs),
|
||||
"utf8_strict",
|
||||
sizeof(PerlIOUnicode),
|
||||
PERLIO_K_BUFFERED|PERLIO_K_UTF8,
|
||||
PerlIOUnicode_pushed,
|
||||
PerlIOBuf_popped,
|
||||
PerlIOBuf_open,
|
||||
PerlIOBase_binmode,
|
||||
NULL,
|
||||
PerlIOBase_fileno,
|
||||
PerlIOBuf_dup,
|
||||
PerlIOBuf_read,
|
||||
PerlIOBase_unread,
|
||||
PerlIOBuf_write,
|
||||
PerlIOBuf_seek,
|
||||
PerlIOBuf_tell,
|
||||
PerlIOBuf_close,
|
||||
PerlIOBuf_flush,
|
||||
PerlIOUnicode_fill,
|
||||
PerlIOBase_eof,
|
||||
PerlIOBase_error,
|
||||
PerlIOBase_clearerr,
|
||||
PerlIOBase_setlinebuf,
|
||||
PerlIOBuf_get_base,
|
||||
PerlIOBuf_bufsiz,
|
||||
PerlIOBuf_get_ptr,
|
||||
PerlIOBuf_get_cnt,
|
||||
PerlIOBuf_set_ptrcnt,
|
||||
};
|
||||
|
||||
MODULE = PerlIO::utf8_strict
|
||||
|
||||
PROTOTYPES: DISABLE
|
||||
|
||||
BOOT:
|
||||
PerlIO_define_layer(aTHX_ (PerlIO_funcs*)&PerlIO_utf8_strict);
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
#!perl
|
||||
# This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests.
|
||||
use strict; use warnings;
|
||||
use Test::More;
|
||||
use Test::Pod 1.41;
|
||||
|
||||
all_pod_files_ok();
|
Loading…
Reference in New Issue