/* Mirror of https://gitee.com/openkylin/wget.git (462 lines, 20 KiB, C). */
/* Unicode character case mappings.
   Copyright (C) 2002, 2009-2019 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <https://www.gnu.org/licenses/>. */

#ifndef _UNICASE_H
#define _UNICASE_H

#include "unitypes.h"

/* Get bool. */
#include <stdbool.h>

/* Get size_t. */
#include <stddef.h>

/* Get uninorm_t. */
#include "uninorm.h"

/* Give the declarations below C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* ========================================================================= */

/* Character case mappings.
   These mappings are locale and context independent.
   WARNING! These functions are not sufficient for languages such as German.
   Better use the functions below that treat an entire string at once and are
   language aware. */

/* Return the uppercase mapping of a Unicode character. */
|
||
|
extern ucs4_t
|
||
|
uc_toupper (ucs4_t uc)
|
||
|
_UC_ATTRIBUTE_CONST;
|
||
|
|
||
|
/* Return the lowercase mapping of a Unicode character. */
|
||
|
extern ucs4_t
|
||
|
uc_tolower (ucs4_t uc)
|
||
|
_UC_ATTRIBUTE_CONST;
|
||
|
|
||
|
/* Return the titlecase mapping of a Unicode character. */
|
||
|
extern ucs4_t
|
||
|
uc_totitle (ucs4_t uc)
|
||
|
_UC_ATTRIBUTE_CONST;
|
||
|
|
||
|
/* ========================================================================= */

/* String case mappings. */

/* These functions are locale dependent. The iso639_language argument
   identifies the language (e.g. "tr" for Turkish). NULL means to use
   locale independent case mappings. */

/* Return the ISO 639 language code of the current locale.
|
||
|
Return "" if it is unknown, or in the "C" locale. */
|
||
|
extern const char *
|
||
|
uc_locale_language (void)
|
||
|
_UC_ATTRIBUTE_PURE;
|
||
|
|
||
|
/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is an uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is an uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is an uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair. If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
   allocated string is returned. In both cases, *lengthp is set to the
   length (number of units) of the returned string. In case of error,
   NULL is returned and errno is set. */

/* Return the uppercase mapping of a string.
|
||
|
The nf argument identifies the normalization form to apply after the
|
||
|
case-mapping. It can also be NULL, for no normalization. */
|
||
|
extern uint8_t *
|
||
|
u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Return the lowercase mapping of a string.
|
||
|
The nf argument identifies the normalization form to apply after the
|
||
|
case-mapping. It can also be NULL, for no normalization. */
|
||
|
extern uint8_t *
|
||
|
u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Return the titlecase mapping of a string.
|
||
|
The nf argument identifies the normalization form to apply after the
|
||
|
case-mapping. It can also be NULL, for no normalization. */
|
||
|
extern uint8_t *
|
||
|
u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* The case-mapping context given by a prefix string.
   Values of this type are passed and returned by value. */
typedef struct casing_prefix_context
{
  /* These fields are private, undocumented. */
  uint32_t last_char_except_ignorable;
  uint32_t last_char_normal_or_above;
}
casing_prefix_context_t;
/* The case-mapping context of the empty prefix string. */
|
||
|
extern const casing_prefix_context_t unicase_empty_prefix_context;
|
||
|
/* Return the case-mapping context of a given prefix string. */
|
||
|
extern casing_prefix_context_t
|
||
|
u8_casing_prefix_context (const uint8_t *s, size_t n);
|
||
|
extern casing_prefix_context_t
|
||
|
u16_casing_prefix_context (const uint16_t *s, size_t n);
|
||
|
extern casing_prefix_context_t
|
||
|
u32_casing_prefix_context (const uint32_t *s, size_t n);
|
||
|
/* Return the case-mapping context of the prefix concat(A, S), given the
|
||
|
case-mapping context of the prefix A. */
|
||
|
extern casing_prefix_context_t
|
||
|
u8_casing_prefixes_context (const uint8_t *s, size_t n,
|
||
|
casing_prefix_context_t a_context);
|
||
|
extern casing_prefix_context_t
|
||
|
u16_casing_prefixes_context (const uint16_t *s, size_t n,
|
||
|
casing_prefix_context_t a_context);
|
||
|
extern casing_prefix_context_t
|
||
|
u32_casing_prefixes_context (const uint32_t *s, size_t n,
|
||
|
casing_prefix_context_t a_context);
|
||
|
|
||
|
/* The case-mapping context given by a suffix string.
   Values of this type are passed and returned by value. */
typedef struct casing_suffix_context
{
  /* These fields are private, undocumented. */
  uint32_t first_char_except_ignorable;
  uint32_t bits;
}
casing_suffix_context_t;
/* The case-mapping context of the empty suffix string. */
|
||
|
extern const casing_suffix_context_t unicase_empty_suffix_context;
|
||
|
/* Return the case-mapping context of a given suffix string. */
|
||
|
extern casing_suffix_context_t
|
||
|
u8_casing_suffix_context (const uint8_t *s, size_t n);
|
||
|
extern casing_suffix_context_t
|
||
|
u16_casing_suffix_context (const uint16_t *s, size_t n);
|
||
|
extern casing_suffix_context_t
|
||
|
u32_casing_suffix_context (const uint32_t *s, size_t n);
|
||
|
/* Return the case-mapping context of the suffix concat(S, A), given the
|
||
|
case-mapping context of the suffix A. */
|
||
|
extern casing_suffix_context_t
|
||
|
u8_casing_suffixes_context (const uint8_t *s, size_t n,
|
||
|
casing_suffix_context_t a_context);
|
||
|
extern casing_suffix_context_t
|
||
|
u16_casing_suffixes_context (const uint16_t *s, size_t n,
|
||
|
casing_suffix_context_t a_context);
|
||
|
extern casing_suffix_context_t
|
||
|
u32_casing_suffixes_context (const uint32_t *s, size_t n,
|
||
|
casing_suffix_context_t a_context);
|
||
|
|
||
|
/* Return the uppercase mapping of a string that is surrounded by a prefix
|
||
|
and a suffix. */
|
||
|
extern uint8_t *
|
||
|
u8_ct_toupper (const uint8_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_ct_toupper (const uint16_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_ct_toupper (const uint32_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Return the lowercase mapping of a string that is surrounded by a prefix
|
||
|
and a suffix. */
|
||
|
extern uint8_t *
|
||
|
u8_ct_tolower (const uint8_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_ct_tolower (const uint16_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_ct_tolower (const uint32_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Return the titlecase mapping of a string that is surrounded by a prefix
|
||
|
and a suffix. */
|
||
|
extern uint8_t *
|
||
|
u8_ct_totitle (const uint8_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_ct_totitle (const uint16_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_ct_totitle (const uint32_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Return the case folded string.
|
||
|
Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
|
||
|
to comparing S1 and S2 with uN_casecmp().
|
||
|
The nf argument identifies the normalization form to apply after the
|
||
|
case-mapping. It can also be NULL, for no normalization. */
|
||
|
extern uint8_t *
|
||
|
u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
/* Likewise, for a string that is surrounded by a prefix and a suffix. */
|
||
|
extern uint8_t *
|
||
|
u8_ct_casefold (const uint8_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint8_t *resultbuf, size_t *lengthp);
|
||
|
extern uint16_t *
|
||
|
u16_ct_casefold (const uint16_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint16_t *resultbuf, size_t *lengthp);
|
||
|
extern uint32_t *
|
||
|
u32_ct_casefold (const uint32_t *s, size_t n,
|
||
|
casing_prefix_context_t prefix_context,
|
||
|
casing_suffix_context_t suffix_context,
|
||
|
const char *iso639_language,
|
||
|
uninorm_t nf,
|
||
|
uint32_t *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Compare S1 and S2, ignoring differences in case and normalization.
|
||
|
The nf argument identifies the normalization form to apply after the
|
||
|
case-mapping. It can also be NULL, for no normalization.
|
||
|
If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
|
||
|
return 0. Upon failure, return -1 with errno set. */
|
||
|
extern int
|
||
|
u8_casecmp (const uint8_t *s1, size_t n1,
|
||
|
const uint8_t *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
extern int
|
||
|
u16_casecmp (const uint16_t *s1, size_t n1,
|
||
|
const uint16_t *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
extern int
|
||
|
u32_casecmp (const uint32_t *s1, size_t n1,
|
||
|
const uint32_t *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
extern int
|
||
|
ulc_casecmp (const char *s1, size_t n1,
|
||
|
const char *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
|
||
|
/* Convert the string S of length N to a NUL-terminated byte sequence, in such
|
||
|
a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
|
||
|
function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
|
||
|
NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */
|
||
|
extern char *
|
||
|
u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf, char *resultbuf, size_t *lengthp);
|
||
|
extern char *
|
||
|
u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf, char *resultbuf, size_t *lengthp);
|
||
|
extern char *
|
||
|
u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf, char *resultbuf, size_t *lengthp);
|
||
|
extern char *
|
||
|
ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
|
||
|
uninorm_t nf, char *resultbuf, size_t *lengthp);
|
||
|
|
||
|
/* Compare S1 and S2, ignoring differences in case and normalization, using the
|
||
|
collation rules of the current locale.
|
||
|
The nf argument identifies the normalization form to apply after the
|
||
|
case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also
|
||
|
be NULL, for no normalization.
|
||
|
If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
|
||
|
return 0. Upon failure, return -1 with errno set. */
|
||
|
extern int
|
||
|
u8_casecoll (const uint8_t *s1, size_t n1,
|
||
|
const uint8_t *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
extern int
|
||
|
u16_casecoll (const uint16_t *s1, size_t n1,
|
||
|
const uint16_t *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
extern int
|
||
|
u32_casecoll (const uint32_t *s1, size_t n1,
|
||
|
const uint32_t *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
extern int
|
||
|
ulc_casecoll (const char *s1, size_t n1,
|
||
|
const char *s2, size_t n2,
|
||
|
const char *iso639_language, uninorm_t nf, int *resultp);
|
||
|
|
||
|
|
||
|
/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
   otherwise, and return 0. Upon failure, return -1 with errno set.
   S is the string s[0..n-1]. The iso639_language argument identifies the
   language; NULL means to use locale independent case mappings. */
extern int
       u8_is_uppercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_uppercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_uppercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);

/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
   otherwise, and return 0. Upon failure, return -1 with errno set.
   S is the string s[0..n-1]. The iso639_language argument identifies the
   language; NULL means to use locale independent case mappings. */
extern int
       u8_is_lowercase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_lowercase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_lowercase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);

/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
   otherwise, and return 0. Upon failure, return -1 with errno set.
   S is the string s[0..n-1]. The iso639_language argument identifies the
   language; NULL means to use locale independent case mappings. */
extern int
       u8_is_titlecase (const uint8_t *s, size_t n,
                        const char *iso639_language,
                        bool *resultp);
extern int
       u16_is_titlecase (const uint16_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u32_is_titlecase (const uint32_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);

/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
   false otherwise, and return 0. Upon failure, return -1 with errno set.
   S is the string s[0..n-1]. The iso639_language argument identifies the
   language; NULL means to use locale independent case mappings. */
extern int
       u8_is_casefolded (const uint8_t *s, size_t n,
                         const char *iso639_language,
                         bool *resultp);
extern int
       u16_is_casefolded (const uint16_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);
extern int
       u32_is_casefolded (const uint32_t *s, size_t n,
                          const char *iso639_language,
                          bool *resultp);

/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
   either upper case or lower case or title case is not a no-op.
   Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
   under the lower case mapping, and under the title case mapping; in other
   words, when NFD(S) consists entirely of caseless characters.
   Upon failure, return -1 with errno set.
   S is the string s[0..n-1]. The iso639_language argument identifies the
   language; NULL means to use locale independent case mappings. */
extern int
       u8_is_cased (const uint8_t *s, size_t n,
                    const char *iso639_language,
                    bool *resultp);
extern int
       u16_is_cased (const uint16_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);
extern int
       u32_is_cased (const uint32_t *s, size_t n,
                     const char *iso639_language,
                     bool *resultp);


/* ========================================================================= */

/* Close the extern "C" block opened near the top of this header. */
#ifdef __cplusplus
}
#endif

#endif /* _UNICASE_H */