wget/lib/nl_langinfo.c

/* nl_langinfo() replacement: query locale dependent information.

   Copyright (C) 2007-2023 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include <langinfo.h>

#include <locale.h>
#include <stdlib.h>
#include <string.h>
#if defined _WIN32 && ! defined __CYGWIN__
# define WIN32_LEAN_AND_MEAN  /* avoid including junk */
# include <windows.h>
# include <stdio.h>
#endif

#if REPLACE_NL_LANGINFO && !NL_LANGINFO_MTSAFE
# if defined _WIN32 && !defined __CYGWIN__

#  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
#  include <windows.h>

# elif HAVE_PTHREAD_API

#  include <pthread.h>
#  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
#   include <threads.h>
#   pragma weak thrd_exit
#   define c11_threads_in_use() (thrd_exit != NULL)
#  else
#   define c11_threads_in_use() 0
#  endif

# elif HAVE_THREADS_H

#  include <threads.h>

# endif
#endif

/* nl_langinfo() must be multithread-safe.  To achieve this without using
   thread-local storage:
     1. We use a specific static buffer for each possible argument.
        So that different threads can call nl_langinfo with different arguments,
        without interfering.
     2. We use a simple strcpy or memcpy to fill this static buffer.  Filling it
        through, for example, strcpy + strcat would not be guaranteed to leave
        the buffer's contents intact if another thread is currently accessing
        it.  If necessary, the contents are first assembled in a stack-allocated
        buffer.  */

#if !REPLACE_NL_LANGINFO || GNULIB_defined_CODESET
/* Return the codeset of the current locale, if this is easily deducible.
   Otherwise, return "".

   The codeset is taken from the part of the LC_CTYPE locale name that
   follows the first '.', with any "@modifier" trailer removed.  The result
   is either a string literal or a pointer to a static buffer that is
   overwritten by subsequent calls.  A codeset name that does not fit in
   that static buffer is treated as "not deducible".  */
static char *
ctype_codeset (void)
{
  /* This function is only used on platforms which don't have uselocale().
     Therefore we don't need to look at the per-thread locale first, here.  */
  static char result[2 + 10 + 1];
  char buf[2 + 10 + 1];
  char locale[SETLOCALE_NULL_MAX];
  char *codeset;
  size_t codesetlen;

  if (setlocale_null_r (LC_CTYPE, locale, sizeof (locale)))
    locale[0] = '\0';

  /* Start out with an empty codeset, stored in BUF.  */
  codeset = buf;
  codeset[0] = '\0';

  if (locale[0])
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      char *dot = strchr (locale, '.');

      if (dot)
        {
          /* Look for the possible @... trailer and remove it, if any.  */
          char *codeset_start = dot + 1;
          char const *modifier = strchr (codeset_start, '@');

          if (! modifier)
            /* No trailer: CODESET now points into LOCALE, and its length
               has not been checked yet.  */
            codeset = codeset_start;
          else
            {
              codesetlen = modifier - codeset_start;
              if (codesetlen < sizeof buf)
                {
                  codeset = memcpy (buf, codeset_start, codesetlen);
                  codeset[codesetlen] = '\0';
                }
            }
        }
    }

# if defined _WIN32 && ! defined __CYGWIN__
  /* If setlocale is successful, it returns the number of the
     codepage, as a string.  Otherwise, fall back on Windows API
     GetACP, which returns the locale's codepage as a number (although
     this doesn't change according to what the 'setlocale' call specified).
     Either way, prepend "CP" to make it a valid codeset name.  */
  codesetlen = strlen (codeset);
  if (0 < codesetlen && codesetlen < sizeof buf - 2)
    memmove (buf + 2, codeset, codesetlen + 1);
  else
    sprintf (buf + 2, "%u", GetACP ());
  /* For a locale name such as "French_France.65001", in Windows 10,
     setlocale now returns "French_France.utf8" instead.  */
  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
    return (char *) "UTF-8";
  else
    {
      memcpy (buf, "CP", 2);
      strcpy (result, buf);
      return result;
    }
# else
  /* CODESET may point into LOCALE, which can hold far more characters than
     RESULT.  Refuse over-long names instead of overflowing the static
     buffer; this matches the treatment of over-long names that carry an
     "@modifier" trailer above.  */
  if (strlen (codeset) >= sizeof result)
    return (char *) "";
  strcpy (result, codeset);
  return result;
# endif
}
#endif


#if REPLACE_NL_LANGINFO

/* Override nl_langinfo with support for added nl_item values.  */

# undef nl_langinfo

/* Without locking, on Solaris 11.3, test-nl_langinfo-mt fails, with message
   "thread5 disturbed by threadN!", even when threadN invokes only
      nl_langinfo (CODESET);
      nl_langinfo (CRNCYSTR);
   Similarly on Solaris 10.  */

# if !NL_LANGINFO_MTSAFE /* Solaris */

/* Number of per-item result slots, and the size of each slot in bytes
   (including the terminating NUL).  */
#  define ITEMS (MAXSTRMSG + 1)
#  define MAX_RESULT_LEN 80

/* Call the system's nl_langinfo for ITEM and, when ITEM is in the range
   [0, ITEMS) and the answer is not NULL, copy the answer into a static slot
   reserved for ITEM and return that slot.  Otherwise return the system's
   answer unchanged.  Answers of MAX_RESULT_LEN bytes or more are
   truncated.  */
static char *
nl_langinfo_unlocked (nl_item item)
{
  static char result[ITEMS][MAX_RESULT_LEN];
  char *slot;

  /* The string returned by nl_langinfo lives in storage that other calls
     to nl_langinfo may overwrite.  */
  char *transient = nl_langinfo (item);

  if (transient == NULL || item < 0 || item >= ITEMS)
    return transient;

  slot = result[item];
  if (strlen (transient) >= MAX_RESULT_LEN)
    {
      /* Too long: keep only the first MAX_RESULT_LEN - 1 bytes.  The
         terminator is stored before the bytes are copied.  */
      slot[MAX_RESULT_LEN - 1] = '\0';
      memcpy (slot, transient, MAX_RESULT_LEN - 1);
    }
  else
    strcpy (slot, transient);

  return slot;
}

/* Use a lock, so that no two threads can invoke nl_langinfo_unlocked
   at the same time.  */

/* Prohibit renaming this symbol.  */
#  undef gl_get_nl_langinfo_lock

#  if defined _WIN32 && !defined __CYGWIN__

/* Accessor for the critical section that serializes calls to
   nl_langinfo_unlocked; defined outside this file.  */
extern __declspec(dllimport) CRITICAL_SECTION *gl_get_nl_langinfo_lock (void);

/* Return nl_langinfo_unlocked (ITEM), evaluated while holding the
   critical section obtained from gl_get_nl_langinfo_lock.  */
static char *
nl_langinfo_with_lock (nl_item item)
{
  CRITICAL_SECTION *section = gl_get_nl_langinfo_lock ();
  char *answer;

  EnterCriticalSection (section);
  answer = nl_langinfo_unlocked (item);
  LeaveCriticalSection (section);

  return answer;
}

#  elif HAVE_PTHREAD_API

/* Accessor for the mutex that serializes calls to nl_langinfo_unlocked;
   defined outside this file.  */
extern
#   if defined _WIN32 || defined __CYGWIN__
  __declspec(dllimport)
#   endif
  pthread_mutex_t *gl_get_nl_langinfo_lock (void);

#   if HAVE_WEAK_SYMBOLS /* musl libc, FreeBSD, NetBSD, OpenBSD, Haiku */

     /* Avoid the need to link with '-lpthread'.  */
#    pragma weak pthread_mutex_lock
#    pragma weak pthread_mutex_unlock

     /* Determine whether libpthread is in use.  */
#    pragma weak pthread_mutexattr_gettype
     /* See the comments in lock.h.  */
#    define pthread_in_use() \
       (pthread_mutexattr_gettype != NULL || c11_threads_in_use ())

#   else
#    define pthread_in_use() 1
#   endif

/* Return nl_langinfo_unlocked (ITEM).  When pthread_in_use () is true, the
   call is made while holding the mutex obtained from
   gl_get_nl_langinfo_lock, and the program aborts if locking or unlocking
   reports an error.  When it is false, no locking is done.  */
static char *
nl_langinfo_with_lock (nl_item item)
{
  pthread_mutex_t *mutex;
  char *answer;

  if (!pthread_in_use())
    return nl_langinfo_unlocked (item);

  mutex = gl_get_nl_langinfo_lock ();

  if (pthread_mutex_lock (mutex) != 0)
    abort ();
  answer = nl_langinfo_unlocked (item);
  if (pthread_mutex_unlock (mutex) != 0)
    abort ();

  return answer;
}

#  elif HAVE_THREADS_H

/* Accessor for the mutex that serializes calls to nl_langinfo_unlocked;
   defined outside this file.  */
extern mtx_t *gl_get_nl_langinfo_lock (void);

/* Return nl_langinfo_unlocked (ITEM), evaluated while holding the mutex
   obtained from gl_get_nl_langinfo_lock.  Abort the program if locking or
   unlocking does not report thrd_success.  */
static char *
nl_langinfo_with_lock (nl_item item)
{
  mtx_t *mutex = gl_get_nl_langinfo_lock ();
  char *answer;

  if (thrd_success != mtx_lock (mutex))
    abort ();

  answer = nl_langinfo_unlocked (item);

  if (thrd_success != mtx_unlock (mutex))
    abort ();

  return answer;
}

#  endif

# else

/* On other platforms, no lock is needed.  */
#  define nl_langinfo_with_lock nl_langinfo

# endif

/* Replacement for nl_langinfo.  Items for which the corresponding
   GNULIB_defined_* macro is set are answered here, either with a fixed
   string or by mapping them to a different item.  Everything else, as well
   as every mapped item, is forwarded to nl_langinfo_with_lock.  */
char *
rpl_nl_langinfo (nl_item item)
{
  /* The item that is eventually passed on; ITEM itself unless one of the
     cases below substitutes another one.  */
  nl_item forwarded = item;

  switch (item)
    {
# if GNULIB_defined_CODESET
    case CODESET:
      return ctype_codeset ();
# endif
# if GNULIB_defined_T_FMT_AMPM
    case T_FMT_AMPM:
      return (char *) "%I:%M:%S %p";
# endif
# if GNULIB_defined_ALTMON
    case ALTMON_1:
    case ALTMON_2:
    case ALTMON_3:
    case ALTMON_4:
    case ALTMON_5:
    case ALTMON_6:
    case ALTMON_7:
    case ALTMON_8:
    case ALTMON_9:
    case ALTMON_10:
    case ALTMON_11:
    case ALTMON_12:
      /* Gnulib does not ship the localizations that would be needed here,
         so ALTMON_i is answered like MON_i.  */
      forwarded = MON_1 + (item - ALTMON_1);
      break;
# endif
# if GNULIB_defined_ERA
    case ERA:
      /* There is no standard format.  glibc uses a sequence of strings
         of the form "direction:offset:start_date:end_date:era_name:era_format"
         followed by an empty string.  */
      return (char *) "";
    case ERA_D_FMT:
      /* strftime's %Ex conversion behaves like %x when the locale has no
         alternative time format.  */
      forwarded = D_FMT;
      break;
    case ERA_D_T_FMT:
      /* strftime's %Ec conversion behaves like %c when the locale has no
         alternative time format.  */
      forwarded = D_T_FMT;
      break;
    case ERA_T_FMT:
      /* strftime's %EX conversion behaves like %X when the locale has no
         alternative time format.  */
      forwarded = T_FMT;
      break;
    case ALT_DIGITS:
      /* There is no standard format.  glibc uses 10 strings laid out
         back to back in memory.  */
      return (char *) "\0\0\0\0\0\0\0\0\0\0";
# endif
# if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
    case YESEXPR:
      return (char *) "^[yY]";
    case NOEXPR:
      return (char *) "^[nN]";
# endif
    default:
      break;
    }

  return nl_langinfo_with_lock (forwarded);
}

#else

/* Provide nl_langinfo from scratch, either for native MS-Windows, or
   for old Unix platforms without locales, such as Linux libc5 or
   BeOS.  */

# include <time.h>

/* Return the string associated with ITEM.

   CODESET is answered through ctype_codeset, with a fixed default when that
   yields an empty string.  Numeric and monetary items are taken from
   localeconv ().  Date and time formats, YESEXPR and NOEXPR are fixed
   strings.  AM/PM, day and month names are produced with strftime, with
   English names as fallback when strftime returns 0.  Any other ITEM
   yields "".

   The returned pointer refers to a string literal, to storage owned by
   localeconv (), or to a static buffer that is specific to ITEM and is
   rewritten by later calls with the same ITEM.  */
char *
nl_langinfo (nl_item item)
{
  /* English fallback names, used only when strftime produces nothing.
     The explicit dimension of 12 keeps every index computed from MON_i,
     ALTMON_i and ABMON_i within bounds.  */
  static char const months[12][sizeof "September"] = {
    "January", "February", "March", "April", "May", "June", "July",
    "August", "September", "October", "November", "December"
  };
  static char const abmonths[12][sizeof "Jan"] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
    "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  /* Scratch buffer for strftime.  Each strftime call below is given the
     size of the (smaller) static result buffer rather than the size of
     BUF, so that the subsequent strcpy into the result buffer fits.  */
  char buf[100];
  struct tm tmm = { 0 };

  switch (item)
    {
    /* nl_langinfo items of the LC_CTYPE category */
    case CODESET:
      {
        char *codeset = ctype_codeset ();
        if (*codeset)
          return codeset;
      }
      /* The codeset could not be deduced: use a fixed default.  */
# ifdef __BEOS__
      return (char *) "UTF-8";
# else
      return (char *) "ISO-8859-1";
# endif
    /* nl_langinfo items of the LC_NUMERIC category */
    case RADIXCHAR:
      return localeconv () ->decimal_point;
    case THOUSEP:
      return localeconv () ->thousands_sep;
# ifdef GROUPING
    case GROUPING:
      return localeconv () ->grouping;
# endif
    /* nl_langinfo items of the LC_TIME category.
       TODO: Really use the locale.  */
    case D_T_FMT:
    case ERA_D_T_FMT:
      return (char *) "%a %b %e %H:%M:%S %Y";
    case D_FMT:
    case ERA_D_FMT:
      return (char *) "%m/%d/%y";
    case T_FMT:
    case ERA_T_FMT:
      return (char *) "%H:%M:%S";
    case T_FMT_AMPM:
      return (char *) "%I:%M:%S %p";
    case AM_STR:
      {
        static char result[80];
        if (!strftime (buf, sizeof result, "%p", &tmm))
          return (char *) "AM";
        strcpy (result, buf);
        return result;
      }
    case PM_STR:
      {
        static char result[80];
        tmm.tm_hour = 12;
        if (!strftime (buf, sizeof result, "%p", &tmm))
          return (char *) "PM";
        strcpy (result, buf);
        return result;
      }
    case DAY_1:
    case DAY_2:
    case DAY_3:
    case DAY_4:
    case DAY_5:
    case DAY_6:
    case DAY_7:
      {
        static char result[7][50];
        static char const days[][sizeof "Wednesday"] = {
          "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
          "Friday", "Saturday"
        };
        tmm.tm_wday = item - DAY_1;
        if (!strftime (buf, sizeof result[0], "%A", &tmm))
          return (char *) days[item - DAY_1];
        strcpy (result[item - DAY_1], buf);
        return result[item - DAY_1];
      }
    case ABDAY_1:
    case ABDAY_2:
    case ABDAY_3:
    case ABDAY_4:
    case ABDAY_5:
    case ABDAY_6:
    case ABDAY_7:
      {
        static char result[7][30];
        static char const abdays[][sizeof "Sun"] = {
          "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
        };
        tmm.tm_wday = item - ABDAY_1;
        if (!strftime (buf, sizeof result[0], "%a", &tmm))
          return (char *) abdays[item - ABDAY_1];
        strcpy (result[item - ABDAY_1], buf);
        return result[item - ABDAY_1];
      }
    case MON_1:
    case MON_2:
    case MON_3:
    case MON_4:
    case MON_5:
    case MON_6:
    case MON_7:
    case MON_8:
    case MON_9:
    case MON_10:
    case MON_11:
    case MON_12:
      {
        static char result[12][50];
        tmm.tm_mon = item - MON_1;
        if (!strftime (buf, sizeof result[0], "%B", &tmm))
          return (char *) months[item - MON_1];
        strcpy (result[item - MON_1], buf);
        return result[item - MON_1];
      }
    case ALTMON_1:
    case ALTMON_2:
    case ALTMON_3:
    case ALTMON_4:
    case ALTMON_5:
    case ALTMON_6:
    case ALTMON_7:
    case ALTMON_8:
    case ALTMON_9:
    case ALTMON_10:
    case ALTMON_11:
    case ALTMON_12:
      {
        static char result[12][50];
        tmm.tm_mon = item - ALTMON_1;
        /* The platforms without nl_langinfo() don't support strftime with
           %OB.  We don't even need to try; %B is used directly.  */
        if (!strftime (buf, sizeof result[0], "%B", &tmm))
          return (char *) months[item - ALTMON_1];
        strcpy (result[item - ALTMON_1], buf);
        return result[item - ALTMON_1];
      }
    case ABMON_1:
    case ABMON_2:
    case ABMON_3:
    case ABMON_4:
    case ABMON_5:
    case ABMON_6:
    case ABMON_7:
    case ABMON_8:
    case ABMON_9:
    case ABMON_10:
    case ABMON_11:
    case ABMON_12:
      {
        static char result[12][30];
        tmm.tm_mon = item - ABMON_1;
        if (!strftime (buf, sizeof result[0], "%b", &tmm))
          return (char *) abmonths[item - ABMON_1];
        strcpy (result[item - ABMON_1], buf);
        return result[item - ABMON_1];
      }
    case ERA:
      return (char *) "";
    case ALT_DIGITS:
      return (char *) "\0\0\0\0\0\0\0\0\0\0";
    /* nl_langinfo items of the LC_MONETARY category.  */
    case CRNCYSTR:
      return localeconv () ->currency_symbol;
# ifdef INT_CURR_SYMBOL
    case INT_CURR_SYMBOL:
      return localeconv () ->int_curr_symbol;
    case MON_DECIMAL_POINT:
      return localeconv () ->mon_decimal_point;
    case MON_THOUSANDS_SEP:
      return localeconv () ->mon_thousands_sep;
    case MON_GROUPING:
      return localeconv () ->mon_grouping;
    case POSITIVE_SIGN:
      return localeconv () ->positive_sign;
    case NEGATIVE_SIGN:
      return localeconv () ->negative_sign;
    /* The following items are single char fields of struct lconv; the
       address of the field is returned.  */
    case FRAC_DIGITS:
      return & localeconv () ->frac_digits;
    case INT_FRAC_DIGITS:
      return & localeconv () ->int_frac_digits;
    case P_CS_PRECEDES:
      return & localeconv () ->p_cs_precedes;
    case N_CS_PRECEDES:
      return & localeconv () ->n_cs_precedes;
    case P_SEP_BY_SPACE:
      return & localeconv () ->p_sep_by_space;
    case N_SEP_BY_SPACE:
      return & localeconv () ->n_sep_by_space;
    case P_SIGN_POSN:
      return & localeconv () ->p_sign_posn;
    case N_SIGN_POSN:
      return & localeconv () ->n_sign_posn;
# endif
    /* nl_langinfo items of the LC_MESSAGES category
       TODO: Really use the locale. */
    case YESEXPR:
      return (char *) "^[yY]";
    case NOEXPR:
      return (char *) "^[nN]";
    default:
      return (char *) "";
    }
}

#endif