aosp12/bionic/tests/iconv_test.cpp

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <gtest/gtest.h>

#include <iconv.h>
#include <string.h>

// The value the tests below expect iconv_open() to return on failure, and the
// deliberately invalid descriptor they pass to iconv() and iconv_close().
#define INVALID_ICONV_T reinterpret_cast<iconv_t>(-1)

// iconv_open() must fail with EINVAL when either encoding name (or both) is
// unknown.
TEST(iconv, iconv_open_EINVAL) {
  static const struct {
    const char* dst_enc;
    const char* src_enc;
  } kUnknownPairs[] = {
    {"silly", "silly"},
    {"silly", "UTF-8"},
    {"UTF-8", "silly"},
  };

  for (const auto& pair : kUnknownPairs) {
    errno = 0;
    ASSERT_EQ(INVALID_ICONV_T, iconv_open(pair.dst_enc, pair.src_enc));
    ASSERT_EQ(EINVAL, errno);
  }
}

// Checks that encoding names are matched loosely, following the charset alias
// matching examples in Unicode TR22.
TEST(iconv, iconv_open_comparator) {
  // Examples from http://www.unicode.org/reports/tr22/#Charset_Alias_Matching:
  // "For example, the following names should match: "UTF-8", "utf8", "u.t.f-008", ..."
  static const char* const kMatching[] = {"utf8", "u.t.f-008"};
  for (const char* alias : kMatching) {
    iconv_t c = iconv_open("UTF-8", alias);
    ASSERT_NE(INVALID_ICONV_T, c) << alias;
    ASSERT_EQ(0, iconv_close(c)) << alias;
  }

  // "...but not "utf-80" or "ut8"."
  // "ut80" is not one of the quoted examples; it is an extra near-miss that
  // must be rejected too.
  static const char* const kNonMatching[] = {"utf-80", "ut8", "ut80"};
  for (const char* alias : kNonMatching) {
    errno = 0;
    ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", alias)) << alias;
    ASSERT_EQ(EINVAL, errno) << alias;
  }
}

// Basic end-to-end conversion: three UTF-8 characters of one, two and three
// bytes are converted to UTF-32LE and compared against wide-character literals.
TEST(iconv, iconv_smoke) {
  const char* utf8 = "a٦ᄀ"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80
  char buf[BUFSIZ] = {};

  iconv_t c = iconv_open("UTF-32LE", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, c);

  char* in = const_cast<char*>(utf8);
  size_t in_bytes = strlen(in);

  char* out = buf;
  size_t out_bytes = sizeof(buf);

  EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes));

  // Copy the code units out of the byte buffer instead of casting `buf` to
  // wchar_t*: a char array carries no alignment guarantee for wchar_t, and
  // reading it through a wchar_t lvalue breaks the aliasing rules.
  // NOTE(review): comparing UTF-32LE output with wchar_t literals relies on
  // wchar_t being a 4-byte little-endian type on the target.
  wchar_t utf32[4] = {};
  memcpy(utf32, buf, sizeof(utf32));
  EXPECT_EQ(L'a', utf32[0]);
  EXPECT_EQ(L'٦', utf32[1]);
  EXPECT_EQ(L'ᄀ', utf32[2]);
  EXPECT_EQ(L'\0', utf32[3]);  // buf was zero-initialized, so nothing follows.
  EXPECT_EQ(0U, in_bytes);
  EXPECT_EQ(sizeof(buf) - (3 /* chars */ * 4 /* bytes each */), out_bytes);

  ASSERT_EQ(0, iconv_close(c));
}

// With "//TRANSLIT", input characters that ASCII can't represent are expected
// to come out as '?', and iconv() is expected to return the replacement count.
TEST(iconv, iconv_lossy_TRANSLIT) {
  char output[BUFSIZ] = {};
  const char* input = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80

  iconv_t cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, cd);

  char* src = const_cast<char*>(input);
  size_t src_left = strlen(src);
  char* dst = output;
  size_t dst_left = sizeof(output);

  // ٦ and ᄀ (5 input bytes between them) have no ASCII form, so each one
  // counts as a replacement.
  const size_t replaced = iconv(cd, &src, &src_left, &dst, &dst_left);
  EXPECT_EQ(2U, replaced);

  // Four output bytes followed by the untouched zero fill.
  EXPECT_STREQ("a??z", output);
  EXPECT_EQ(0U, src_left);
  EXPECT_EQ(sizeof(output) - 4, dst_left);

  ASSERT_EQ(0, iconv_close(cd));
}

// With "//IGNORE", unrepresentable characters are expected to be dropped from
// the output while the call as a whole still reports EILSEQ.
TEST(iconv, iconv_lossy_IGNORE) {
  char output[BUFSIZ] = {};
  const char* input = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80

  iconv_t cd = iconv_open("ASCII//IGNORE", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, cd);

  char* src = const_cast<char*>(input);
  size_t src_left = strlen(src);
  char* dst = output;
  size_t dst_left = sizeof(output);

  // ٦ and ᄀ (5 input bytes between them) have no ASCII form; they are skipped,
  // but the conversion is still flagged as a failure.
  errno = 0;
  const size_t result = iconv(cd, &src, &src_left, &dst, &dst_left);
  EXPECT_EQ(static_cast<size_t>(-1), result);
  EXPECT_EQ(EILSEQ, errno);

  // All input was consumed; only the two ASCII characters were written.
  EXPECT_STREQ("az", output);
  EXPECT_EQ(0U, src_left);
  EXPECT_EQ(sizeof(output) - 2, dst_left);

  ASSERT_EQ(0, iconv_close(cd));
}

// Without any "//" suffix, conversion is expected to stop with EILSEQ at the
// first character the destination encoding can't represent.
TEST(iconv, iconv_lossy) {
  char output[BUFSIZ] = {};
  const char* input = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80

  iconv_t cd = iconv_open("ASCII", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, cd);

  char* src = const_cast<char*>(input);
  size_t src_left = strlen(src);
  char* dst = output;
  size_t dst_left = sizeof(output);

  // 'a' converts; ٦ has no ASCII form, so everything from there on is left
  // unconverted.
  errno = 0;
  const size_t result = iconv(cd, &src, &src_left, &dst, &dst_left);
  EXPECT_EQ(static_cast<size_t>(-1), result);
  EXPECT_EQ(EILSEQ, errno);

  EXPECT_STREQ("a", output);
  EXPECT_EQ(6U, src_left); // Two bytes for ٦, three bytes for ᄀ, and one byte for z.
  EXPECT_EQ(sizeof(output) - 1, dst_left);

  ASSERT_EQ(0, iconv_close(cd));
}

// A lead byte followed by something that isn't a continuation byte is expected
// to be reported as EILSEQ, and the caller can resume after stepping over it.
TEST(iconv, iconv_malformed_sequence_EILSEQ) {
  char output[BUFSIZ] = {};
  const char* input = "a\xd9z"; // 0xd9 is the first byte of the two-byte U+0666 ٦.

  iconv_t cd = iconv_open("UTF-8", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, cd);

  char* src = const_cast<char*>(input);
  size_t src_left = strlen(src);
  char* dst = output;
  size_t dst_left = sizeof(output);

  // First pass: 'a' converts, then the malformed byte stops the conversion.
  errno = 0;
  const size_t first = iconv(cd, &src, &src_left, &dst, &dst_left);
  EXPECT_EQ(static_cast<size_t>(-1), first);
  EXPECT_EQ(EILSEQ, errno);
  EXPECT_EQ('\xd9', *src); // The input pointer rests on the start of the invalid sequence.

  // Step over the offending byte by hand and convert the remainder.
  src += 1;
  src_left -= 1;
  errno = 0;
  const size_t second = iconv(cd, &src, &src_left, &dst, &dst_left);
  EXPECT_EQ(0U, second);
  EXPECT_EQ(0, errno);

  EXPECT_STREQ("az", output);
  EXPECT_EQ(0U, src_left);
  EXPECT_EQ(sizeof(output) - 2, dst_left);

  ASSERT_EQ(0, iconv_close(cd));
}

// Input that ends in the middle of a multi-byte character is expected to be
// reported as EINVAL (incomplete) rather than EILSEQ (invalid).
TEST(iconv, iconv_incomplete_sequence_EINVAL) {
  char output[BUFSIZ] = {};
  const char* input = "a\xd9"; // 0xd9 is the first byte of the two-byte U+0666 ٦.

  iconv_t cd = iconv_open("UTF-8", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, cd);

  char* src = const_cast<char*>(input);
  size_t src_left = strlen(src);
  char* dst = output;
  size_t dst_left = sizeof(output);

  // 'a' converts; 0xd9 only starts a character and no further input follows.
  errno = 0;
  const size_t result = iconv(cd, &src, &src_left, &dst, &dst_left);
  EXPECT_EQ(static_cast<size_t>(-1), result);
  EXPECT_EQ(EINVAL, errno);
  EXPECT_EQ('\xd9', *src); // The input pointer rests on the start of the incomplete sequence.

  EXPECT_STREQ("a", output);
  EXPECT_EQ(1U, src_left);
  EXPECT_EQ(sizeof(output) - 1, dst_left);

  ASSERT_EQ(0, iconv_close(cd));
}

// Feeds a three-byte input through an output window that is too small and
// checks the E2BIG reporting and the progress made at each step.
TEST(iconv, iconv_E2BIG) {
  char output[BUFSIZ] = {};
  const char* input = "abc";

  iconv_t cd = iconv_open("UTF-8", "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, cd);

  char* src = const_cast<char*>(input);
  size_t src_left = strlen(src);
  char* dst = output;
  size_t dst_left = 1;

  // Runs one iconv() call with exactly `room` bytes of output space, clearing
  // errno first so the caller can inspect it afterwards.
  auto convert_with_room = [&](size_t room) {
    dst_left = room;
    errno = 0;
    return iconv(cd, &src, &src_left, &dst, &dst_left);
  };

  // We need three bytes, so one isn't enough (but we will make progress).
  EXPECT_EQ(static_cast<size_t>(-1), convert_with_room(1));
  EXPECT_EQ(E2BIG, errno);
  EXPECT_EQ(2U, src_left);
  EXPECT_EQ(0U, dst_left);

  // Two bytes left, so zero isn't enough (and we can't even make progress).
  EXPECT_EQ(static_cast<size_t>(-1), convert_with_room(0));
  EXPECT_EQ(E2BIG, errno);
  EXPECT_EQ(2U, src_left);
  EXPECT_EQ(0U, dst_left);

  // Two bytes left, so one isn't enough (but we will make progress).
  EXPECT_EQ(static_cast<size_t>(-1), convert_with_room(1));
  EXPECT_EQ(E2BIG, errno);
  EXPECT_EQ(1U, src_left);
  EXPECT_EQ(0U, dst_left);

  // One byte left, so one byte is now enough.
  EXPECT_EQ(0U, convert_with_room(1));
  EXPECT_EQ(0, errno);
  EXPECT_EQ(0U, src_left);
  EXPECT_EQ(0U, dst_left);

  // Across the four calls the whole input ended up in the buffer.
  EXPECT_STREQ("abc", output);

  ASSERT_EQ(0, iconv_close(cd));
}

// iconv() called with an invalid conversion descriptor is expected to fail
// with EBADF.
TEST(iconv, iconv_invalid_converter_EBADF) {
  size_t src_left = 0;
  size_t dst_left = 0;
  char* src = nullptr;
  char* dst = nullptr;

  errno = 0;
  const size_t result = iconv(INVALID_ICONV_T, &src, &src_left, &dst, &dst_left);
  ASSERT_EQ(static_cast<size_t>(-1), result);
  ASSERT_EQ(EBADF, errno);
}

// iconv_close() called with an invalid conversion descriptor is expected to
// fail with EBADF.
TEST(iconv, iconv_close_invalid_converter_EBADF) {
  errno = 0;
  const int rc = iconv_close(INVALID_ICONV_T);
  ASSERT_EQ(-1, rc);
  ASSERT_EQ(EBADF, errno);
}

static void RoundTrip(const char* dst_enc, const char* expected_bytes, size_t n) {
  // Examples from https://en.wikipedia.org/wiki/UTF-16.
  const char* utf8 = "$€𐐷"; // U+0024, U+20AC, U+10437.

  iconv_t c = iconv_open(dst_enc, "UTF-8");
  ASSERT_NE(INVALID_ICONV_T, c) << dst_enc;

  char* in = const_cast<char*>(utf8);
  size_t in_bytes = strlen(utf8);
  char buf[BUFSIZ] = {};
  char* out = buf;
  size_t out_bytes = sizeof(buf);
  size_t replacement_count = iconv(c, &in, &in_bytes, &out, &out_bytes);

  // Check we got the bytes we were expecting.
  for (size_t i = 0; i < n; ++i) {
    EXPECT_EQ(expected_bytes[i], buf[i]) << i << ' '<< dst_enc;
  }

  ASSERT_EQ(0, iconv_close(c));

  // We can't round-trip if there were replacements.
  if (strstr(dst_enc, "ascii")) {
    GTEST_LOG_(INFO) << "can't round-trip " << dst_enc << "\n";
    return;
  }
  ASSERT_EQ(0U, replacement_count);

  c = iconv_open("UTF-8", dst_enc);
  ASSERT_NE(INVALID_ICONV_T, c) << dst_enc;

  in = buf;
  in_bytes = n;
  char buf2[BUFSIZ] = {};
  out = buf2;
  out_bytes = sizeof(buf2);
  iconv(c, &in, &in_bytes, &out, &out_bytes);

  ASSERT_STREQ(utf8, buf2) << dst_enc;

  ASSERT_EQ(0, iconv_close(c));
}

// Forward conversion to ASCII with "//TRANSLIT": the two non-ASCII characters
// of the sample are expected to come out as '?'.
TEST(iconv, iconv_round_trip_ascii) {
  static const char kExpected[] = "$??";
  RoundTrip("ascii//TRANSLIT", kExpected, sizeof(kExpected) - 1);
}

// UTF-8 to UTF-8: one-, three- and four-byte sequences.
TEST(iconv, iconv_round_trip_utf8) {
  static const char kExpected[] = "\x24\xe2\x82\xac\xf0\x90\x90\xb7";
  RoundTrip("utf8", kExpected, sizeof(kExpected) - 1);
}

// UTF-16BE: two single code units followed by a surrogate pair.
TEST(iconv, iconv_round_trip_utf16be) {
  static const char kExpected[] = "\x00\x24" "\x20\xac" "\xd8\x01\xdc\x37";
  RoundTrip("utf16be", kExpected, sizeof(kExpected) - 1);
}

// UTF-16LE: two single code units followed by a surrogate pair.
TEST(iconv, iconv_round_trip_utf16le) {
  static const char kExpected[] = "\x24\x00" "\xac\x20" "\x01\xd8\x37\xdc";
  RoundTrip("utf16le", kExpected, sizeof(kExpected) - 1);
}

// UTF-32BE: three four-byte code units.
TEST(iconv, iconv_round_trip_utf32be) {
  static const char kExpected[] = "\x00\x00\x00\x24" "\x00\x00\x20\xac" "\x00\x01\x04\x37";
  RoundTrip("utf32be", kExpected, sizeof(kExpected) - 1);
}

// UTF-32LE: three four-byte code units.
TEST(iconv, iconv_round_trip_utf32le) {
  static const char kExpected[] = "\x24\x00\x00\x00" "\xac\x20\x00\x00" "\x37\x04\x01\x00";
  RoundTrip("utf32le", kExpected, sizeof(kExpected) - 1);
}

// "wchar_t" is expected to produce the same bytes as the UTF-32LE test.
TEST(iconv, iconv_round_trip_wchar_t) {
  static const char kExpected[] = "\x24\x00\x00\x00" "\xac\x20\x00\x00" "\x37\x04\x01\x00";
  RoundTrip("wchar_t", kExpected, sizeof(kExpected) - 1);
}

static void Check(int expected_errno, const char* src_enc, const char* src, size_t n) {
  iconv_t c = iconv_open("wchar_t", src_enc);
  char* in = const_cast<char*>(src);
  size_t in_bytes = n;
  wchar_t out_buf[16];
  size_t out_bytes = sizeof(out_buf);
  char* out = reinterpret_cast<char*>(out_buf);
  errno = 0;
  ASSERT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));
  EXPECT_EQ(expected_errno, errno);
  EXPECT_EQ(0, iconv_close(c));
}

// A byte above 0x7f is not valid ASCII input.
TEST(iconv, iconv_EILSEQ_ascii) {
  static const char kInput[] = "\xac"; // > 0x7f, so not ASCII.
  Check(EILSEQ, "ASCII", kInput, sizeof(kInput) - 1);
}

// A continuation byte can't start a UTF-8 sequence.
TEST(iconv, iconv_EILSEQ_utf8_initial) {
  static const char kInput[] = "\x82"; // Invalid initial byte.
  Check(EILSEQ, "utf8", kInput, sizeof(kInput) - 1);
}

// A lead byte must be followed by continuation bytes, not by another lead byte.
TEST(iconv, iconv_EILSEQ_utf8_non_initial) {
  static const char kInput[] = "\xe2\xe2\x82"; // Invalid second byte.
  Check(EILSEQ, "utf8", kInput, sizeof(kInput) - 1);
}

// UTF-16BE surrogate pair in the wrong order: low surrogate, then high.
TEST(iconv, iconv_EILSEQ_utf16be_low_surrogate_first) {
  static const char kInput[] = "\xdc\x37" "\xd8\x01";
  Check(EILSEQ, "utf16be", kInput, sizeof(kInput) - 1);
}

// UTF-16LE surrogate pair in the wrong order: low surrogate, then high.
TEST(iconv, iconv_EILSEQ_utf16le_low_surrogate_first) {
  static const char kInput[] = "\x37\xdc" "\x01\xd8";
  Check(EILSEQ, "utf16le", kInput, sizeof(kInput) - 1);
}

// Truncated UTF-8 input is incomplete (EINVAL) rather than invalid.
TEST(iconv, iconv_EINVAL_utf8_short) {
  static const char kInput[] = "\xe2\x82"; // Missing final byte of 3-byte sequence.
  Check(EINVAL, "utf8", kInput, sizeof(kInput) - 1);
}

// Half of a UTF-16BE code unit.
TEST(iconv, iconv_EINVAL_utf16be_short) {
  static const char kInput[] = "\x00"; // Missing second byte.
  Check(EINVAL, "utf16be", kInput, sizeof(kInput) - 1);
}

// A UTF-16BE high surrogate with nothing after it.
TEST(iconv, iconv_EINVAL_utf16be_missing_low_surrogate) {
  static const char kInput[] = "\xd8\x01";
  Check(EINVAL, "utf16be", kInput, sizeof(kInput) - 1);
}

// A UTF-16BE high surrogate followed by only the first byte of the low one.
TEST(iconv, iconv_EINVAL_utf16be_half_low_surrogate) {
  static const char kInput[] = "\xd8\x01\xdc";
  Check(EINVAL, "utf16be", kInput, sizeof(kInput) - 1);
}

// Half of a UTF-16LE code unit.
TEST(iconv, iconv_EINVAL_utf16le_short) {
  static const char kInput[] = "\x24"; // Missing second byte.
  Check(EINVAL, "utf16le", kInput, sizeof(kInput) - 1);
}

// A UTF-16LE high surrogate with nothing after it.
TEST(iconv, iconv_EINVAL_utf16le_missing_low_surrogate) {
  static const char kInput[] = "\x01\xd8";
  Check(EINVAL, "utf16le", kInput, sizeof(kInput) - 1);
}

// A UTF-16LE high surrogate followed by only the first byte of the low one.
TEST(iconv, iconv_EINVAL_utf16le_half_low_surrogate) {
  static const char kInput[] = "\x01\xd8\x37";
  Check(EINVAL, "utf16le", kInput, sizeof(kInput) - 1);
}

// Three of the four bytes of a UTF-32BE code unit.
TEST(iconv, iconv_EINVAL_utf32be_short) {
  static const char kInput[] = "\x00\x00\x00"; // Missing final byte.
  Check(EINVAL, "utf32be", kInput, sizeof(kInput) - 1);
}

// Three of the four bytes of a UTF-32LE code unit.
TEST(iconv, iconv_EINVAL_utf32le_short) {
  static const char kInput[] = "\x24\x00\x00"; // Missing final byte.
  Check(EINVAL, "utf32le", kInput, sizeof(kInput) - 1);
}

// Exercises the "reset to initial shift state" forms of iconv(): each call
// passes no input and is expected to return 0, leave errno untouched and
// write nothing to the output buffer.
TEST(iconv, iconv_initial_shift_state) {
  // POSIX: "For state-dependent encodings, the conversion descriptor
  // cd is placed into its initial shift state by a call for which inbuf
  // is a null pointer, or for which inbuf points to a null pointer."
  iconv_t c = iconv_open("utf8", "utf8");
  // Fail here, with a clear message, rather than as EBADF in the calls below.
  ASSERT_NE(INVALID_ICONV_T, c);

  char* in = nullptr;
  size_t in_bytes = 0;
  wchar_t out_buf[16];
  size_t out_bytes = sizeof(out_buf);
  char* out = reinterpret_cast<char*>(out_buf);

  // The three cases are independent of each other, and EXPECT (not ASSERT)
  // guarantees that a failure in one cannot skip the iconv_close() at the end.

  // Points to a null pointer...
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(0), iconv(c, &in, &in_bytes, &out, &out_bytes));
  EXPECT_EQ(0, errno);
  EXPECT_EQ(sizeof(out_buf), out_bytes);

  // Is a null pointer...
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(0), iconv(c, nullptr, &in_bytes, &out, &out_bytes));
  EXPECT_EQ(0, errno);
  EXPECT_EQ(sizeof(out_buf), out_bytes);

  // Is a null pointer and so is in_bytes. This isn't specified by POSIX, but
  // glibc and macOS both allow that, where Android historically didn't.
  // https://issuetracker.google.com/180598400
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(0), iconv(c, nullptr, nullptr, &out, &out_bytes));
  EXPECT_EQ(0, errno);
  EXPECT_EQ(sizeof(out_buf), out_bytes);

  EXPECT_EQ(0, iconv_close(c));
}
init from android-12.1.0_r8 2023-01-09 17:11:35 +08:00			`/*`
			`* Copyright (C) 2017 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#include <gtest/gtest.h>`

			`#include <iconv.h>`

			`#define INVALID_ICONV_T reinterpret_cast<iconv_t>(-1)`

			`TEST(iconv, iconv_open_EINVAL) {`
			`errno = 0;`
			`ASSERT_EQ(INVALID_ICONV_T, iconv_open("silly", "silly"));`
			`ASSERT_EQ(EINVAL, errno);`
			`errno = 0;`
			`ASSERT_EQ(INVALID_ICONV_T, iconv_open("silly", "UTF-8"));`
			`ASSERT_EQ(EINVAL, errno);`
			`errno = 0;`
			`ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", "silly"));`
			`ASSERT_EQ(EINVAL, errno);`
			`}`

			`TEST(iconv, iconv_open_comparator) {`
			`// Examples from http://www.unicode.org/reports/tr22/#Charset_Alias_Matching:`
			`// "For example, the following names should match: "UTF-8", "utf8", "u.t.f-008", ..."`
			`iconv_t c;`
			`ASSERT_NE(INVALID_ICONV_T, c = iconv_open("UTF-8", "utf8"));`
			`ASSERT_EQ(0, iconv_close(c));`
			`ASSERT_NE(INVALID_ICONV_T, c = iconv_open("UTF-8", "u.t.f-008"));`
			`ASSERT_EQ(0, iconv_close(c));`

			`// "...but not "utf-80" or "ut8"."`
			`errno = 0;`
			`ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", "utf-80"));`
			`ASSERT_EQ(EINVAL, errno);`
			`errno = 0;`
			`ASSERT_EQ(INVALID_ICONV_T, iconv_open("UTF-8", "ut80"));`
			`ASSERT_EQ(EINVAL, errno);`
			`}`

			`TEST(iconv, iconv_smoke) {`
			`const char* utf8 = "a٦ᄀ"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("UTF-32LE", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`

			`EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes));`

			`wchar_t* utf16 = reinterpret_cast<wchar_t*>(buf);`
			`EXPECT_EQ(L'a', utf16[0]);`
			`EXPECT_EQ(L'٦', utf16[1]);`
			`EXPECT_EQ(L'ᄀ', utf16[2]);`
			`EXPECT_EQ(L'\0', utf16[3]);`
			`EXPECT_EQ(0U, in_bytes);`
			`EXPECT_EQ(sizeof(buf) - (3 /* chars */ * 4 /* bytes each */), out_bytes);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_lossy_TRANSLIT) {`
			`const char* utf8 = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("ASCII//TRANSLIT", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`

			`// Two of the input characters (5 input bytes) aren't representable as ASCII.`
			`// With "//TRANSLIT", we use a replacement character, and report the number`
			`// of replacements.`
			`EXPECT_EQ(2U, iconv(c, &in, &in_bytes, &out, &out_bytes));`

			`EXPECT_EQ('a', buf[0]);`
			`EXPECT_EQ('?', buf[1]);`
			`EXPECT_EQ('?', buf[2]);`
			`EXPECT_EQ('z', buf[3]);`
			`EXPECT_EQ(0, buf[4]);`
			`EXPECT_EQ(0U, in_bytes);`
			`EXPECT_EQ(sizeof(buf) - 4, out_bytes);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_lossy_IGNORE) {`
			`const char* utf8 = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("ASCII//IGNORE", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`

			`// Two of the input characters (5 input bytes) aren't representable as ASCII.`
			`// With "//IGNORE", we just skip them (but return failure).`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(EILSEQ, errno);`

			`EXPECT_EQ('a', buf[0]);`
			`EXPECT_EQ('z', buf[1]);`
			`EXPECT_EQ(0, buf[2]);`
			`EXPECT_EQ(0U, in_bytes);`
			`EXPECT_EQ(sizeof(buf) - 2, out_bytes);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_lossy) {`
			`const char* utf8 = "a٦ᄀz"; // U+0666 ٦ 0xd9 0xa6 // U+1100 ᄀ 0xe1 0x84 0x80`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("ASCII", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`

			`// The second input character isn't representable as ASCII, so we stop there.`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(EILSEQ, errno);`

			`EXPECT_EQ('a', buf[0]);`
			`EXPECT_EQ(0, buf[1]);`
			`EXPECT_EQ(6U, in_bytes); // Two bytes for ٦, three bytes for ᄀ, and one byte for z.`
			`EXPECT_EQ(sizeof(buf) - 1, out_bytes);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_malformed_sequence_EILSEQ) {`
			`const char* utf8 = "a\xd9z"; // 0xd9 is the first byte of the two-byte U+0666 ٦.`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("UTF-8", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`

			`// The second input byte is a malformed character, so we stop there.`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(EILSEQ, errno);`
			`EXPECT_EQ('\xd9', *in); // *in is left pointing to the start of the invalid sequence.`
			`++in;`
			`--in_bytes;`
			`errno = 0;`
			`EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(0, errno);`

			`EXPECT_EQ('a', buf[0]);`
			`EXPECT_EQ('z', buf[1]);`
			`EXPECT_EQ(0, buf[2]);`
			`EXPECT_EQ(0U, in_bytes);`
			`EXPECT_EQ(sizeof(buf) - 2, out_bytes);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_incomplete_sequence_EINVAL) {`
			`const char* utf8 = "a\xd9"; // 0xd9 is the first byte of the two-byte U+0666 ٦.`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("UTF-8", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`

			`// The second input byte is just the start of a character, and we don't have any more bytes.`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(EINVAL, errno);`
			`EXPECT_EQ('\xd9', *in); // *in is left pointing to the start of the incomplete sequence.`

			`EXPECT_EQ('a', buf[0]);`
			`EXPECT_EQ(0, buf[1]);`
			`EXPECT_EQ(1U, in_bytes);`
			`EXPECT_EQ(sizeof(buf) - 1, out_bytes);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_E2BIG) {`
			`const char* utf8 = "abc";`
			`char buf[BUFSIZ] = {};`

			`iconv_t c = iconv_open("UTF-8", "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c);`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(in);`

			`char* out = buf;`
			`size_t out_bytes = 1;`

			`// We need three bytes, so one isn't enough (but we will make progress).`
			`out_bytes = 1;`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(E2BIG, errno);`
			`EXPECT_EQ(2U, in_bytes);`
			`EXPECT_EQ(0U, out_bytes);`

			`// Two bytes left, so zero isn't enough (and we can't even make progress).`
			`out_bytes = 0;`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(E2BIG, errno);`
			`EXPECT_EQ(2U, in_bytes);`
			`EXPECT_EQ(0U, out_bytes);`

			`// Two bytes left, so one isn't enough (but we will make progress).`
			`out_bytes = 1;`
			`errno = 0;`
			`EXPECT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(E2BIG, errno);`
			`EXPECT_EQ(1U, in_bytes);`
			`EXPECT_EQ(0U, out_bytes);`

			`// One byte left, so one byte is now enough.`
			`out_bytes = 1;`
			`errno = 0;`
			`EXPECT_EQ(0U, iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(0, errno);`
			`EXPECT_EQ(0U, in_bytes);`
			`EXPECT_EQ(0U, out_bytes);`

			`EXPECT_EQ('a', buf[0]);`
			`EXPECT_EQ('b', buf[1]);`
			`EXPECT_EQ('c', buf[2]);`
			`EXPECT_EQ(0, buf[3]);`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_invalid_converter_EBADF) {`
			`char* in = nullptr;`
			`char* out = nullptr;`
			`size_t in_bytes = 0;`
			`size_t out_bytes = 0;`
			`errno = 0;`
			`ASSERT_EQ(static_cast<size_t>(-1), iconv(INVALID_ICONV_T, &in, &in_bytes, &out, &out_bytes));`
			`ASSERT_EQ(EBADF, errno);`
			`}`

			`TEST(iconv, iconv_close_invalid_converter_EBADF) {`
			`errno = 0;`
			`ASSERT_EQ(-1, iconv_close(INVALID_ICONV_T));`
			`ASSERT_EQ(EBADF, errno);`
			`}`

			`static void RoundTrip(const char* dst_enc, const char* expected_bytes, size_t n) {`
			`// Examples from https://en.wikipedia.org/wiki/UTF-16.`
			`const char* utf8 = "$€𐐷"; // U+0024, U+20AC, U+10437.`

			`iconv_t c = iconv_open(dst_enc, "UTF-8");`
			`ASSERT_NE(INVALID_ICONV_T, c) << dst_enc;`

			`char* in = const_cast<char*>(utf8);`
			`size_t in_bytes = strlen(utf8);`
			`char buf[BUFSIZ] = {};`
			`char* out = buf;`
			`size_t out_bytes = sizeof(buf);`
			`size_t replacement_count = iconv(c, &in, &in_bytes, &out, &out_bytes);`

			`// Check we got the bytes we were expecting.`
			`for (size_t i = 0; i < n; ++i) {`
			`EXPECT_EQ(expected_bytes[i], buf[i]) << i << ' '<< dst_enc;`
			`}`

			`ASSERT_EQ(0, iconv_close(c));`

			`// We can't round-trip if there were replacements.`
			`if (strstr(dst_enc, "ascii")) {`
			`GTEST_LOG_(INFO) << "can't round-trip " << dst_enc << "\n";`
			`return;`
			`}`
			`ASSERT_EQ(0U, replacement_count);`

			`c = iconv_open("UTF-8", dst_enc);`
			`ASSERT_NE(INVALID_ICONV_T, c) << dst_enc;`

			`in = buf;`
			`in_bytes = n;`
			`char buf2[BUFSIZ] = {};`
			`out = buf2;`
			`out_bytes = sizeof(buf2);`
			`iconv(c, &in, &in_bytes, &out, &out_bytes);`

			`ASSERT_STREQ(utf8, buf2) << dst_enc;`

			`ASSERT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_round_trip_ascii) {`
			`RoundTrip("ascii//TRANSLIT", "$??", 3);`
			`}`

			`TEST(iconv, iconv_round_trip_utf8) {`
			`RoundTrip("utf8", "\x24\xe2\x82\xac\xf0\x90\x90\xb7", 8);`
			`}`

			`TEST(iconv, iconv_round_trip_utf16be) {`
			`RoundTrip("utf16be", "\x00\x24" "\x20\xac" "\xd8\x01\xdc\x37", 8);`
			`}`

			`TEST(iconv, iconv_round_trip_utf16le) {`
			`RoundTrip("utf16le", "\x24\x00" "\xac\x20" "\x01\xd8\x37\xdc", 8);`
			`}`

			`TEST(iconv, iconv_round_trip_utf32be) {`
			`RoundTrip("utf32be", "\x00\x00\x00\x24" "\x00\x00\x20\xac" "\x00\x01\x04\x37", 12);`
			`}`

			`TEST(iconv, iconv_round_trip_utf32le) {`
			`RoundTrip("utf32le", "\x24\x00\x00\x00" "\xac\x20\x00\x00" "\x37\x04\x01\x00", 12);`
			`}`

			`TEST(iconv, iconv_round_trip_wchar_t) {`
			`RoundTrip("wchar_t", "\x24\x00\x00\x00" "\xac\x20\x00\x00" "\x37\x04\x01\x00", 12);`
			`}`

			`static void Check(int expected_errno, const char* src_enc, const char* src, size_t n) {`
			`iconv_t c = iconv_open("wchar_t", src_enc);`
			`char* in = const_cast<char*>(src);`
			`size_t in_bytes = n;`
			`wchar_t out_buf[16];`
			`size_t out_bytes = sizeof(out_buf);`
			`char* out = reinterpret_cast<char*>(out_buf);`
			`errno = 0;`
			`ASSERT_EQ(static_cast<size_t>(-1), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(expected_errno, errno);`
			`EXPECT_EQ(0, iconv_close(c));`
			`}`

			`TEST(iconv, iconv_EILSEQ_ascii) {`
			`Check(EILSEQ, "ASCII", "\xac", 1); // > 0x7f, so not ASCII.`
			`}`

			`TEST(iconv, iconv_EILSEQ_utf8_initial) {`
			`Check(EILSEQ, "utf8", "\x82", 1); // Invalid initial byte.`
			`}`

			`TEST(iconv, iconv_EILSEQ_utf8_non_initial) {`
			`Check(EILSEQ, "utf8", "\xe2\xe2\x82", 3); // Invalid second byte.`
			`}`

			`TEST(iconv, iconv_EILSEQ_utf16be_low_surrogate_first) {`
			`Check(EILSEQ, "utf16be", "\xdc\x37" "\xd8\x01", 4);`
			`}`

			`TEST(iconv, iconv_EILSEQ_utf16le_low_surrogate_first) {`
			`Check(EILSEQ, "utf16le", "\x37\xdc" "\x01\xd8", 4);`
			`}`

			`TEST(iconv, iconv_EINVAL_utf8_short) {`
			`Check(EINVAL, "utf8", "\xe2\x82", 2); // Missing final byte of 3-byte sequence.`
			`}`

			`TEST(iconv, iconv_EINVAL_utf16be_short) {`
			`Check(EINVAL, "utf16be", "\x00", 1); // Missing second byte.`
			`}`

			`TEST(iconv, iconv_EINVAL_utf16be_missing_low_surrogate) {`
			`Check(EINVAL, "utf16be", "\xd8\x01", 2);`
			`}`

			`TEST(iconv, iconv_EINVAL_utf16be_half_low_surrogate) {`
			`Check(EINVAL, "utf16be", "\xd8\x01\xdc", 3);`
			`}`

			`TEST(iconv, iconv_EINVAL_utf16le_short) {`
			`Check(EINVAL, "utf16le", "\x24", 1); // Missing second byte.`
			`}`

			`TEST(iconv, iconv_EINVAL_utf16le_missing_low_surrogate) {`
			`Check(EINVAL, "utf16le", "\x01\xd8", 2);`
			`}`

			`TEST(iconv, iconv_EINVAL_utf16le_half_low_surrogate) {`
			`Check(EINVAL, "utf16le", "\x01\xd8\x37", 3);`
			`}`

			`TEST(iconv, iconv_EINVAL_utf32be_short) {`
			`Check(EINVAL, "utf32be", "\x00\x00\x00", 3); // Missing final byte.`
			`}`

			`TEST(iconv, iconv_EINVAL_utf32le_short) {`
			`Check(EINVAL, "utf32le", "\x24\x00\x00", 3); // Missing final byte.`
			`}`

			`TEST(iconv, iconv_initial_shift_state) {`
			`// POSIX: "For state-dependent encodings, the conversion descriptor`
			`// cd is placed into its initial shift state by a call for which inbuf`
			`// is a null pointer, or for which inbuf points to a null pointer."`
			`iconv_t c = iconv_open("utf8", "utf8");`
			`char* in = nullptr;`
			`size_t in_bytes = 0;`
			`wchar_t out_buf[16];`
			`size_t out_bytes = sizeof(out_buf);`
			`char* out = reinterpret_cast<char*>(out_buf);`

			`// Points to a null pointer...`
			`errno = 0;`
			`ASSERT_EQ(static_cast<size_t>(0), iconv(c, &in, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(0, errno);`
			`EXPECT_EQ(sizeof(out_buf), out_bytes);`

			`// Is a null pointer...`
			`errno = 0;`
			`ASSERT_EQ(static_cast<size_t>(0), iconv(c, nullptr, &in_bytes, &out, &out_bytes));`
			`EXPECT_EQ(0, errno);`
			`EXPECT_EQ(sizeof(out_buf), out_bytes);`

			`// Is a null pointer and so is in_bytes. This isn't specified by POSIX, but`
			`// glibc and macOS both allow that, where Android historically didn't.`
			`// https://issuetracker.google.com/180598400`
			`errno = 0;`
			`ASSERT_EQ(static_cast<size_t>(0), iconv(c, nullptr, nullptr, &out, &out_bytes));`
			`EXPECT_EQ(0, errno);`
			`EXPECT_EQ(sizeof(out_buf), out_bytes);`

			`EXPECT_EQ(0, iconv_close(c));`
			`}`