Merge "Reject zip archives whose entry names are not valid UTF-8."

2014-12-08 13:08:33 +00:00 · 2014-12-08 13:08:33 +00:00 · 3f5bba537e
parent afe3d2718b 044bc8ee89
commit 3f5bba537e
4 changed files with 128 additions and 5 deletions
--- a/libziparchive/Android.mk
+++ b/libziparchive/Android.mk
@ -57,7 +57,7 @@ LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_MODULE := ziparchive-tests
 LOCAL_CPP_EXTENSION := .cc
 LOCAL_CFLAGS := -Werror
-LOCAL_SRC_FILES := zip_archive_test.cc
+LOCAL_SRC_FILES := zip_archive_test.cc entry_name_utils_test.cc
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_STATIC_LIBRARIES := libziparchive libz libutils
 include $(BUILD_NATIVE_TEST)
@ -69,7 +69,7 @@ LOCAL_CPP_EXTENSION := .cc
 LOCAL_CFLAGS += \
    -Werror \
    -Wno-unnamed-type-template-args
-LOCAL_SRC_FILES := zip_archive_test.cc
+LOCAL_SRC_FILES := zip_archive_test.cc entry_name_utils_test.cc
 LOCAL_SHARED_LIBRARIES := libziparchive-host liblog
 LOCAL_STATIC_LIBRARIES := \
    libz \
--- a/libziparchive/entry_name_utils-inl.h
+++ b/libziparchive/entry_name_utils-inl.h
@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBZIPARCHIVE_ENTRY_NAME_UTILS_INL_H_
+#define LIBZIPARCHIVE_ENTRY_NAME_UTILS_INL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Check if |length| bytes at |entry_name| constitute a valid entry name.
+// Entry names must be valid UTF-8 and must not contain '0'.
+inline bool IsValidEntryName(const uint8_t* entry_name, const size_t length) {
+  for (size_t i = 0; i < length; ++i) {
+    const uint8_t byte = entry_name[i];
+    if (byte == 0) {
+      return false;
+    } else if ((byte & 0x80) == 0) {
+      // Single byte sequence.
+      continue;
+    } else if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe) {
+      // Invalid sequence.
+      return false;
+    } else {
+      // 2-5 byte sequences.
+      for (uint8_t first = byte << 1; first & 0x80; first <<= 1) {
+        ++i;
+
+        // Missing continuation byte..
+        if (i == length) {
+          return false;
+        }
+
+        // Invalid continuation byte.
+        const uint8_t continuation_byte = entry_name[i];
+        if ((continuation_byte & 0xc0) != 0x80) {
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+
+#endif  // LIBZIPARCHIVE_ENTRY_NAME_UTILS_INL_H_
--- a/libziparchive/entry_name_utils_test.cc
+++ b/libziparchive/entry_name_utils_test.cc
@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "entry_name_utils-inl.h"
+
+#include <gtest/gtest.h>
+
+TEST(entry_name_utils, NullChars) {
+  // 'A', 'R', '\0', 'S', 'E'
+  const uint8_t zeroes[] = { 0x41, 0x52, 0x00, 0x53, 0x45 };
+  ASSERT_FALSE(IsValidEntryName(zeroes, sizeof(zeroes)));
+
+  const uint8_t zeroes_continuation_chars[] = { 0xc2, 0xa1, 0xc2, 0x00 };
+  ASSERT_FALSE(IsValidEntryName(zeroes_continuation_chars,
+                                sizeof(zeroes_continuation_chars)));
+}
+
+TEST(entry_name_utils, InvalidSequence) {
+  // 0xfe is an invalid start byte
+  const uint8_t invalid[] = { 0x41, 0xfe };
+  ASSERT_FALSE(IsValidEntryName(invalid, sizeof(invalid)));
+
+  // 0x91 is an invalid start byte (it's a valid continuation byte).
+  const uint8_t invalid2[] = { 0x41, 0x91 };
+  ASSERT_FALSE(IsValidEntryName(invalid2, sizeof(invalid2)));
+}
+
+TEST(entry_name_utils, TruncatedContinuation) {
+  // Malayalam script with truncated bytes. There should be 2 bytes
+  // after 0xe0
+  const uint8_t truncated[] = { 0xe0, 0xb4, 0x85, 0xe0, 0xb4 };
+  ASSERT_FALSE(IsValidEntryName(truncated, sizeof(truncated)));
+
+  // 0xc2 is the start of a 2 byte sequence that we've subsequently
+  // dropped.
+  const uint8_t truncated2[] = { 0xc2, 0xc2, 0xa1 };
+  ASSERT_FALSE(IsValidEntryName(truncated2, sizeof(truncated2)));
+}
+
+TEST(entry_name_utils, BadContinuation) {
+  // 0x41 is an invalid continuation char, since it's MSBs
+  // aren't "10..." (are 01).
+  const uint8_t bad[] = { 0xc2, 0xa1, 0xc2, 0x41 };
+  ASSERT_FALSE(IsValidEntryName(bad, sizeof(bad)));
+
+  // 0x41 is an invalid continuation char, since it's MSBs
+  // aren't "10..." (are 11).
+  const uint8_t bad2[] = { 0xc2, 0xa1, 0xc2, 0xfe };
+  ASSERT_FALSE(IsValidEntryName(bad2, sizeof(bad2)));
+}
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@ -33,8 +33,10 @@

 #include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd

+#include "entry_name_utils-inl.h"
 #include "ziparchive/zip_archive.h"

+
 // This is for windows. If we don't open a file in binary mode, weird
 // things will happen.
 #ifndef O_BINARY
@ -641,9 +643,8 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
    const uint16_t comment_length = cdr->comment_length;
    const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);

-    /* check that file name doesn't contain \0 character */
-    if (memchr(file_name, 0, file_name_length) != NULL) {
-      ALOGW("Zip: entry name can't contain \\0 character");
+    /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */
+    if (!IsValidEntryName(file_name, file_name_length)) {
      goto bail;
    }