From f899bd534b2dc51b9db8d27c76394b192fe51155 Mon Sep 17 00:00:00 2001 From: Narayan Kamath Date: Fri, 17 Apr 2015 11:53:14 +0100 Subject: [PATCH] Avoid mapping output to memory while writing to a file. It's unnecessary, and causes issues when the uncompressed output is large. Change-Id: I99cfb3933b094c2826c7e6c6de9aab03478fcc53 --- libziparchive/zip_archive.cc | 271 ++++++++++++++++++++---------- libziparchive/zip_archive_test.cc | 88 ++++++++++ 2 files changed, 269 insertions(+), 90 deletions(-) diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc index 858234446..4ba91dfb9 100644 --- a/libziparchive/zip_archive.cc +++ b/libziparchive/zip_archive.cc @@ -85,7 +85,8 @@ struct EocdRecord { // Length of the central directory comment. uint16_t comment_length; private: - DISALLOW_IMPLICIT_CONSTRUCTORS(EocdRecord); + EocdRecord() = default; + DISALLOW_COPY_AND_ASSIGN(EocdRecord); } __attribute__((packed)); // A structure representing the fixed length fields for a single @@ -138,7 +139,8 @@ struct CentralDirectoryRecord { // beginning of this archive. uint32_t local_file_header_offset; private: - DISALLOW_IMPLICIT_CONSTRUCTORS(CentralDirectoryRecord); + CentralDirectoryRecord() = default; + DISALLOW_COPY_AND_ASSIGN(CentralDirectoryRecord); } __attribute__((packed)); // The local file header for a given entry. This duplicates information @@ -175,7 +177,8 @@ struct LocalFileHeader { // will appear immediately after the entry file name. uint16_t extra_field_length; private: - DISALLOW_IMPLICIT_CONSTRUCTORS(LocalFileHeader); + LocalFileHeader() = default; + DISALLOW_COPY_AND_ASSIGN(LocalFileHeader); } __attribute__((packed)); struct DataDescriptor { @@ -189,10 +192,10 @@ struct DataDescriptor { // Uncompressed size of the entry. 
uint32_t uncompressed_size; private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DataDescriptor); + DataDescriptor() = default; + DISALLOW_COPY_AND_ASSIGN(DataDescriptor); } __attribute__((packed)); -#undef DISALLOW_IMPLICIT_CONSTRUCTORS static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD @@ -324,35 +327,6 @@ struct ZipArchive { } }; -static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) { - static const uint32_t kBufSize = 32768; - uint8_t buf[kBufSize]; - - uint32_t count = 0; - uint64_t crc = 0; - while (count < length) { - uint32_t remaining = length - count; - - // Safe conversion because kBufSize is narrow enough for a 32 bit signed - // value. - ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining; - ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size)); - - if (actual != get_size) { - ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size); - return kIoError; - } - - memcpy(begin + count, buf, get_size); - crc = crc32(crc, buf, get_size); - count += get_size; - } - - *crc_out = crc; - - return 0; -} - /* * Round up to the next highest power of 2. * @@ -972,6 +946,128 @@ int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) { return kIterationEnd; } +class Writer { + public: + virtual bool Append(uint8_t* buf, size_t buf_size) = 0; + virtual ~Writer() {} + protected: + Writer() = default; + private: + DISALLOW_COPY_AND_ASSIGN(Writer); +}; + +// A Writer that writes data to a fixed size memory region. +// The size of the memory region must be equal to the total size of +// the data appended to it. 
+class MemoryWriter : public Writer { + public: + MemoryWriter(uint8_t* buf, size_t size) : Writer(), + buf_(buf), size_(size), bytes_written_(0) { + } + + virtual bool Append(uint8_t* buf, size_t buf_size) override { + if (bytes_written_ + buf_size > size_) { + ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)", + size_, bytes_written_ + buf_size); + return false; + } + + memcpy(buf_ + bytes_written_, buf, buf_size); + bytes_written_ += buf_size; + return true; + } + + private: + uint8_t* const buf_; + const size_t size_; + size_t bytes_written_; +}; + +// A Writer that appends data to a file |fd| at its current position. +// The file will be truncated to the end of the written data. +class FileWriter : public Writer { + public: + + // Creates a FileWriter for |fd| and prepare to write |entry| to it, + // guaranteeing that the file descriptor is valid and that there's enough + // space on the volume to write out the entry completely and that the file + // is truncated to the correct length. + // + // Returns a valid FileWriter on success, |nullptr| if an error occurred. + static std::unique_ptr<FileWriter> Create(int fd, const ZipEntry* entry) { + const uint32_t declared_length = entry->uncompressed_length; + const off64_t current_offset = lseek64(fd, 0, SEEK_CUR); + if (current_offset == -1) { + ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno)); + return nullptr; + } + + int result = 0; +#if defined(__linux__) + if (declared_length > 0) { + // Make sure we have enough space on the volume to extract the compressed + // entry. Note that the call to ftruncate below will change the file size but + // will not allocate space on disk and this call to fallocate will not + // change the file size. 
+ result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length)); + if (result == -1) { + ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s", + static_cast<int64_t>(declared_length + current_offset), strerror(errno)); + return std::unique_ptr<FileWriter>(nullptr); + } + } +#endif // __linux__ + + result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset)); + if (result == -1) { + ALOGW("Zip: unable to truncate file to %" PRId64 ": %s", + static_cast<int64_t>(declared_length + current_offset), strerror(errno)); + return std::unique_ptr<FileWriter>(nullptr); + } + + return std::unique_ptr<FileWriter>(new FileWriter(fd, declared_length)); + } + + virtual bool Append(uint8_t* buf, size_t buf_size) override { + if (total_bytes_written_ + buf_size > declared_length_) { + ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)", + declared_length_, total_bytes_written_ + buf_size); + return false; + } + + // Keep track of the start position so we can calculate the + // total number of bytes written. 
+ const uint8_t* const start = buf; + size_t bytes_written = 0; + while (buf_size > 0) { + ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, buf, buf_size)); + if (bytes_written == -1) { + ALOGW("Zip: unable to write " ZD " bytes to file; %s", buf_size, strerror(errno)); + return false; + } + + buf_size -= bytes_written; + buf += bytes_written; + } + + total_bytes_written_ += static_cast<size_t>( + reinterpret_cast<uintptr_t>(buf) - reinterpret_cast<uintptr_t>(start)); + + return true; + } + private: + FileWriter(const int fd, const size_t declared_length) : + Writer(), + fd_(fd), + declared_length_(declared_length), + total_bytes_written_(0) { + } + + const int fd_; + const size_t declared_length_; + size_t total_bytes_written_; +}; + // This method is using libz macros with old-style-casts #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wold-style-cast" @@ -980,9 +1076,8 @@ static inline int zlib_inflateInit2(z_stream* stream, int window_bits) { } #pragma GCC diagnostic pop -static int32_t InflateToFile(int fd, const ZipEntry* entry, - uint8_t* begin, uint32_t length, - uint64_t* crc_out) { +static int32_t InflateEntryToWriter(int fd, const ZipEntry* entry, + Writer* writer, uint64_t* crc_out) { const size_t kBufSize = 32768; std::vector<uint8_t> read_buf(kBufSize); std::vector<uint8_t> write_buf(kBufSize); @@ -1057,12 +1152,10 @@ static int32_t InflateToFile(int fd, const ZipEntry* entry, if (zstream.avail_out == 0 || (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) { const size_t write_size = zstream.next_out - &write_buf[0]; - // The file might have declared a bogus length. - if (write_size + write_count > length) { - return -1; + if (!writer->Append(&write_buf[0], write_size)) { + // The file might have declared a bogus length. 
+ return kInconsistentInformation; } - memcpy(begin + write_count, &write_buf[0], write_size); - write_count += write_size; zstream.next_out = &write_buf[0]; zstream.avail_out = kBufSize; @@ -1083,8 +1176,41 @@ static int32_t InflateToFile(int fd, const ZipEntry* entry, return 0; } -int32_t ExtractToMemory(ZipArchiveHandle handle, - ZipEntry* entry, uint8_t* begin, uint32_t size) { +static int32_t CopyEntryToWriter(int fd, const ZipEntry* entry, Writer* writer, + uint64_t *crc_out) { + static const uint32_t kBufSize = 32768; + std::vector<uint8_t> buf(kBufSize); + + const uint32_t length = entry->uncompressed_length; + uint32_t count = 0; + uint64_t crc = 0; + while (count < length) { + uint32_t remaining = length - count; + + // Safe conversion because kBufSize is narrow enough for a 32 bit signed + // value. + const ssize_t block_size = (remaining > kBufSize) ? kBufSize : remaining; + const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, &buf[0], block_size)); + + if (actual != block_size) { + ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, block_size); + return kIoError; + } + + if (!writer->Append(&buf[0], block_size)) { + return kIoError; + } + crc = crc32(crc, &buf[0], block_size); + count += block_size; + } + + *crc_out = crc; + + return 0; +} + +int32_t ExtractToWriter(ZipArchiveHandle handle, + ZipEntry* entry, Writer* writer) { ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle); const uint16_t method = entry->method; off64_t data_offset = entry->offset; @@ -1098,9 +1224,9 @@ int32_t ExtractToMemory(ZipArchiveHandle handle, int32_t return_value = -1; uint64_t crc = 0; if (method == kCompressStored) { - return_value = CopyFileToFile(archive->fd, begin, size, &crc); + return_value = CopyEntryToWriter(archive->fd, entry, writer, &crc); } else if (method == kCompressDeflated) { - return_value = InflateToFile(archive->fd, entry, begin, size, &crc); + return_value = InflateEntryToWriter(archive->fd, entry, writer, &crc); } if (!return_value && 
entry->has_data_descriptor) { @@ -1120,55 +1246,20 @@ int32_t ExtractToMemory(ZipArchiveHandle handle, return return_value; } +int32_t ExtractToMemory(ZipArchiveHandle handle, ZipEntry* entry, + uint8_t* begin, uint32_t size) { + std::unique_ptr<Writer> writer(new MemoryWriter(begin, size)); + return ExtractToWriter(handle, entry, writer.get()); +} + int32_t ExtractEntryToFile(ZipArchiveHandle handle, ZipEntry* entry, int fd) { - const uint32_t declared_length = entry->uncompressed_length; - - const off64_t current_offset = lseek64(fd, 0, SEEK_CUR); - if (current_offset == -1) { - ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, - strerror(errno)); + std::unique_ptr<Writer> writer(FileWriter::Create(fd, entry)); + if (writer.get() == nullptr) { return kIoError; } - int result = 0; -#if defined(__linux__) - // Make sure we have enough space on the volume to extract the compressed - // entry. Note that the call to ftruncate below will change the file size but - // will not allocate space on disk. - if (declared_length > 0) { - result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length)); - if (result == -1) { - ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s", - static_cast<int64_t>(declared_length + current_offset), strerror(errno)); - return kIoError; - } - } -#endif // defined(__linux__) - - result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset)); - if (result == -1) { - ALOGW("Zip: unable to truncate file to %" PRId64 ": %s", - static_cast<int64_t>(declared_length + current_offset), strerror(errno)); - return kIoError; - } - - // Don't attempt to map a region of length 0. We still need the - // ftruncate() though, since the API guarantees that we will truncate - // the file to the end of the uncompressed output. 
- if (declared_length == 0) { - return 0; - } - - android::FileMap map; - if (!map.create(kTempMappingFileName, fd, current_offset, declared_length, false)) { - return kMmapFailed; - } - - const int32_t error = ExtractToMemory(handle, entry, - reinterpret_cast(map.getDataPtr()), - map.getDataLength()); - return error; + return ExtractToWriter(handle, entry, writer.get()); } const char* ErrorCodeString(int32_t error_code) { diff --git a/libziparchive/zip_archive_test.cc b/libziparchive/zip_archive_test.cc index 64faa6de2..f8952ce65 100644 --- a/libziparchive/zip_archive_test.cc +++ b/libziparchive/zip_archive_test.cc @@ -23,6 +23,7 @@ #include #include +#include #include static std::string test_data_dir; @@ -228,6 +229,44 @@ static const uint32_t kEmptyEntriesZip[] = { 0x54557478, 0x13030005, 0x7552e25c, 0x01000b78, 0x00428904, 0x13880400, 0x4b500000, 0x00000605, 0x00010000, 0x004f0001, 0x00430000, 0x00000000 }; +// This is a zip file containing a single entry (ab.txt) that contains +// 90072 repetitions of the string "ab\n" and has an uncompressed length +// of 270216 bytes. 
+static const uint16_t kAbZip[] = { + 0x4b50, 0x0403, 0x0014, 0x0000, 0x0008, 0x51d2, 0x4698, 0xc4b0, + 0x2cda, 0x011b, 0x0000, 0x1f88, 0x0004, 0x0006, 0x001c, 0x6261, + 0x742e, 0x7478, 0x5455, 0x0009, 0x7c03, 0x3a09, 0x7c55, 0x3a09, + 0x7555, 0x0b78, 0x0100, 0x8904, 0x0042, 0x0400, 0x1388, 0x0000, + 0xc2ed, 0x0d31, 0x0000, 0x030c, 0x7fa0, 0x3b2e, 0x22ff, 0xa2aa, + 0x841f, 0x45fc, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, + 0x5555, 0x5555, 0x5555, 0x5555, 0xdd55, 0x502c, 0x014b, 0x1e02, + 0x1403, 0x0000, 0x0800, 0xd200, 0x9851, 0xb046, 0xdac4, 0x1b2c, + 0x0001, 0x8800, 0x041f, 0x0600, 0x1800, 0x0000, 0x0000, 0x0100, + 0x0000, 0xa000, 0x0081, 0x0000, 0x6100, 0x2e62, 0x7874, 0x5574, + 0x0554, 0x0300, 0x097c, 0x553a, 0x7875, 0x000b, 0x0401, 0x4289, + 0x0000, 0x8804, 0x0013, 0x5000, 0x054b, 0x0006, 0x0000, 0x0100, + 0x0100, 0x4c00, 0x0000, 0x5b00, 0x0001, 0x0000, 0x0000 +}; + +static const uint8_t kAbTxtName[] = { 'a', 'b', '.', 't', 'x', 't' }; +static const uint16_t kAbTxtNameLength = 
sizeof(kAbTxtName); +static const size_t kAbUncompressedSize = 270216; + static int make_temporary_file(const char* file_name_pattern) { char full_path[1024]; // Account for differences between the host and the target. @@ -275,6 +314,55 @@ TEST(ziparchive, EmptyEntries) { close(output_fd); } +TEST(ziparchive, EntryLargerThan32K) { + char temp_file_pattern[] = "entry_larger_than_32k_test_XXXXXX"; + int fd = make_temporary_file(temp_file_pattern); + ASSERT_NE(-1, fd); + ASSERT_TRUE(android::base::WriteFully(fd, reinterpret_cast<const uint8_t*>(kAbZip), + sizeof(kAbZip) - 1)); + ZipArchiveHandle handle; + ASSERT_EQ(0, OpenArchiveFd(fd, "EntryLargerThan32KTest", &handle)); + + ZipEntry entry; + ZipEntryName ab_name; + ab_name.name = kAbTxtName; + ab_name.name_length = kAbTxtNameLength; + ASSERT_EQ(0, FindEntry(handle, ab_name, &entry)); + ASSERT_EQ(kAbUncompressedSize, entry.uncompressed_length); + + // Extract the entry to memory. + std::vector<uint8_t> buffer(kAbUncompressedSize); + ASSERT_EQ(0, ExtractToMemory(handle, &entry, &buffer[0], buffer.size())); + + // Extract the entry to a file. + char output_file_pattern[] = "entry_larger_than_32k_test_output_XXXXXX"; + int output_fd = make_temporary_file(output_file_pattern); + ASSERT_NE(-1, output_fd); + ASSERT_EQ(0, ExtractEntryToFile(handle, &entry, output_fd)); + + // Make sure the extracted file size is as expected. + struct stat stat_buf; + ASSERT_EQ(0, fstat(output_fd, &stat_buf)); + ASSERT_EQ(kAbUncompressedSize, static_cast<size_t>(stat_buf.st_size)); + + // Read the file back to a buffer and make sure the contents are + // the same as the memory buffer we extracted directly to. 
+ std::vector<uint8_t> file_contents(kAbUncompressedSize); + ASSERT_EQ(0, lseek64(output_fd, 0, SEEK_SET)); + ASSERT_TRUE(android::base::ReadFully(output_fd, &file_contents[0], file_contents.size())); + ASSERT_EQ(file_contents, buffer); + + for (int i = 0; i < 90072; ++i) { + const uint8_t* line = &file_contents[0] + (3 * i); + ASSERT_EQ('a', line[0]); + ASSERT_EQ('b', line[1]); + ASSERT_EQ('\n', line[2]); + } + + close(fd); + close(output_fd); +} + TEST(ziparchive, TrailerAfterEOCD) { char temp_file_pattern[] = "trailer_after_eocd_test_XXXXXX"; int fd = make_temporary_file(temp_file_pattern);