From 537713bace8975d02576225536ec55bd61f9ca91 Mon Sep 17 00:00:00 2001 From: Adam Lesinski Date: Thu, 16 Mar 2017 13:23:51 -0700 Subject: [PATCH] libziparchive: Add ability to backup in ZipWriter Based on the compressed size of a file entry, the decision needs to be made to instead store the file uncompressed. This adds support to ZipWriter to backup its last file entry. The file is now always truncated when the EOCD is written out, to account for the case where a file entry is backed-up and the resulting file written is much smaller, leaving garbage data at the end of the file. This change also includes a rename of FileInfo -> FileEntry. This struct was private (now public), so it shouldn't affect any clients. Bug: 35461578 Test: make ziparchive-tests Change-Id: I23dc584406274ab7b8ce62b3fbc3562ca4c2603e --- include/ziparchive/zip_writer.h | 56 ++++++++---- libziparchive/zip_writer.cc | 139 +++++++++++++++++----------- libziparchive/zip_writer_test.cc | 151 +++++++++++++++++++++++++------ 3 files changed, 245 insertions(+), 101 deletions(-) diff --git a/include/ziparchive/zip_writer.h b/include/ziparchive/zip_writer.h index 0b6ede45c..41ca2e1a3 100644 --- a/include/ziparchive/zip_writer.h +++ b/include/ziparchive/zip_writer.h @@ -17,15 +17,16 @@ #ifndef LIBZIPARCHIVE_ZIPWRITER_H_ #define LIBZIPARCHIVE_ZIPWRITER_H_ -#include "android-base/macros.h" -#include - #include #include +#include + #include #include #include -#include + +#include "android-base/macros.h" +#include "utils/Compat.h" /** * Writes a Zip file via a stateful interface. @@ -63,6 +64,20 @@ public: kAlign32 = 0x02, }; + /** + * A struct representing a zip file entry. + */ + struct FileEntry { + std::string path; + uint16_t compression_method; + uint32_t crc32; + uint32_t compressed_size; + uint32_t uncompressed_size; + uint16_t last_mod_time; + uint16_t last_mod_date; + uint32_t local_file_header_offset; + }; + static const char* ErrorCodeString(int32_t error_code); /** @@ -121,6 +136,19 @@ public: */ int32_t FinishEntry(); + /** + * Discards the last-written entry. Can only be called after an entry has been written using + * FinishEntry(). + * Returns 0 on success, and an error value < 0 on failure. + */ + int32_t DiscardLastEntry(); + + /** + * Sets `out_entry` to the last entry written after a call to FinishEntry(). + * Returns 0 on success, and an error value < 0 if no entries have been written. + */ + int32_t GetLastEntry(FileEntry* out_entry); + /** * Writes the Central Directory Headers and flushes the zip file stream. * Returns 0 on success, and an error value < 0 on failure. @@ -130,22 +158,11 @@ public: private: DISALLOW_COPY_AND_ASSIGN(ZipWriter); - struct FileInfo { - std::string path; - uint16_t compression_method; - uint32_t crc32; - uint32_t compressed_size; - uint32_t uncompressed_size; - uint16_t last_mod_time; - uint16_t last_mod_date; - uint32_t local_file_header_offset; - }; - int32_t HandleError(int32_t error_code); int32_t PrepareDeflate(); - int32_t StoreBytes(FileInfo* file, const void* data, size_t len); - int32_t CompressBytes(FileInfo* file, const void* data, size_t len); - int32_t FlushCompressedBytes(FileInfo* file); + int32_t StoreBytes(FileEntry* file, const void* data, size_t len); + int32_t CompressBytes(FileEntry* file, const void* data, size_t len); + int32_t FlushCompressedBytes(FileEntry* file); enum class State { kWritingZip, @@ -157,7 +174,8 @@ private: FILE* file_; off64_t current_offset_; State state_; - std::vector files_; + std::vector files_; + FileEntry current_file_entry_; std::unique_ptr z_stream_; std::vector buffer_; diff --git a/libziparchive/zip_writer.cc b/libziparchive/zip_writer.cc index b72ed7f0f..ddd4dd5b6 100644 --- a/libziparchive/zip_writer.cc +++ b/libziparchive/zip_writer.cc @@ -14,21 +14,22 @@ * limitations under the License. */ -#include "entry_name_utils-inl.h" -#include "zip_archive_common.h" #include "ziparchive/zip_writer.h" -#include - -#include - -#include #include -#include -#include +#include #include #define DEF_MEM_LEVEL 8 // normally in zutil.h? +#include +#include + +#include "android-base/logging.h" +#include "utils/Log.h" + +#include "entry_name_utils-inl.h" +#include "zip_archive_common.h" + #if !defined(powerof2) #define powerof2(x) ((((x)-1)&(x))==0) #endif @@ -171,12 +172,12 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags, return kInvalidAlignment; } - FileInfo fileInfo = {}; - fileInfo.path = std::string(path); - fileInfo.local_file_header_offset = current_offset_; + current_file_entry_ = {}; + current_file_entry_.path = path; + current_file_entry_.local_file_header_offset = current_offset_; - if (!IsValidEntryName(reinterpret_cast(fileInfo.path.data()), - fileInfo.path.size())) { + if (!IsValidEntryName(reinterpret_cast(current_file_entry_.path.data()), + current_file_entry_.path.size())) { return kInvalidEntryName; } @@ -188,24 +189,24 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags, header.gpb_flags |= kGPBDDFlagMask; if (flags & ZipWriter::kCompress) { - fileInfo.compression_method = kCompressDeflated; + current_file_entry_.compression_method = kCompressDeflated; int32_t result = PrepareDeflate(); if (result != kNoError) { return result; } } else { - fileInfo.compression_method = kCompressStored; + current_file_entry_.compression_method = kCompressStored; } - header.compression_method = fileInfo.compression_method; + header.compression_method = current_file_entry_.compression_method; - ExtractTimeAndDate(time, &fileInfo.last_mod_time, &fileInfo.last_mod_date); - header.last_mod_time = fileInfo.last_mod_time; - header.last_mod_date = fileInfo.last_mod_date; + ExtractTimeAndDate(time, ¤t_file_entry_.last_mod_time, ¤t_file_entry_.last_mod_date); + header.last_mod_time = current_file_entry_.last_mod_time; + header.last_mod_date = current_file_entry_.last_mod_date; - header.file_name_length = fileInfo.path.size(); + header.file_name_length = current_file_entry_.path.size(); - off64_t offset = current_offset_ + sizeof(header) + fileInfo.path.size(); + off64_t offset = current_offset_ + sizeof(header) + current_file_entry_.path.size(); std::vector zero_padding; if (alignment != 0 && (offset & (alignment - 1))) { // Pad the extra field so the data will be aligned. @@ -220,7 +221,8 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags, return HandleError(kIoError); } - if (fwrite(path, sizeof(*path), fileInfo.path.size(), file_) != fileInfo.path.size()) { + if (fwrite(path, sizeof(*path), current_file_entry_.path.size(), file_) + != current_file_entry_.path.size()) { return HandleError(kIoError); } @@ -230,15 +232,37 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags, return HandleError(kIoError); } - files_.emplace_back(std::move(fileInfo)); - current_offset_ = offset; state_ = State::kWritingEntry; return kNoError; } +int32_t ZipWriter::DiscardLastEntry() { + if (state_ != State::kWritingZip || files_.empty()) { + return kInvalidState; + } + + FileEntry& last_entry = files_.back(); + current_offset_ = last_entry.local_file_header_offset; + if (fseeko(file_, current_offset_, SEEK_SET) != 0) { + return HandleError(kIoError); + } + files_.pop_back(); + return kNoError; +} + +int32_t ZipWriter::GetLastEntry(FileEntry* out_entry) { + CHECK(out_entry != nullptr); + + if (files_.empty()) { + return kInvalidState; + } + *out_entry = files_.back(); + return kNoError; +} + int32_t ZipWriter::PrepareDeflate() { - assert(state_ == State::kWritingZip); + CHECK(state_ == State::kWritingZip); // Initialize the z_stream for compression. z_stream_ = std::unique_ptr(new z_stream(), DeleteZStream); @@ -269,25 +293,25 @@ int32_t ZipWriter::WriteBytes(const void* data, size_t len) { return HandleError(kInvalidState); } - FileInfo& currentFile = files_.back(); int32_t result = kNoError; - if (currentFile.compression_method & kCompressDeflated) { - result = CompressBytes(¤tFile, data, len); + if (current_file_entry_.compression_method & kCompressDeflated) { + result = CompressBytes(¤t_file_entry_, data, len); } else { - result = StoreBytes(¤tFile, data, len); + result = StoreBytes(¤t_file_entry_, data, len); } if (result != kNoError) { return result; } - currentFile.crc32 = crc32(currentFile.crc32, reinterpret_cast(data), len); - currentFile.uncompressed_size += len; + current_file_entry_.crc32 = crc32(current_file_entry_.crc32, + reinterpret_cast(data), len); + current_file_entry_.uncompressed_size += len; return kNoError; } -int32_t ZipWriter::StoreBytes(FileInfo* file, const void* data, size_t len) { - assert(state_ == State::kWritingEntry); +int32_t ZipWriter::StoreBytes(FileEntry* file, const void* data, size_t len) { + CHECK(state_ == State::kWritingEntry); if (fwrite(data, 1, len, file_) != len) { return HandleError(kIoError); @@ -297,11 +321,11 @@ int32_t ZipWriter::StoreBytes(FileInfo* file, const void* data, size_t len) { return kNoError; } -int32_t ZipWriter::CompressBytes(FileInfo* file, const void* data, size_t len) { - assert(state_ == State::kWritingEntry); - assert(z_stream_); - assert(z_stream_->next_out != nullptr); - assert(z_stream_->avail_out != 0); +int32_t ZipWriter::CompressBytes(FileEntry* file, const void* data, size_t len) { + CHECK(state_ == State::kWritingEntry); + CHECK(z_stream_); + CHECK(z_stream_->next_out != nullptr); + CHECK(z_stream_->avail_out != 0); // Prepare the input. z_stream_->next_in = reinterpret_cast(data); @@ -331,17 +355,17 @@ int32_t ZipWriter::CompressBytes(FileInfo* file, const void* data, size_t len) { return kNoError; } -int32_t ZipWriter::FlushCompressedBytes(FileInfo* file) { - assert(state_ == State::kWritingEntry); - assert(z_stream_); - assert(z_stream_->next_out != nullptr); - assert(z_stream_->avail_out != 0); +int32_t ZipWriter::FlushCompressedBytes(FileEntry* file) { + CHECK(state_ == State::kWritingEntry); + CHECK(z_stream_); + CHECK(z_stream_->next_out != nullptr); + CHECK(z_stream_->avail_out != 0); // Keep deflating while there isn't enough space in the buffer to // to complete the compress. int zerr; while ((zerr = deflate(z_stream_.get(), Z_FINISH)) == Z_OK) { - assert(z_stream_->avail_out == 0); + CHECK(z_stream_->avail_out == 0); size_t write_bytes = z_stream_->next_out - buffer_.data(); if (fwrite(buffer_.data(), 1, write_bytes, file_) != write_bytes) { return HandleError(kIoError); @@ -373,9 +397,8 @@ int32_t ZipWriter::FinishEntry() { return kInvalidState; } - FileInfo& currentFile = files_.back(); - if (currentFile.compression_method & kCompressDeflated) { - int32_t result = FlushCompressedBytes(¤tFile); + if (current_file_entry_.compression_method & kCompressDeflated) { + int32_t result = FlushCompressedBytes(¤t_file_entry_); if (result != kNoError) { return result; } @@ -388,13 +411,15 @@ int32_t ZipWriter::FinishEntry() { } DataDescriptor dd = {}; - dd.crc32 = currentFile.crc32; - dd.compressed_size = currentFile.compressed_size; - dd.uncompressed_size = currentFile.uncompressed_size; + dd.crc32 = current_file_entry_.crc32; + dd.compressed_size = current_file_entry_.compressed_size; + dd.uncompressed_size = current_file_entry_.uncompressed_size; if (fwrite(&dd, sizeof(dd), 1, file_) != 1) { return HandleError(kIoError); } + files_.emplace_back(std::move(current_file_entry_)); + current_offset_ += sizeof(DataDescriptor::kOptSignature) + sizeof(dd); state_ = State::kWritingZip; return kNoError; @@ -406,7 +431,7 @@ int32_t ZipWriter::Finish() { } off64_t startOfCdr = current_offset_; - for (FileInfo& file : files_) { + for (FileEntry& file : files_) { CentralDirectoryRecord cdr = {}; cdr.record_signature = CentralDirectoryRecord::kSignature; cdr.gpb_flags |= kGPBDDFlagMask; @@ -442,11 +467,19 @@ int32_t ZipWriter::Finish() { return HandleError(kIoError); } + current_offset_ += sizeof(er); + + // Since we can BackUp() and potentially finish writing at an offset less than one we had + // already written at, we must truncate the file. + + if (ftruncate64(fileno(file_), current_offset_) != 0) { + return HandleError(kIoError); + } + if (fflush(file_) != 0) { return HandleError(kIoError); } - current_offset_ += sizeof(er); state_ = State::kDone; return kNoError; } diff --git a/libziparchive/zip_writer_test.cc b/libziparchive/zip_writer_test.cc index 16a574d99..259bcff54 100644 --- a/libziparchive/zip_writer_test.cc +++ b/libziparchive/zip_writer_test.cc @@ -23,6 +23,10 @@ #include #include +static ::testing::AssertionResult AssertFileEntryContentsEq(const std::string& expected, + ZipArchiveHandle handle, + ZipEntry* zip_entry); + struct zipwriter : public ::testing::Test { TemporaryFile* temp_file_; int fd_; @@ -59,16 +63,10 @@ TEST_F(zipwriter, WriteUncompressedZipWithOneFile) { ZipEntry data; ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data)); - EXPECT_EQ(strlen(expected), data.compressed_length); - EXPECT_EQ(strlen(expected), data.uncompressed_length); EXPECT_EQ(kCompressStored, data.method); - - char buffer[6]; - EXPECT_EQ(0, - ExtractToMemory(handle, &data, reinterpret_cast(&buffer), sizeof(buffer))); - buffer[5] = 0; - - EXPECT_STREQ(expected, buffer); + EXPECT_EQ(strlen(expected), data.compressed_length); + ASSERT_EQ(strlen(expected), data.uncompressed_length); + ASSERT_TRUE(AssertFileEntryContentsEq(expected, handle, &data)); CloseArchive(handle); } @@ -94,26 +92,19 @@ TEST_F(zipwriter, WriteUncompressedZipWithMultipleFiles) { ZipArchiveHandle handle; ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false)); - char buffer[4]; ZipEntry data; ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data)); EXPECT_EQ(kCompressStored, data.method); EXPECT_EQ(2u, data.compressed_length); - EXPECT_EQ(2u, data.uncompressed_length); - ASSERT_EQ(0, - ExtractToMemory(handle, &data, reinterpret_cast(buffer), arraysize(buffer))); - buffer[2] = 0; - EXPECT_STREQ("he", buffer); + ASSERT_EQ(2u, data.uncompressed_length); + ASSERT_TRUE(AssertFileEntryContentsEq("he", handle, &data)); ASSERT_EQ(0, FindEntry(handle, ZipString("file/file.txt"), &data)); EXPECT_EQ(kCompressStored, data.method); EXPECT_EQ(3u, data.compressed_length); - EXPECT_EQ(3u, data.uncompressed_length); - ASSERT_EQ(0, - ExtractToMemory(handle, &data, reinterpret_cast(buffer), arraysize(buffer))); - buffer[3] = 0; - EXPECT_STREQ("llo", buffer); + ASSERT_EQ(3u, data.uncompressed_length); + ASSERT_TRUE(AssertFileEntryContentsEq("llo", handle, &data)); ASSERT_EQ(0, FindEntry(handle, ZipString("file/file2.txt"), &data)); EXPECT_EQ(kCompressStored, data.method); @@ -143,7 +134,7 @@ TEST_F(zipwriter, WriteUncompressedZipFileWithAlignedFlag) { CloseArchive(handle); } -void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) { +static void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) { memset(tm, 0, sizeof(struct tm)); tm->tm_hour = (zip_time >> 11) & 0x1f; tm->tm_min = (zip_time >> 5) & 0x3f; @@ -264,14 +255,8 @@ TEST_F(zipwriter, WriteCompressedZipWithOneFile) { ZipEntry data; ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data)); EXPECT_EQ(kCompressDeflated, data.method); - EXPECT_EQ(4u, data.uncompressed_length); - - char buffer[5]; - ASSERT_EQ(0, - ExtractToMemory(handle, &data, reinterpret_cast(buffer), arraysize(buffer))); - buffer[4] = 0; - - EXPECT_STREQ("helo", buffer); + ASSERT_EQ(4u, data.uncompressed_length); + ASSERT_TRUE(AssertFileEntryContentsEq("helo", handle, &data)); CloseArchive(handle); } @@ -319,3 +304,111 @@ TEST_F(zipwriter, CheckStartEntryErrors) { ASSERT_EQ(-5, writer.StartAlignedEntry("align.txt", ZipWriter::kAlign32, 4096)); ASSERT_EQ(-6, writer.StartAlignedEntry("align.txt", 0, 3)); } + +TEST_F(zipwriter, BackupRemovesTheLastFile) { + ZipWriter writer(file_); + + const char* kKeepThis = "keep this"; + const char* kDropThis = "drop this"; + const char* kReplaceWithThis = "replace with this"; + + ZipWriter::FileEntry entry; + EXPECT_LT(writer.GetLastEntry(&entry), 0); + + ASSERT_EQ(0, writer.StartEntry("keep.txt", 0)); + ASSERT_EQ(0, writer.WriteBytes(kKeepThis, strlen(kKeepThis))); + ASSERT_EQ(0, writer.FinishEntry()); + + ASSERT_EQ(0, writer.GetLastEntry(&entry)); + EXPECT_EQ("keep.txt", entry.path); + + ASSERT_EQ(0, writer.StartEntry("drop.txt", 0)); + ASSERT_EQ(0, writer.WriteBytes(kDropThis, strlen(kDropThis))); + ASSERT_EQ(0, writer.FinishEntry()); + + ASSERT_EQ(0, writer.GetLastEntry(&entry)); + EXPECT_EQ("drop.txt", entry.path); + + ASSERT_EQ(0, writer.DiscardLastEntry()); + + ASSERT_EQ(0, writer.GetLastEntry(&entry)); + EXPECT_EQ("keep.txt", entry.path); + + ASSERT_EQ(0, writer.StartEntry("replace.txt", 0)); + ASSERT_EQ(0, writer.WriteBytes(kReplaceWithThis, strlen(kReplaceWithThis))); + ASSERT_EQ(0, writer.FinishEntry()); + + ASSERT_EQ(0, writer.GetLastEntry(&entry)); + EXPECT_EQ("replace.txt", entry.path); + + ASSERT_EQ(0, writer.Finish()); + + // Verify that "drop.txt" does not exist. + + ASSERT_GE(0, lseek(fd_, 0, SEEK_SET)); + + ZipArchiveHandle handle; + ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false)); + + ZipEntry data; + ASSERT_EQ(0, FindEntry(handle, ZipString("keep.txt"), &data)); + ASSERT_TRUE(AssertFileEntryContentsEq(kKeepThis, handle, &data)); + + ASSERT_NE(0, FindEntry(handle, ZipString("drop.txt"), &data)); + + ASSERT_EQ(0, FindEntry(handle, ZipString("replace.txt"), &data)); + ASSERT_TRUE(AssertFileEntryContentsEq(kReplaceWithThis, handle, &data)); + + CloseArchive(handle); +} + +TEST_F(zipwriter, TruncateFileAfterBackup) { + ZipWriter writer(file_); + + const char* kSmall = "small"; + + ASSERT_EQ(0, writer.StartEntry("small.txt", 0)); + ASSERT_EQ(0, writer.WriteBytes(kSmall, strlen(kSmall))); + ASSERT_EQ(0, writer.FinishEntry()); + + ASSERT_EQ(0, writer.StartEntry("large.txt", 0)); + std::vector data; + data.resize(1024*1024, 0xef); + ASSERT_EQ(0, writer.WriteBytes(data.data(), data.size())); + ASSERT_EQ(0, writer.FinishEntry()); + + off64_t before_len = ftello64(file_); + + ZipWriter::FileEntry entry; + ASSERT_EQ(0, writer.GetLastEntry(&entry)); + ASSERT_EQ(0, writer.DiscardLastEntry()); + + ASSERT_EQ(0, writer.Finish()); + + off64_t after_len = ftello64(file_); + + ASSERT_GT(before_len, after_len); +} + +static ::testing::AssertionResult AssertFileEntryContentsEq(const std::string& expected, + ZipArchiveHandle handle, + ZipEntry* zip_entry) { + if (expected.size() != zip_entry->uncompressed_length) { + return ::testing::AssertionFailure() << "uncompressed entry size " + << zip_entry->uncompressed_length << " does not match expected size " << expected.size(); + } + + std::string actual; + actual.resize(expected.size()); + + uint8_t* buffer = reinterpret_cast(&*actual.begin()); + if (ExtractToMemory(handle, zip_entry, buffer, actual.size()) != 0) { + return ::testing::AssertionFailure() << "failed to extract entry"; + } + + if (expected != actual) { + return ::testing::AssertionFailure() << "actual zip_entry data '" << actual + << "' does not match expected '" << expected << "'"; + } + return ::testing::AssertionSuccess(); +}