Avoid mapping output to memory while writing to a file.

It's unnecessary, and causes issues when the uncompressed
output is large.

Bug: http://b/21558406
Change-Id: I99cfb3933b094c2826c7e6c6de9aab03478fcc53
(cherry picked from commit f899bd534b)
This commit is contained in:
Narayan Kamath 2015-04-17 11:53:14 +01:00 committed by Elliott Hughes
parent b8216007c6
commit 785a128aec
2 changed files with 269 additions and 90 deletions

View File

@ -85,7 +85,8 @@ struct EocdRecord {
// Length of the central directory comment.
uint16_t comment_length;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(EocdRecord);
EocdRecord() = default;
DISALLOW_COPY_AND_ASSIGN(EocdRecord);
} __attribute__((packed));
// A structure representing the fixed length fields for a single
@ -138,7 +139,8 @@ struct CentralDirectoryRecord {
// beginning of this archive.
uint32_t local_file_header_offset;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(CentralDirectoryRecord);
CentralDirectoryRecord() = default;
DISALLOW_COPY_AND_ASSIGN(CentralDirectoryRecord);
} __attribute__((packed));
// The local file header for a given entry. This duplicates information
@ -175,7 +177,8 @@ struct LocalFileHeader {
// will appear immediately after the entry file name.
uint16_t extra_field_length;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(LocalFileHeader);
LocalFileHeader() = default;
DISALLOW_COPY_AND_ASSIGN(LocalFileHeader);
} __attribute__((packed));
struct DataDescriptor {
@ -189,10 +192,10 @@ struct DataDescriptor {
// Uncompressed size of the entry.
uint32_t uncompressed_size;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DataDescriptor);
DataDescriptor() = default;
DISALLOW_COPY_AND_ASSIGN(DataDescriptor);
} __attribute__((packed));
#undef DISALLOW_IMPLICIT_CONSTRUCTORS
static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD
@ -324,35 +327,6 @@ struct ZipArchive {
}
};
static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
static const uint32_t kBufSize = 32768;
uint8_t buf[kBufSize];
uint32_t count = 0;
uint64_t crc = 0;
while (count < length) {
uint32_t remaining = length - count;
// Safe conversion because kBufSize is narrow enough for a 32 bit signed
// value.
ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
if (actual != get_size) {
ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
return kIoError;
}
memcpy(begin + count, buf, get_size);
crc = crc32(crc, buf, get_size);
count += get_size;
}
*crc_out = crc;
return 0;
}
/*
* Round up to the next highest power of 2.
*
@ -972,6 +946,128 @@ int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
return kIterationEnd;
}
// Abstract sink that receives the bytes of an entry as they are extracted.
// Concrete writers decide where the bytes end up.
class Writer {
 public:
  virtual ~Writer() = default;

  // Consumes |buf_size| bytes starting at |buf|. Returns true if the data was
  // accepted and false if the writer could not take it.
  virtual bool Append(uint8_t* buf, size_t buf_size) = 0;

 protected:
  // Only subclasses may construct a Writer.
  Writer() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(Writer);
};
// A Writer that writes data to a fixed size memory region.
// The size of the memory region must be equal to the total size of
// the data appended to it.
class MemoryWriter : public Writer {
public:
MemoryWriter(uint8_t* buf, size_t size) : Writer(),
buf_(buf), size_(size), bytes_written_(0) {
}
virtual bool Append(uint8_t* buf, size_t buf_size) override {
if (bytes_written_ + buf_size > size_) {
ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
size_, bytes_written_ + buf_size);
return false;
}
memcpy(buf_ + bytes_written_, buf, buf_size);
bytes_written_ += buf_size;
return true;
}
private:
uint8_t* const buf_;
const size_t size_;
size_t bytes_written_;
};
// A Writer that appends data to a file |fd| at its current position.
// The file will be truncated to the end of the written data.
class FileWriter : public Writer {
public:
// Creates a FileWriter for |fd| and prepare to write |entry| to it,
// guaranteeing that the file descriptor is valid and that there's enough
// space on the volume to write out the entry completely and that the file
// is truncated to the correct length.
//
// Returns a valid FileWriter on success, |nullptr| if an error occurred.
static std::unique_ptr<FileWriter> Create(int fd, const ZipEntry* entry) {
const uint32_t declared_length = entry->uncompressed_length;
const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
if (current_offset == -1) {
ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno));
return nullptr;
}
int result = 0;
#if defined(__linux__)
if (declared_length > 0) {
// Make sure we have enough space on the volume to extract the compressed
// entry. Note that the call to ftruncate below will change the file size but
// will not allocate space on disk and this call to fallocate will not
// change the file size.
result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
if (result == -1) {
ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s",
static_cast<int64_t>(declared_length + current_offset), strerror(errno));
return std::unique_ptr<FileWriter>(nullptr);
}
}
#endif // __linux__
result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
if (result == -1) {
ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
static_cast<int64_t>(declared_length + current_offset), strerror(errno));
return std::unique_ptr<FileWriter>(nullptr);
}
return std::unique_ptr<FileWriter>(new FileWriter(fd, declared_length));
}
virtual bool Append(uint8_t* buf, size_t buf_size) override {
if (total_bytes_written_ + buf_size > declared_length_) {
ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
declared_length_, total_bytes_written_ + buf_size);
return false;
}
// Keep track of the start position so we can calculate the
// total number of bytes written.
const uint8_t* const start = buf;
size_t bytes_written = 0;
while (buf_size > 0) {
ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, buf, buf_size));
if (bytes_written == -1) {
ALOGW("Zip: unable to write " ZD " bytes to file; %s", buf_size, strerror(errno));
return false;
}
buf_size -= bytes_written;
buf += bytes_written;
}
total_bytes_written_ += static_cast<size_t>(
reinterpret_cast<uintptr_t>(buf) - reinterpret_cast<uintptr_t>(start));
return true;
}
private:
FileWriter(const int fd, const size_t declared_length) :
Writer(),
fd_(fd),
declared_length_(declared_length),
total_bytes_written_(0) {
}
const int fd_;
const size_t declared_length_;
size_t total_bytes_written_;
};
// This method is using libz macros with old-style-casts
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
@ -980,9 +1076,8 @@ static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
}
#pragma GCC diagnostic pop
static int32_t InflateToFile(int fd, const ZipEntry* entry,
uint8_t* begin, uint32_t length,
uint64_t* crc_out) {
static int32_t InflateEntryToWriter(int fd, const ZipEntry* entry,
Writer* writer, uint64_t* crc_out) {
const size_t kBufSize = 32768;
std::vector<uint8_t> read_buf(kBufSize);
std::vector<uint8_t> write_buf(kBufSize);
@ -1057,12 +1152,10 @@ static int32_t InflateToFile(int fd, const ZipEntry* entry,
if (zstream.avail_out == 0 ||
(zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
const size_t write_size = zstream.next_out - &write_buf[0];
// The file might have declared a bogus length.
if (write_size + write_count > length) {
return -1;
if (!writer->Append(&write_buf[0], write_size)) {
// The file might have declared a bogus length.
return kInconsistentInformation;
}
memcpy(begin + write_count, &write_buf[0], write_size);
write_count += write_size;
zstream.next_out = &write_buf[0];
zstream.avail_out = kBufSize;
@ -1083,8 +1176,41 @@ static int32_t InflateToFile(int fd, const ZipEntry* entry,
return 0;
}
int32_t ExtractToMemory(ZipArchiveHandle handle,
ZipEntry* entry, uint8_t* begin, uint32_t size) {
// Copies the data of a stored (uncompressed) |entry| from the current
// position of |fd| to |writer| in blocks of at most 32 KiB, accumulating a
// CRC32 over the copied bytes.
//
// Returns 0 and stores the checksum in |*crc_out| on success. Returns
// kIoError, leaving |*crc_out| untouched, if a read delivers fewer bytes than
// requested or if |writer| refuses a block.
static int32_t CopyEntryToWriter(int fd, const ZipEntry* entry, Writer* writer,
                                 uint64_t *crc_out) {
  static const uint32_t kBufSize = 32768;
  std::vector<uint8_t> buf(kBufSize);

  const uint32_t length = entry->uncompressed_length;
  uint32_t count = 0;
  uint64_t crc = 0;
  while (count < length) {
    uint32_t remaining = length - count;

    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
    // value.
    const ssize_t block_size = (remaining > kBufSize) ? kBufSize : remaining;
    const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, &buf[0], block_size));

    if (actual != block_size) {
      ALOGW("CopyEntryToWriter: copy read failed (" ZD " vs " ZD ")", actual, block_size);
      return kIoError;
    }

    if (!writer->Append(&buf[0], block_size)) {
      return kIoError;
    }
    crc = crc32(crc, &buf[0], block_size);
    count += block_size;
  }

  *crc_out = crc;

  return 0;
}
int32_t ExtractToWriter(ZipArchiveHandle handle,
ZipEntry* entry, Writer* writer) {
ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
const uint16_t method = entry->method;
off64_t data_offset = entry->offset;
@ -1098,9 +1224,9 @@ int32_t ExtractToMemory(ZipArchiveHandle handle,
int32_t return_value = -1;
uint64_t crc = 0;
if (method == kCompressStored) {
return_value = CopyFileToFile(archive->fd, begin, size, &crc);
return_value = CopyEntryToWriter(archive->fd, entry, writer, &crc);
} else if (method == kCompressDeflated) {
return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
return_value = InflateEntryToWriter(archive->fd, entry, writer, &crc);
}
if (!return_value && entry->has_data_descriptor) {
@ -1120,55 +1246,20 @@ int32_t ExtractToMemory(ZipArchiveHandle handle,
return return_value;
}
// Extracts |entry| from the archive |handle| into the |size| byte memory
// region starting at |begin|. Returns whatever ExtractToWriter() returns for
// a MemoryWriter covering that region.
int32_t ExtractToMemory(ZipArchiveHandle handle, ZipEntry* entry,
                        uint8_t* begin, uint32_t size) {
  // The writer is only needed for the duration of this call, so it lives on
  // the stack instead of being heap allocated.
  MemoryWriter writer(begin, size);
  return ExtractToWriter(handle, entry, &writer);
}
int32_t ExtractEntryToFile(ZipArchiveHandle handle,
ZipEntry* entry, int fd) {
const uint32_t declared_length = entry->uncompressed_length;
const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
if (current_offset == -1) {
ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
strerror(errno));
std::unique_ptr<Writer> writer(FileWriter::Create(fd, entry));
if (writer.get() == nullptr) {
return kIoError;
}
int result = 0;
#if defined(__linux__)
// Make sure we have enough space on the volume to extract the compressed
// entry. Note that the call to ftruncate below will change the file size but
// will not allocate space on disk.
if (declared_length > 0) {
result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
if (result == -1) {
ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s",
static_cast<int64_t>(declared_length + current_offset), strerror(errno));
return kIoError;
}
}
#endif // defined(__linux__)
result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
if (result == -1) {
ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
static_cast<int64_t>(declared_length + current_offset), strerror(errno));
return kIoError;
}
// Don't attempt to map a region of length 0. We still need the
// ftruncate() though, since the API guarantees that we will truncate
// the file to the end of the uncompressed output.
if (declared_length == 0) {
return 0;
}
android::FileMap map;
if (!map.create(kTempMappingFileName, fd, current_offset, declared_length, false)) {
return kMmapFailed;
}
const int32_t error = ExtractToMemory(handle, entry,
reinterpret_cast<uint8_t*>(map.getDataPtr()),
map.getDataLength());
return error;
return ExtractToWriter(handle, entry, writer.get());
}
const char* ErrorCodeString(int32_t error_code) {

View File

@ -23,6 +23,7 @@
#include <unistd.h>
#include <vector>
#include <base/file.h>
#include <gtest/gtest.h>
static std::string test_data_dir;
@ -228,6 +229,44 @@ static const uint32_t kEmptyEntriesZip[] = {
0x54557478, 0x13030005, 0x7552e25c, 0x01000b78, 0x00428904, 0x13880400,
0x4b500000, 0x00000605, 0x00010000, 0x004f0001, 0x00430000, 0x00000000 };
// This is a zip file containing a single entry (ab.txt) that contains
// 90072 repetitions of the string "ab\n" and has an uncompressed length
// of 270216 bytes.
static const uint16_t kAbZip[] = {
0x4b50, 0x0403, 0x0014, 0x0000, 0x0008, 0x51d2, 0x4698, 0xc4b0,
0x2cda, 0x011b, 0x0000, 0x1f88, 0x0004, 0x0006, 0x001c, 0x6261,
0x742e, 0x7478, 0x5455, 0x0009, 0x7c03, 0x3a09, 0x7c55, 0x3a09,
0x7555, 0x0b78, 0x0100, 0x8904, 0x0042, 0x0400, 0x1388, 0x0000,
0xc2ed, 0x0d31, 0x0000, 0x030c, 0x7fa0, 0x3b2e, 0x22ff, 0xa2aa,
0x841f, 0x45fc, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555, 0x5555,
0x5555, 0x5555, 0x5555, 0x5555, 0xdd55, 0x502c, 0x014b, 0x1e02,
0x1403, 0x0000, 0x0800, 0xd200, 0x9851, 0xb046, 0xdac4, 0x1b2c,
0x0001, 0x8800, 0x041f, 0x0600, 0x1800, 0x0000, 0x0000, 0x0100,
0x0000, 0xa000, 0x0081, 0x0000, 0x6100, 0x2e62, 0x7874, 0x5574,
0x0554, 0x0300, 0x097c, 0x553a, 0x7875, 0x000b, 0x0401, 0x4289,
0x0000, 0x8804, 0x0013, 0x5000, 0x054b, 0x0006, 0x0000, 0x0100,
0x0100, 0x4c00, 0x0000, 0x5b00, 0x0001, 0x0000, 0x0000
};
static const uint8_t kAbTxtName[] = { 'a', 'b', '.', 't', 'x', 't' };
static const uint16_t kAbTxtNameLength = sizeof(kAbTxtName);
static const size_t kAbUncompressedSize = 270216;
static int make_temporary_file(const char* file_name_pattern) {
char full_path[1024];
// Account for differences between the host and the target.
@ -275,6 +314,55 @@ TEST(ziparchive, EmptyEntries) {
close(output_fd);
}
// Extracts an entry whose uncompressed size is larger than 32K both to memory
// and to a file, and checks that the two results agree with each other and
// with the expected "ab\n" pattern.
TEST(ziparchive, EntryLargerThan32K) {
  // Materialize the embedded archive as a temporary file.
  char archive_pattern[] = "entry_larger_than_32k_test_XXXXXX";
  const int archive_fd = make_temporary_file(archive_pattern);
  ASSERT_NE(-1, archive_fd);
  // NOTE(review): one byte fewer than sizeof(kAbZip) is written - presumably
  // the archive has an odd length and the last byte of the uint16_t array is
  // padding; confirm against the original ab.zip.
  ASSERT_TRUE(android::base::WriteFully(archive_fd, reinterpret_cast<const uint8_t*>(kAbZip),
                                        sizeof(kAbZip) - 1));

  ZipArchiveHandle handle;
  ASSERT_EQ(0, OpenArchiveFd(archive_fd, "EntryLargerThan32KTest", &handle));

  // Look up ab.txt and check its declared size.
  ZipEntryName ab_name;
  ab_name.name = kAbTxtName;
  ab_name.name_length = kAbTxtNameLength;
  ZipEntry entry;
  ASSERT_EQ(0, FindEntry(handle, ab_name, &entry));
  ASSERT_EQ(kAbUncompressedSize, entry.uncompressed_length);

  // First extraction: straight into a memory buffer.
  std::vector<uint8_t> memory_contents(kAbUncompressedSize);
  ASSERT_EQ(0, ExtractToMemory(handle, &entry, &memory_contents[0], memory_contents.size()));

  // Second extraction: into a fresh temporary file.
  char output_pattern[] = "entry_larger_than_32k_test_output_XXXXXX";
  const int output_fd = make_temporary_file(output_pattern);
  ASSERT_NE(-1, output_fd);
  ASSERT_EQ(0, ExtractEntryToFile(handle, &entry, output_fd));

  // The output file must be exactly as long as the uncompressed entry.
  struct stat output_stat;
  ASSERT_EQ(0, fstat(output_fd, &output_stat));
  ASSERT_EQ(kAbUncompressedSize, static_cast<size_t>(output_stat.st_size));

  // Read the file back and compare it with the in-memory extraction.
  std::vector<uint8_t> file_contents(kAbUncompressedSize);
  ASSERT_EQ(0, lseek64(output_fd, 0, SEEK_SET));
  ASSERT_TRUE(android::base::ReadFully(output_fd, &file_contents[0], file_contents.size()));
  ASSERT_EQ(file_contents, memory_contents);

  // Every one of the 90072 three byte lines must read "ab\n".
  const int kLineCount = 90072;
  for (int line_index = 0; line_index < kLineCount; ++line_index) {
    const uint8_t* const line_start = &file_contents[0] + (3 * line_index);
    ASSERT_EQ('a', line_start[0]);
    ASSERT_EQ('b', line_start[1]);
    ASSERT_EQ('\n', line_start[2]);
  }

  close(archive_fd);
  close(output_fd);
}
TEST(ziparchive, TrailerAfterEOCD) {
char temp_file_pattern[] = "trailer_after_eocd_test_XXXXXX";
int fd = make_temporary_file(temp_file_pattern);