Merge "blockimgdiff: add a function to select and convert 'diff' to new" am: 28b6654ee7

am: 134178e370

Change-Id: Id0c2a97ffb3a5c5c824644b8819fa31046407dfd
This commit is contained in:
Tianjie Xu 2018-12-20 12:33:56 -08:00 committed by android-build-merger
commit e6610488f0
2 changed files with 99 additions and 12 deletions

View File

@ -471,6 +471,23 @@ class BlockImageDiff(object):
# Ensure the runtime stash size is under the limit.
if common.OPTIONS.cache_size is not None:
stash_limit = (common.OPTIONS.cache_size *
common.OPTIONS.stash_threshold / self.tgt.blocksize)
# Ignore the stash limit and calculate the maximum simultaneously stashed
# blocks needed.
_, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)
# We cannot stash more blocks than the stash limit simultaneously. As a
# result, some 'diff' commands will be converted to new; leading to an
# unintended large package. To mitigate this issue, we can carefully
# choose the transfers for conversion. The number '1024' can be further
# tweaked here to balance the package size and build time.
if max_stashed_blocks > stash_limit + 1024:
self.SelectAndConvertDiffTransfersToNew()
# Regenerate the sequence as the graph has changed.
self.FindSequenceForTransfers()
# Revise the stash size again to keep the size under limit.
self.ReviseStashSize()
# Double-check our work.
@ -700,7 +717,21 @@ class BlockImageDiff(object):
"max stashed blocks: %d (%d bytes), limit: <unknown>\n",
max_stashed_blocks, self._max_stashed_size)
def ReviseStashSize(self):
def ReviseStashSize(self, ignore_stash_limit=False):
""" Revises the transfers to keep the stash size within the size limit.
Iterates through the transfer list and calculates the stash size each
transfer generates. Converts the affected transfers to new if we reach the
stash limit.
Args:
ignore_stash_limit: Ignores the stash limit and calculates the max
simultaneous stashed blocks instead. No change will be made to the
transfer list with this flag.
Return:
A tuple of (tgt blocks converted to new, max stashed blocks)
"""
logger.info("Revising stash size...")
stash_map = {}
@ -715,16 +746,19 @@ class BlockImageDiff(object):
for stash_raw_id, _ in xf.use_stash:
stash_map[stash_raw_id] += (xf,)
# Compute the maximum blocks available for stash based on /cache size and
# the threshold.
cache_size = common.OPTIONS.cache_size
stash_threshold = common.OPTIONS.stash_threshold
max_allowed = cache_size * stash_threshold / self.tgt.blocksize
max_allowed_blocks = None
if not ignore_stash_limit:
# Compute the maximum blocks available for stash based on /cache size and
# the threshold.
cache_size = common.OPTIONS.cache_size
stash_threshold = common.OPTIONS.stash_threshold
max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize
# See the comments for 'stashes' in WriteTransfers().
stashes = {}
stashed_blocks = 0
new_blocks = 0
max_stashed_blocks = 0
# Now go through all the commands. Compute the required stash size on the
# fly. If a command requires excess stash than available, it deletes the
@ -741,7 +775,7 @@ class BlockImageDiff(object):
if sh not in stashes:
stashed_blocks_after += sr.size()
if stashed_blocks_after > max_allowed:
if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
# We cannot stash this one for a later command. Find out the command
# that will use this stash and replace the command with "new".
use_cmd = stash_map[stash_raw_id][2]
@ -754,15 +788,21 @@ class BlockImageDiff(object):
else:
stashes[sh] = 1
stashed_blocks = stashed_blocks_after
max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)
# "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
# ComputePatches(), they both have the style of "diff".
if xf.style == "diff":
assert xf.tgt_ranges and xf.src_ranges
if xf.src_ranges.overlaps(xf.tgt_ranges):
if stashed_blocks + xf.src_ranges.size() > max_allowed:
if (max_allowed_blocks and
stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
replaced_cmds.append(xf)
logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
else:
# The whole source ranges will be stashed for implicit stashes.
max_stashed_blocks = max(max_stashed_blocks,
stashed_blocks + xf.src_ranges.size())
# Replace the commands in replaced_cmds with "new"s.
for cmd in replaced_cmds:
@ -791,7 +831,7 @@ class BlockImageDiff(object):
logger.info(
" Total %d blocks (%d bytes) are packed as new blocks due to "
"insufficient cache size.", new_blocks, num_of_bytes)
return new_blocks
return new_blocks, max_stashed_blocks
def ComputePatches(self, prefix):
logger.info("Reticulating splines...")
@ -1299,6 +1339,53 @@ class BlockImageDiff(object):
return patches
def SelectAndConvertDiffTransfersToNew(self):
"""Converts the diff transfers to reduce the max simultaneous stash.
Since the 'new' data is compressed with deflate, we can select the 'diff'
transfers for conversion by comparing its patch size with the size of the
compressed data. Ideally, we want to convert the transfers with a small
size increase, but using a large number of stashed blocks.
"""
logger.info("Selecting diff commands to convert to new.")
diff_queue = []
for xf in self.transfers:
if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
xf.src_ranges)
diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
# Remove the 'move' transfers, and compute the patch & compressed size
# for the remaining.
result = self.ComputePatchesForInputList(diff_queue, True)
removed_stashed_blocks = 0
for xf_index, patch_info, compressed_size in result:
xf = self.transfers[xf_index]
if not xf.patch_info:
xf.patch_info = patch_info
size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
logger.info("%s, target size: %d, style: %s, patch size: %d,"
" compression_size: %d, ratio %.2f%%", xf.tgt_name,
xf.tgt_ranges.size(), diff_style,
len(xf.patch_info.content), compressed_size, size_ratio)
# Convert the transfer to new if the compressed size is smaller or equal.
# We don't need to maintain the stash_before lists here because the
# graph will be regenerated later.
if len(xf.patch_info.content) >= compressed_size:
removed_stashed_blocks += sum(sr.size() for _, sr in xf.use_stash)
logger.info("Converting %s to new", xf.tgt_name)
xf.ConvertToNew()
# TODO(xunchang) convert more transfers by sorting:
# (compressed size - patch_size) / used_stashed_blocks
logger.info("Removed %d stashed blocks", removed_stashed_blocks)
def FindTransfers(self):
"""Parse the file_map to generate all the transfers."""

View File

@ -127,11 +127,11 @@ class BlockImageDiffTest(ReleaseToolsTestCase):
# Sufficient cache to stash 5 blocks (size * 0.8 >= 5).
common.OPTIONS.cache_size = 7 * 4096
self.assertEqual(0, block_image_diff.ReviseStashSize())
self.assertEqual((0, 5), block_image_diff.ReviseStashSize())
# Insufficient cache to stash 5 blocks (size * 0.8 < 5).
common.OPTIONS.cache_size = 6 * 4096
self.assertEqual(10, block_image_diff.ReviseStashSize())
self.assertEqual((10, 0), block_image_diff.ReviseStashSize())
def test_ReviseStashSize_bug_33687949(self):
"""ReviseStashSize() should "free" the used stash _after_ the command.
@ -169,7 +169,7 @@ class BlockImageDiffTest(ReleaseToolsTestCase):
# Insufficient cache to stash 15 blocks (size * 0.8 < 15).
common.OPTIONS.cache_size = 15 * 4096
self.assertEqual(15, block_image_diff.ReviseStashSize())
self.assertEqual((15, 5), block_image_diff.ReviseStashSize())
def test_FileTypeSupportedByImgdiff(self):
self.assertTrue(