blockimgdiff: add a function to select and convert 'diff' to new

The package size will be unintentionally large if we stash more blocks
than the stash limit allowed by the cache size. To keep the number of
simultaneously stashed blocks under that limit, we inevitably have to
convert some 'diff' commands to 'new' commands.

To mitigate the impact, we add a new function that selects the
transfers to convert based on their patch size and compressed target
size: a transfer is converted when its patch is at least as large as
its deflate-compressed target blocks, as sketched below. This gives a
slight improvement in the final package size: from 194M to 185M.

Bug: 120561199
Test: build a non-A/B incremental package, run simulator
Change-Id: Id73ff736ba4e6901d245ad5549d42310d0740284
xunchang 2018-12-06 15:03:45 -08:00
parent 632d154393
commit 3df4d5eb6d
2 changed files with 99 additions and 12 deletions
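
For illustration, a minimal sketch of the selection criterion described
above, assuming only that the 'new' data is deflate-compressed (the helper
name and the zlib call are illustrative, not part of this change):

import zlib

def worth_converting_to_new(patch_data, target_data):
  # Hypothetical helper: the 'new' payload is deflate-compressed anyway, so
  # a 'diff' transfer only saves space while its patch is smaller than the
  # compressed target blocks. Convert it to 'new' once the patch is at least
  # as large.
  return len(patch_data) >= len(zlib.compress(target_data))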


@@ -471,6 +471,23 @@ class BlockImageDiff(object):
 
     # Ensure the runtime stash size is under the limit.
     if common.OPTIONS.cache_size is not None:
+      stash_limit = (common.OPTIONS.cache_size *
+                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
+      # Ignore the stash limit and calculate the maximum simultaneously stashed
+      # blocks needed.
+      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)
+
+      # We cannot stash more blocks than the stash limit simultaneously. As a
+      # result, some 'diff' commands will be converted to new, leading to an
+      # unintended large package. To mitigate this issue, we can carefully
+      # choose the transfers for conversion. The number '1024' can be further
+      # tweaked here to balance the package size and build time.
+      if max_stashed_blocks > stash_limit + 1024:
+        self.SelectAndConvertDiffTransfersToNew()
+        # Regenerate the sequence as the graph has changed.
+        self.FindSequenceForTransfers()
+
+      # Revise the stash size again to keep the size under limit.
       self.ReviseStashSize()
 
     # Double-check our work.
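
As a rough worked example of the stash_limit computed above (numbers
borrowed from the unit test further down; the 0.8 threshold is the assumed
default of common.OPTIONS.stash_threshold):

cache_size = 7 * 4096      # bytes available in /cache for stashing
stash_threshold = 0.8      # assumed default fraction of /cache we may use
blocksize = 4096           # target block size in bytes

stash_limit = cache_size * stash_threshold / blocksize  # 5.6
# At most 5 whole blocks may be stashed simultaneously. Only when the
# unconstrained maximum exceeds stash_limit + 1024 blocks does the new code
# path convert 'diff' transfers to 'new'.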
@@ -700,7 +717,21 @@ class BlockImageDiff(object):
           "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
           max_stashed_blocks, self._max_stashed_size)
 
-  def ReviseStashSize(self):
+  def ReviseStashSize(self, ignore_stash_limit=False):
+    """ Revises the transfers to keep the stash size within the size limit.
+
+    Iterates through the transfer list and calculates the stash size each
+    transfer generates. Converts the affected transfers to new if we reach the
+    stash limit.
+
+    Args:
+      ignore_stash_limit: Ignores the stash limit and calculates the max
+      simultaneous stashed blocks instead. No change will be made to the
+      transfer list with this flag.
+
+    Return:
+      A tuple of (tgt blocks converted to new, max stashed blocks)
+    """
     logger.info("Revising stash size...")
     stash_map = {}
@@ -715,16 +746,19 @@ class BlockImageDiff(object):
         for stash_raw_id, _ in xf.use_stash:
           stash_map[stash_raw_id] += (xf,)
 
-    # Compute the maximum blocks available for stash based on /cache size and
-    # the threshold.
-    cache_size = common.OPTIONS.cache_size
-    stash_threshold = common.OPTIONS.stash_threshold
-    max_allowed = cache_size * stash_threshold / self.tgt.blocksize
+    max_allowed_blocks = None
+    if not ignore_stash_limit:
+      # Compute the maximum blocks available for stash based on /cache size and
+      # the threshold.
+      cache_size = common.OPTIONS.cache_size
+      stash_threshold = common.OPTIONS.stash_threshold
+      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize
 
     # See the comments for 'stashes' in WriteTransfers().
     stashes = {}
     stashed_blocks = 0
     new_blocks = 0
+    max_stashed_blocks = 0
 
     # Now go through all the commands. Compute the required stash size on the
     # fly. If a command requires excess stash than available, it deletes the
@@ -741,7 +775,7 @@ class BlockImageDiff(object):
           if sh not in stashes:
             stashed_blocks_after += sr.size()
 
-          if stashed_blocks_after > max_allowed:
+          if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
             # We cannot stash this one for a later command. Find out the command
             # that will use this stash and replace the command with "new".
             use_cmd = stash_map[stash_raw_id][2]
@@ -754,15 +788,21 @@ class BlockImageDiff(object):
           else:
             stashes[sh] = 1
           stashed_blocks = stashed_blocks_after
+          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)
 
       # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
       # ComputePatches(), they both have the style of "diff".
       if xf.style == "diff":
         assert xf.tgt_ranges and xf.src_ranges
         if xf.src_ranges.overlaps(xf.tgt_ranges):
-          if stashed_blocks + xf.src_ranges.size() > max_allowed:
+          if (max_allowed_blocks and
+              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
             replaced_cmds.append(xf)
             logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
+          else:
+            # The whole source ranges will be stashed for implicit stashes.
+            max_stashed_blocks = max(max_stashed_blocks,
+                                     stashed_blocks + xf.src_ranges.size())
 
       # Replace the commands in replaced_cmds with "new"s.
       for cmd in replaced_cmds:
@@ -791,7 +831,7 @@ class BlockImageDiff(object):
       logger.info(
           " Total %d blocks (%d bytes) are packed as new blocks due to "
           "insufficient cache size.", new_blocks, num_of_bytes)
-    return new_blocks
+    return new_blocks, max_stashed_blocks
 
   def ComputePatches(self, prefix):
     logger.info("Reticulating splines...")
@@ -1299,6 +1339,53 @@ class BlockImageDiff(object):
 
     return patches
 
+  def SelectAndConvertDiffTransfersToNew(self):
+    """Converts the diff transfers to reduce the max simultaneous stash.
+
+    Since the 'new' data is compressed with deflate, we can select the 'diff'
+    transfers for conversion by comparing its patch size with the size of the
+    compressed data. Ideally, we want to convert the transfers with a small
+    size increase, but using a large number of stashed blocks.
+    """
+    logger.info("Selecting diff commands to convert to new.")
+    diff_queue = []
+    for xf in self.transfers:
+      if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
+        use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
+                                         xf.src_ranges)
+        diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
+
+    # Remove the 'move' transfers, and compute the patch & compressed size
+    # for the remaining.
+    result = self.ComputePatchesForInputList(diff_queue, True)
+
+    removed_stashed_blocks = 0
+    for xf_index, patch_info, compressed_size in result:
+      xf = self.transfers[xf_index]
+      if not xf.patch_info:
+        xf.patch_info = patch_info
+
+      size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
+      diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
+      logger.info("%s, target size: %d, style: %s, patch size: %d,"
+                  " compression_size: %d, ratio %.2f%%", xf.tgt_name,
+                  xf.tgt_ranges.size(), diff_style,
+                  len(xf.patch_info.content), compressed_size, size_ratio)
+
+      # Convert the transfer to new if the compressed size is smaller or equal.
+      # We don't need to maintain the stash_before lists here because the
+      # graph will be regenerated later.
+      if len(xf.patch_info.content) >= compressed_size:
+        removed_stashed_blocks += sum(sr.size() for _, sr in xf.use_stash)
+        logger.info("Converting %s to new", xf.tgt_name)
+        xf.ConvertToNew()
+
+    # TODO(xunchang) convert more transfers by sorting:
+    #   (compressed size - patch_size) / used_stashed_blocks
+    logger.info("Removed %d stashed blocks", removed_stashed_blocks)
+
   def FindTransfers(self):
     """Parse the file_map to generate all the transfers."""


@@ -127,11 +127,11 @@ class BlockImageDiffTest(ReleaseToolsTestCase):
 
     # Sufficient cache to stash 5 blocks (size * 0.8 >= 5).
     common.OPTIONS.cache_size = 7 * 4096
-    self.assertEqual(0, block_image_diff.ReviseStashSize())
+    self.assertEqual((0, 5), block_image_diff.ReviseStashSize())
 
     # Insufficient cache to stash 5 blocks (size * 0.8 < 5).
     common.OPTIONS.cache_size = 6 * 4096
-    self.assertEqual(10, block_image_diff.ReviseStashSize())
+    self.assertEqual((10, 0), block_image_diff.ReviseStashSize())
 
   def test_ReviseStashSize_bug_33687949(self):
     """ReviseStashSize() should "free" the used stash _after_ the command.
@@ -169,7 +169,7 @@ class BlockImageDiffTest(ReleaseToolsTestCase):
 
     # Insufficient cache to stash 15 blocks (size * 0.8 < 15).
     common.OPTIONS.cache_size = 15 * 4096
-    self.assertEqual(15, block_image_diff.ReviseStashSize())
+    self.assertEqual((15, 5), block_image_diff.ReviseStashSize())
 
   def test_FileTypeSupportedByImgdiff(self):
     self.assertTrue(