From fc44a515d46e6f4d5eaa0d32659b1cf3b9492305 Mon Sep 17 00:00:00 2001
From: Doug Zongker <dougz@android.com>
Date: Tue, 26 Aug 2014 13:10:25 -0700
Subject: [PATCH] new block OTA system tools

Replace the xdelta/xz-based block OTA generation with a new system
based on the existing bsdiff/imgdiff tools.

Bug: 16984795
Change-Id: Ia9732516ffdfc12be86260b2cc4b1dd2d210e886
---
 tools/releasetools/add_img_to_target_files.py | 184 +++---
 tools/releasetools/blockimgdiff.py            | 543 ++++++++++++++++++
 tools/releasetools/build_image.py             |   9 -
 tools/releasetools/common.py                  |   9 +
 tools/releasetools/edify_generator.py         |   7 -
 tools/releasetools/ota_from_target_files      | 267 ++++-----
 tools/releasetools/rangelib.py                | 161 ++++++
 tools/releasetools/simg_map.py                | 148 -----
 tools/releasetools/sparse_img.py              | 184 ++++++
 9 files changed, 1074 insertions(+), 438 deletions(-)
 create mode 100644 tools/releasetools/blockimgdiff.py
 create mode 100644 tools/releasetools/rangelib.py
 delete mode 100644 tools/releasetools/simg_map.py
 create mode 100644 tools/releasetools/sparse_img.py

diff --git a/tools/releasetools/add_img_to_target_files.py b/tools/releasetools/add_img_to_target_files.py
index 568a3f132..e7ba28d38 100755
--- a/tools/releasetools/add_img_to_target_files.py
+++ b/tools/releasetools/add_img_to_target_files.py
@@ -46,44 +46,46 @@ import common

 OPTIONS = common.OPTIONS

-def AddSystem(output_zip, sparse=True, prefix="IMAGES/"):
+def AddSystem(output_zip, prefix="IMAGES/"):
   """Turn the contents of SYSTEM into a system image and store it in
   output_zip."""
-  block_list = tempfile.NamedTemporaryFile()
-  data = BuildSystem(OPTIONS.input_tmp, OPTIONS.info_dict, sparse=sparse,
+  block_list = common.MakeTempFile(prefix="system-blocklist-", suffix=".map")
+  imgname = BuildSystem(OPTIONS.input_tmp, OPTIONS.info_dict,
+                        block_list=block_list)
+  with open(imgname, "rb") as f:
+    common.ZipWriteStr(output_zip, prefix + "system.img", f.read())
+  with open(block_list, "rb") as f:
+    common.ZipWriteStr(output_zip, prefix + "system.map", f.read())
+
+
+def BuildSystem(input_dir, info_dict, block_list=None):
+  """Build the (sparse) system image and return the name of a temp
+  file containing it."""
+  return CreateImage(input_dir, info_dict, "system", block_list=block_list)
+
+
+def AddVendor(output_zip, prefix="IMAGES/"):
+  """Turn the contents of VENDOR into a vendor image and store it in
+  output_zip."""
+  block_list = common.MakeTempFile(prefix="vendor-blocklist-", suffix=".map")
+  imgname = BuildVendor(OPTIONS.input_tmp, OPTIONS.info_dict,
+                        block_list=block_list)
-  common.ZipWriteStr(output_zip, prefix + "system.img", data)
-  with open(block_list.name, "rb") as f:
-    block_list_data = f.read()
-  common.ZipWriteStr(output_zip, prefix + "system.map", block_list_data)
-  block_list.close()
-
-def BuildSystem(input_dir, info_dict, sparse=True, map_file=None,
-                block_list=None):
-  return CreateImage(input_dir, info_dict, "system",
-                     sparse=sparse, map_file=map_file, block_list=block_list)
-
-def AddVendor(output_zip, sparse=True, prefix="IMAGES/"):
-  block_list = tempfile.NamedTemporaryFile()
-  data = BuildVendor(OPTIONS.input_tmp, OPTIONS.info_dict, sparse=sparse,
-                     block_list=block_list.name)
-  common.ZipWriteStr(output_zip, prefix + "vendor.img", data)
-  with open(block_list.name, "rb") as f:
-    block_list_data = f.read()
-  common.ZipWriteStr(output_zip, prefix + "vendor.map", block_list_data)
-  block_list.close()
-
-def BuildVendor(input_dir, info_dict, sparse=True, map_file=None,
-                block_list=None):
-  return CreateImage(input_dir, info_dict,
"vendor", - sparse=sparse, map_file=map_file, block_list=block_list) + with open(imgname, "rb") as f: + common.ZipWriteStr(output_zip, prefix + "vendor.img", f.read()) + with open(block_list, "rb") as f: + common.ZipWriteStr(output_zip, prefix + "vendor.map", f.read()) -def CreateImage(input_dir, info_dict, what, sparse=True, map_file=None, - block_list=None): +def BuildVendor(input_dir, info_dict, block_list=None): + """Build the (sparse) vendor image and return the name of a temp + file containing it.""" + return CreateImage(input_dir, info_dict, "vendor", block_list=block_list) + + +def CreateImage(input_dir, info_dict, what, block_list=None): print "creating " + what + ".img..." - img = tempfile.NamedTemporaryFile() + img = common.MakeTempFile(prefix=what + "-", suffix=".img") # The name of the directory it is making an image out of matters to # mkyaffs2image. It wants "system" but we have a directory named @@ -117,45 +119,13 @@ def CreateImage(input_dir, info_dict, what, sparse=True, map_file=None, if not os.path.exists(fc_config): fc_config = None succ = build_image.BuildImage(os.path.join(input_dir, what), - image_props, img.name, + image_props, img, fs_config=fs_config, fc_config=fc_config, block_list=block_list) assert succ, "build " + what + ".img image failed" - mapdata = None - - if sparse: - data = open(img.name).read() - img.close() - else: - success, name = build_image.UnsparseImage(img.name, replace=False) - if not success: - assert False, "unsparsing " + what + ".img failed" - - if map_file: - mmap = tempfile.NamedTemporaryFile() - mimg = tempfile.NamedTemporaryFile(delete=False) - success = build_image.MappedUnsparseImage( - img.name, name, mmap.name, mimg.name) - if not success: - assert False, "creating sparse map failed" - os.unlink(name) - name = mimg.name - - with open(mmap.name) as f: - mapdata = f.read() - - try: - with open(name) as f: - data = f.read() - finally: - os.unlink(name) - - if mapdata is None: - return data - else: - return mapdata, data + return img def AddUserdata(output_zip, prefix="IMAGES/"): @@ -226,57 +196,53 @@ def AddCache(output_zip, prefix="IMAGES/"): def AddImagesToTargetFiles(filename): OPTIONS.input_tmp, input_zip = common.UnzipTemp(filename) + + for n in input_zip.namelist(): + if n.startswith("IMAGES/"): + print "target_files appears to already contain images." + sys.exit(1) + try: + input_zip.getinfo("VENDOR/") + has_vendor = True + except KeyError: + has_vendor = False - for n in input_zip.namelist(): - if n.startswith("IMAGES/"): - print "target_files appears to already contain images." 
- sys.exit(1) + OPTIONS.info_dict = common.LoadInfoDict(input_zip) + if "selinux_fc" in OPTIONS.info_dict: + OPTIONS.info_dict["selinux_fc"] = os.path.join( + OPTIONS.input_tmp, "BOOT", "RAMDISK", "file_contexts") - try: - input_zip.getinfo("VENDOR/") - has_vendor = True - except KeyError: - has_vendor = False + input_zip.close() + output_zip = zipfile.ZipFile(filename, "a", + compression=zipfile.ZIP_DEFLATED) - OPTIONS.info_dict = common.LoadInfoDict(input_zip) - if "selinux_fc" in OPTIONS.info_dict: - OPTIONS.info_dict["selinux_fc"] = os.path.join( - OPTIONS.input_tmp, "BOOT", "RAMDISK", "file_contexts") + def banner(s): + print "\n\n++++ " + s + " ++++\n\n" - input_zip.close() - output_zip = zipfile.ZipFile(filename, "a", - compression=zipfile.ZIP_DEFLATED) + banner("boot") + boot_image = common.GetBootableImage( + "IMAGES/boot.img", "boot.img", OPTIONS.input_tmp, "BOOT") + if boot_image: + boot_image.AddToZip(output_zip) - def banner(s): - print "\n\n++++ " + s + " ++++\n\n" + banner("recovery") + recovery_image = common.GetBootableImage( + "IMAGES/recovery.img", "recovery.img", OPTIONS.input_tmp, "RECOVERY") + if recovery_image: + recovery_image.AddToZip(output_zip) - banner("boot") - boot_image = common.GetBootableImage( - "IMAGES/boot.img", "boot.img", OPTIONS.input_tmp, "BOOT") - if boot_image: - boot_image.AddToZip(output_zip) + banner("system") + AddSystem(output_zip) + if has_vendor: + banner("vendor") + AddVendor(output_zip) + banner("userdata") + AddUserdata(output_zip) + banner("cache") + AddCache(output_zip) - banner("recovery") - recovery_image = common.GetBootableImage( - "IMAGES/recovery.img", "recovery.img", OPTIONS.input_tmp, "RECOVERY") - if recovery_image: - recovery_image.AddToZip(output_zip) - - banner("system") - AddSystem(output_zip) - if has_vendor: - banner("vendor") - AddVendor(output_zip) - banner("userdata") - AddUserdata(output_zip) - banner("cache") - AddCache(output_zip) - - output_zip.close() - - finally: - shutil.rmtree(OPTIONS.input_tmp) + output_zip.close() def main(argv): @@ -298,3 +264,5 @@ if __name__ == '__main__': print " ERROR: %s" % (e,) print sys.exit(1) + finally: + common.Cleanup() diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py new file mode 100644 index 000000000..7b01f711d --- /dev/null +++ b/tools/releasetools/blockimgdiff.py @@ -0,0 +1,543 @@ +from __future__ import print_function + +from collections import deque, OrderedDict +from hashlib import sha1 +import itertools +import multiprocessing +import os +import pprint +import re +import subprocess +import sys +import threading +import tempfile + +from rangelib import * + +def compute_patch(src, tgt, imgdiff=False): + srcfd, srcfile = tempfile.mkstemp(prefix="src-") + tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-") + patchfd, patchfile = tempfile.mkstemp(prefix="patch-") + os.close(patchfd) + + try: + with os.fdopen(srcfd, "wb") as f_src: + for p in src: + f_src.write(p) + + with os.fdopen(tgtfd, "wb") as f_tgt: + for p in tgt: + f_tgt.write(p) + try: + os.unlink(patchfile) + except OSError: + pass + if imgdiff: + p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile], + stdout=open("/dev/null", "a"), + stderr=subprocess.STDOUT) + else: + p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile]) + + if p: + raise ValueError("diff failed: " + str(p)) + + with open(patchfile, "rb") as f: + return f.read() + finally: + try: + os.unlink(srcfile) + os.unlink(tgtfile) + os.unlink(patchfile) + except OSError: + pass + +class 
EmptyImage(object):
+  """A zero-length image."""
+  blocksize = 4096
+  care_map = RangeSet()
+  total_blocks = 0
+  file_map = {}
+  def ReadRangeSet(self, ranges):
+    return ()
+
+class Transfer(object):
+  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
+    self.tgt_name = tgt_name
+    self.src_name = src_name
+    self.tgt_ranges = tgt_ranges
+    self.src_ranges = src_ranges
+    self.style = style
+    self.intact = (getattr(tgt_ranges, "monotonic", False) and
+                   getattr(src_ranges, "monotonic", False))
+    self.goes_before = {}
+    self.goes_after = {}
+
+    self.id = len(by_id)
+    by_id.append(self)
+
+  def __str__(self):
+    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
+            " to " + str(self.tgt_ranges) + ">")
+
+
+# BlockImageDiff works on two image objects.  An image object is
+# anything that provides the following attributes:
+#
+#    blocksize: the size in bytes of a block, currently must be 4096.
+#
+#    total_blocks: the total size of the partition/image, in blocks.
+#
+#    care_map: a RangeSet containing which blocks (in the range
+#      [0, total_blocks)) we actually care about; i.e. which blocks
+#      contain data.
+#
+#    file_map: a dict that partitions the blocks contained in care_map
+#      into smaller domains that are useful for doing diffs on.
+#      (Typically a domain is a file, and the key in file_map is the
+#      pathname.)
+#
+#    ReadRangeSet(): a function that takes a RangeSet and returns the
+#      data contained in the image blocks of that RangeSet.  The data
+#      is returned as a list or tuple of strings; concatenating the
+#      elements together should produce the requested data.
+#      Implementations are free to break up the data into list/tuple
+#      elements in any way that is convenient.
+#
+# When creating a BlockImageDiff, the src image may be None, in which
+# case the list of transfers produced will never read from the
+# original image.
+
+class BlockImageDiff(object):
+  def __init__(self, tgt, src=None, threads=None):
+    if threads is None:
+      threads = multiprocessing.cpu_count() // 2
+      if threads == 0: threads = 1
+    self.threads = threads
+
+    self.tgt = tgt
+    if src is None:
+      src = EmptyImage()
+    self.src = src
+
+    # The updater code that installs the patch always uses 4k blocks.
+    assert tgt.blocksize == 4096
+    assert src.blocksize == 4096
+
+    # The range sets in each filemap should comprise a partition of
+    # the care map.
+    self.AssertPartition(src.care_map, src.file_map.values())
+    self.AssertPartition(tgt.care_map, tgt.file_map.values())
+
+  def Compute(self, prefix):
+    # When looking for a source file to use as the diff input for a
+    # target file, we try:
+    #   1) an exact path match if available, otherwise
+    #   2) an exact basename match if available, otherwise
+    #   3) a basename match after all runs of digits are replaced by
+    #      "#" if available, otherwise
+    #   4) we have no source for this target.
+    self.AbbreviateSourceNames()
+    self.FindTransfers()
+
+    # Find the ordering dependencies among transfers (this is O(n^2)
+    # in the number of transfers).
+    self.GenerateDigraph()
+    # Find a sequence of transfers that satisfies as many ordering
+    # dependencies as possible (heuristically).
+    self.FindVertexSequence()
+    # Fix up the ordering dependencies that the sequence didn't
+    # satisfy.
+    self.RemoveBackwardEdges()
+    # Double-check our work.
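+    # (For example, a sequence would fail this check if a "move"
+    # reading blocks 10-19 were ordered after a transfer that had
+    # already written block 15: the move would read data that no
+    # longer matches the source image.)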
+ self.AssertSequenceGood() + + self.ComputePatches(prefix) + self.WriteTransfers(prefix) + + def WriteTransfers(self, prefix): + out = [] + + out.append("1\n") # format version number + total = 0 + performs_read = False + + for xf in self.transfers: + + # zero [rangeset] + # new [rangeset] + # bsdiff patchstart patchlen [src rangeset] [tgt rangeset] + # imgdiff patchstart patchlen [src rangeset] [tgt rangeset] + # move [src rangeset] [tgt rangeset] + # erase [rangeset] + + tgt_size = xf.tgt_ranges.size() + + if xf.style == "new": + assert xf.tgt_ranges + out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw())) + total += tgt_size + elif xf.style == "move": + performs_read = True + assert xf.tgt_ranges + assert xf.src_ranges.size() == tgt_size + if xf.src_ranges != xf.tgt_ranges: + out.append("%s %s %s\n" % ( + xf.style, + xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw())) + total += tgt_size + elif xf.style in ("bsdiff", "imgdiff"): + performs_read = True + assert xf.tgt_ranges + assert xf.src_ranges + out.append("%s %d %d %s %s\n" % ( + xf.style, xf.patch_start, xf.patch_len, + xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw())) + total += tgt_size + elif xf.style == "zero": + assert xf.tgt_ranges + to_zero = xf.tgt_ranges.subtract(xf.src_ranges) + if to_zero: + out.append("%s %s\n" % (xf.style, to_zero.to_string_raw())) + total += to_zero.size() + else: + raise ValueError, "unknown transfer style '%s'\n" % (xf.style,) + + out.insert(1, str(total) + "\n") + + all_tgt = RangeSet(data=(0, self.tgt.total_blocks)) + if performs_read: + # if some of the original data is used, then at the end we'll + # erase all the blocks on the partition that don't contain data + # in the new image. + new_dontcare = all_tgt.subtract(self.tgt.care_map) + if new_dontcare: + out.append("erase %s\n" % (new_dontcare.to_string_raw(),)) + else: + # if nothing is read (ie, this is a full OTA), then we can start + # by erasing the entire partition. + out.insert(2, "erase %s\n" % (all_tgt.to_string_raw(),)) + + with open(prefix + ".transfer.list", "wb") as f: + for i in out: + f.write(i) + + def ComputePatches(self, prefix): + print("Reticulating splines...") + diff_q = [] + patch_num = 0 + with open(prefix + ".new.dat", "wb") as new_f: + for xf in self.transfers: + if xf.style == "zero": + pass + elif xf.style == "new": + for piece in self.tgt.ReadRangeSet(xf.tgt_ranges): + new_f.write(piece) + elif xf.style == "diff": + src = self.src.ReadRangeSet(xf.src_ranges) + tgt = self.tgt.ReadRangeSet(xf.tgt_ranges) + + # We can't compare src and tgt directly because they may have + # the same content but be broken up into blocks differently, eg: + # + # ["he", "llo"] vs ["h", "ello"] + # + # We want those to compare equal, ideally without having to + # actually concatenate the strings (these may be tens of + # megabytes). + + src_sha1 = sha1() + for p in src: + src_sha1.update(p) + tgt_sha1 = sha1() + tgt_size = 0 + for p in tgt: + tgt_sha1.update(p) + tgt_size += len(p) + + if src_sha1.digest() == tgt_sha1.digest(): + # These are identical; we don't need to generate a patch, + # just issue copy commands on the device. + xf.style = "move" + else: + # For files in zip format (eg, APKs, JARs, etc.) we would + # like to use imgdiff -z if possible (because it usually + # produces significantly smaller patches than bsdiff). 
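+            # (A rebuilt APK usually recompresses every zip entry, so
+            # to bsdiff the two files look like noise; imgdiff instead
+            # diffs the decompressed entry contents, which is why its
+            # patches tend to be much smaller for these files.)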
+ # This is permissible if: + # + # - the source and target files are monotonic (ie, the + # data is stored with blocks in increasing order), and + # - we haven't removed any blocks from the source set. + # + # If these conditions are satisfied then appending all the + # blocks in the set together in order will produce a valid + # zip file (plus possibly extra zeros in the last block), + # which is what imgdiff needs to operate. (imgdiff is + # fine with extra zeros at the end of the file.) + imgdiff = (xf.intact and + xf.tgt_name.split(".")[-1].lower() + in ("apk", "jar", "zip")) + xf.style = "imgdiff" if imgdiff else "bsdiff" + diff_q.append((tgt_size, src, tgt, xf, patch_num)) + patch_num += 1 + + else: + assert False, "unknown style " + xf.style + + if diff_q: + if self.threads > 1: + print("Computing patches (using %d threads)..." % (self.threads,)) + else: + print("Computing patches...") + diff_q.sort() + + patches = [None] * patch_num + + lock = threading.Lock() + def diff_worker(): + while True: + with lock: + if not diff_q: return + tgt_size, src, tgt, xf, patchnum = diff_q.pop() + patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff")) + size = len(patch) + with lock: + patches[patchnum] = (patch, xf) + print("%10d %10d (%6.2f%%) %7s %s" % ( + size, tgt_size, size * 100.0 / tgt_size, xf.style, + xf.tgt_name if xf.tgt_name == xf.src_name else ( + xf.tgt_name + " (from " + xf.src_name + ")"))) + + threads = [threading.Thread(target=diff_worker) + for i in range(self.threads)] + for th in threads: + th.start() + while threads: + threads.pop().join() + else: + patches = [] + + p = 0 + with open(prefix + ".patch.dat", "wb") as patch_f: + for patch, xf in patches: + xf.patch_start = p + xf.patch_len = len(patch) + patch_f.write(patch) + p += len(patch) + + def AssertSequenceGood(self): + # Simulate the sequences of transfers we will output, and check that: + # - we never read a block after writing it, and + # - we write every block we care about exactly once. + + # Start with no blocks having been touched yet. + touched = RangeSet() + + # Imagine processing the transfers in order. + for xf in self.transfers: + # Check that the input blocks for this transfer haven't yet been touched. + assert not touched.overlaps(xf.src_ranges) + # Check that the output blocks for this transfer haven't yet been touched. + assert not touched.overlaps(xf.tgt_ranges) + # Touch all the blocks written by this transfer. + touched = touched.union(xf.tgt_ranges) + + # Check that we've written every target block. + assert touched == self.tgt.care_map + + def RemoveBackwardEdges(self): + print("Removing backward edges...") + in_order = 0 + out_of_order = 0 + lost_source = 0 + + for xf in self.transfers: + io = 0 + ooo = 0 + lost = 0 + size = xf.src_ranges.size() + for u in xf.goes_before: + # xf should go before u + if xf.order < u.order: + # it does, hurray! + io += 1 + else: + # it doesn't, boo. trim the blocks that u writes from xf's + # source, so that xf can go after u. 
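+          # (For example, if xf's source is blocks 0-9 and u writes
+          # blocks 5-7, xf's source shrinks to "0-4 8-9"; xf then
+          # diffs against less source data, but the ordering
+          # dependency on u disappears.)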
+ ooo += 1 + assert xf.src_ranges.overlaps(u.tgt_ranges) + xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges) + xf.intact = False + + if xf.style == "diff" and not xf.src_ranges: + # nothing left to diff from; treat as new data + xf.style = "new" + + lost = size - xf.src_ranges.size() + lost_source += lost + in_order += io + out_of_order += ooo + + print((" %d/%d dependencies (%.2f%%) were violated; " + "%d source blocks removed.") % + (out_of_order, in_order + out_of_order, + (out_of_order * 100.0 / (in_order + out_of_order)) + if (in_order + out_of_order) else 0.0, + lost_source)) + + def FindVertexSequence(self): + print("Finding vertex sequence...") + + # This is based on "A Fast & Effective Heuristic for the Feedback + # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of + # it as starting with the digraph G and moving all the vertices to + # be on a horizontal line in some order, trying to minimize the + # number of edges that end up pointing to the left. Left-pointing + # edges will get removed to turn the digraph into a DAG. In this + # case each edge has a weight which is the number of source blocks + # we'll lose if that edge is removed; we try to minimize the total + # weight rather than just the number of edges. + + # Make a copy of the edge set; this copy will get destroyed by the + # algorithm. + for xf in self.transfers: + xf.incoming = xf.goes_after.copy() + xf.outgoing = xf.goes_before.copy() + + # We use an OrderedDict instead of just a set so that the output + # is repeatable; otherwise it would depend on the hash values of + # the transfer objects. + G = OrderedDict() + for xf in self.transfers: + G[xf] = None + s1 = deque() # the left side of the sequence, built from left to right + s2 = deque() # the right side of the sequence, built from right to left + + while G: + + # Put all sinks at the end of the sequence. + while True: + sinks = [u for u in G if not u.outgoing] + if not sinks: break + for u in sinks: + s2.appendleft(u) + del G[u] + for iu in u.incoming: + del iu.outgoing[u] + + # Put all the sources at the beginning of the sequence. + while True: + sources = [u for u in G if not u.incoming] + if not sources: break + for u in sources: + s1.append(u) + del G[u] + for iu in u.outgoing: + del iu.incoming[u] + + if not G: break + + # Find the "best" vertex to put next. "Best" is the one that + # maximizes the net difference in source blocks saved we get by + # pretending it's a source rather than a sink. + + max_d = None + best_u = None + for u in G: + d = sum(u.outgoing.values()) - sum(u.incoming.values()) + if best_u is None or d > max_d: + max_d = d + best_u = u + + u = best_u + s1.append(u) + del G[u] + for iu in u.outgoing: + del iu.incoming[u] + for iu in u.incoming: + del iu.outgoing[u] + + # Now record the sequence in the 'order' field of each transfer, + # and by rearranging self.transfers to be in the chosen sequence. + + new_transfers = [] + for x in itertools.chain(s1, s2): + x.order = len(new_transfers) + new_transfers.append(x) + del x.incoming + del x.outgoing + + self.transfers = new_transfers + + def GenerateDigraph(self): + print("Generating digraph...") + for a in self.transfers: + for b in self.transfers: + if a is b: continue + + # If the blocks written by A are read by B, then B needs to go before A. 
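+        # (For example, if a writes blocks 10-19 and b reads 15-24,
+        # the 5-block overlap 15-19 becomes the weight of the edge
+        # b -> a used by the ordering heuristic below.)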
+ i = a.tgt_ranges.intersect(b.src_ranges) + if i: + size = i.size() + b.goes_before[a] = size + a.goes_after[b] = size + + def FindTransfers(self): + self.transfers = [] + empty = RangeSet() + for tgt_fn, tgt_ranges in self.tgt.file_map.items(): + if tgt_fn == "__ZERO": + # the special "__ZERO" domain is all the blocks not contained + # in any file and that are filled with zeros. We have a + # special transfer style for zero blocks. + src_ranges = self.src.file_map.get("__ZERO", empty) + Transfer(tgt_fn, None, tgt_ranges, src_ranges, "zero", self.transfers) + continue + + elif tgt_fn in self.src.file_map: + # Look for an exact pathname match in the source. + Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn], + "diff", self.transfers) + continue + + b = os.path.basename(tgt_fn) + if b in self.src_basenames: + # Look for an exact basename match in the source. + src_fn = self.src_basenames[b] + Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], + "diff", self.transfers) + continue + + b = re.sub("[0-9]+", "#", b) + if b in self.src_numpatterns: + # Look for a 'number pattern' match (a basename match after + # all runs of digits are replaced by "#"). (This is useful + # for .so files that contain version numbers in the filename + # that get bumped.) + src_fn = self.src_numpatterns[b] + Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], + "diff", self.transfers) + continue + + Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) + + def AbbreviateSourceNames(self): + self.src_basenames = {} + self.src_numpatterns = {} + + for k in self.src.file_map.keys(): + b = os.path.basename(k) + self.src_basenames[b] = k + b = re.sub("[0-9]+", "#", b) + self.src_numpatterns[b] = k + + @staticmethod + def AssertPartition(total, seq): + """Assert that all the RangeSets in 'seq' form a partition of the + 'total' RangeSet (ie, they are nonintersecting and their union + equals 'total').""" + so_far = RangeSet() + for i in seq: + assert not so_far.overlaps(i) + so_far = so_far.union(i) + assert so_far == total diff --git a/tools/releasetools/build_image.py b/tools/releasetools/build_image.py index 5ae8d3c26..a010e8415 100755 --- a/tools/releasetools/build_image.py +++ b/tools/releasetools/build_image.py @@ -28,8 +28,6 @@ import commands import shutil import tempfile -import simg_map - FIXED_SALT = "aee087a5be3b982978c923f566a94613496b417f2af592639bc80d141e34dfe7" def RunCommand(cmd): @@ -150,13 +148,6 @@ def UnsparseImage(sparse_image_path, replace=True): return False, None return True, unsparse_image_path -def MappedUnsparseImage(sparse_image_path, unsparse_image_path, - map_path, mapped_unsparse_image_path): - if simg_map.ComputeMap(sparse_image_path, unsparse_image_path, - map_path, mapped_unsparse_image_path): - return False - return True - def MakeVerityEnabledImage(out_file, prop_dict): """Creates an image that is verifiable using dm-verity. diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py index 09798bc7e..3f8cda7ec 100644 --- a/tools/releasetools/common.py +++ b/tools/releasetools/common.py @@ -652,6 +652,15 @@ def ParseOptions(argv, return args +def MakeTempFile(prefix=None, suffix=None): + """Make a temp file and add it to the list of things to be deleted + when Cleanup() is called. 
+  Return the filename."""
+  fd, fn = tempfile.mkstemp(prefix=prefix, suffix=suffix)
+  os.close(fd)
+  OPTIONS.tempfiles.append(fn)
+  return fn
+
+
 def Cleanup():
   for i in OPTIONS.tempfiles:
     if os.path.isdir(i):
diff --git a/tools/releasetools/edify_generator.py b/tools/releasetools/edify_generator.py
index 86208121e..2bd071d4b 100644
--- a/tools/releasetools/edify_generator.py
+++ b/tools/releasetools/edify_generator.py
@@ -334,10 +334,3 @@ class EdifyGenerator(object):
     data = open(input_path, "rb").read()
     common.ZipWriteStr(output_zip, "META-INF/com/google/android/update-binary",
                        data, perms=0755)
-
-  def Syspatch(self, filename, target_mapfile, target_sha,
-               source_mapfile, source_sha, patchfile):
-    """Applies a compressed binary patch to a block device."""
-    call = 'syspatch("%s", "%s", "%s", "%s", "%s", "%s");'
-    self.script.append(call % (filename, target_mapfile, target_sha,
-                               source_mapfile, source_sha, patchfile))
diff --git a/tools/releasetools/ota_from_target_files b/tools/releasetools/ota_from_target_files
index 9f701679b..bcc3210f3 100755
--- a/tools/releasetools/ota_from_target_files
+++ b/tools/releasetools/ota_from_target_files
@@ -85,6 +85,7 @@ if sys.hexversion < 0x02070000:

 import copy
 import errno
+import multiprocessing
 import os
 import re
 import subprocess
@@ -92,14 +93,13 @@ import tempfile
 import time
 import zipfile

-try:
-  from hashlib import sha1 as sha1
-except ImportError:
-  from sha import sha as sha1
+from hashlib import sha1

 import common
 import edify_generator
 import build_image
+import blockimgdiff
+import sparse_img

 OPTIONS = common.OPTIONS
 OPTIONS.package_key = None
@@ -111,7 +111,9 @@ OPTIONS.wipe_user_data = False
 OPTIONS.omit_prereq = False
 OPTIONS.extra_script = None
 OPTIONS.aslr_mode = True
-OPTIONS.worker_threads = 3
+OPTIONS.worker_threads = multiprocessing.cpu_count() // 2
+if OPTIONS.worker_threads == 0:
+  OPTIONS.worker_threads = 1
 OPTIONS.two_step = False
 OPTIONS.no_signing = False
 OPTIONS.block_based = False
@@ -418,44 +420,20 @@ def CalculateFingerprint(oem_props, oem_dict, info_dict):
       GetOemProperty("ro.product.device", oem_props, oem_dict, info_dict),
       GetBuildProp("ro.build.thumbprint", info_dict))

+
 def GetImage(which, tmpdir, info_dict):
-  # Return (mapdata, data) for the given image.  which should be
-  # "system" or "vendor".
+  # Return an image object (suitable for passing to BlockImageDiff)
+  # for the 'which' partition (must be "system" or "vendor").  If a
+  # prebuilt image and file map are found in tmpdir they are used,
+  # otherwise they are reconstructed from the individual files.

   assert which in ("system", "vendor")

   path = os.path.join(tmpdir, "IMAGES", which + ".img")
-  if os.path.exists(path):
+  mappath = os.path.join(tmpdir, "IMAGES", which + ".map")
+  if os.path.exists(path) and os.path.exists(mappath):
     print "using %s.img from target-files" % (which,)
     # This is a 'new' target-files, which already has the image in it.
-    # The image is a sparse image, though, so we need to unsparse it
-    # and extract the map data.
- - success, name = build_image.UnsparseImage(path, replace=False) - if not success: - assert False, "unsparsing " + which + ".img failed" - - mmap = tempfile.NamedTemporaryFile() - mimg = tempfile.NamedTemporaryFile(delete=False) - success = build_image.MappedUnsparseImage( - path, name, mmap.name, mimg.name) - if not success: - assert False, "creating sparse map failed" - os.unlink(name) - name = mimg.name - - with open(mmap.name) as f: - mapdata = f.read() - - try: - with open(name) as f: - data = f.read() - finally: - os.unlink(name) - - print "unsparsed data sha1 is " + sha1(data).hexdigest() - return mapdata, data else: print "building %s.img from target-files" % (which,) @@ -463,16 +441,47 @@ def GetImage(which, tmpdir, info_dict): # This is an 'old' target-files, which does not contain images # already built. Build them. + mappath = tempfile.mkstemp()[1] + OPTIONS.tempfiles.append(mappath) + import add_img_to_target_files if which == "system": - mapdata, data = add_img_to_target_files.BuildSystem( - tmpdir, info_dict, sparse=False, map_file=True) + path = add_img_to_target_files.BuildSystem( + tmpdir, info_dict, block_list=mappath) elif which == "vendor": - mapdata, data = add_img_to_target_files.BuildVendor( - tmpdir, info_dict, sparse=False, map_file=True) + path = add_img_to_target_files.BuildVendor( + tmpdir, info_dict, block_list=mappath) - print "built data sha1 is " + sha1(data).hexdigest() - return mapdata, data + return sparse_img.SparseImage(path, mappath) + + +class BlockDifference: + def __init__(self, partition, tgt, src=None): + self.partition = partition + + b = blockimgdiff.BlockImageDiff(tgt, src, threads=OPTIONS.worker_threads) + tmpdir = tempfile.mkdtemp() + OPTIONS.tempfiles.append(tmpdir) + self.path = os.path.join(tmpdir, partition) + b.Compute(self.path) + + _, self.device = common.GetTypeAndDevice("/" + partition, OPTIONS.info_dict) + + def WriteScript(self, script, output_zip): + partition = self.partition + with open(self.path + ".transfer.list", "rb") as f: + common.ZipWriteStr(output_zip, partition + ".transfer.list", f.read()) + with open(self.path + ".new.dat", "rb") as f: + common.ZipWriteStr(output_zip, partition + ".new.dat", f.read()) + with open(self.path + ".patch.dat", "rb") as f: + common.ZipWriteStr(output_zip, partition + ".patch.dat", f.read(), + compression=zipfile.ZIP_STORED) + + call = (('block_image_update("%s", ' + 'package_extract_file("%s.transfer.list"), ' + '"%s.new.dat", "%s.patch.dat");\n') % + (self.device, partition, partition, partition)) + script.AppendExtra(script._WordWrap(call)) def WriteFullOTAPackage(input_zip, output_zip): @@ -571,12 +580,14 @@ else if get_stage("%(bcb_dev)s", "stage") == "3/3" then system_items = ItemSet("system", "META/filesystem_config.txt") script.ShowProgress(system_progress, 0) if block_based: - mapdata, data = GetImage("system", OPTIONS.input_tmp, OPTIONS.info_dict) - - common.ZipWriteStr(output_zip, "system.map", mapdata) - common.ZipWriteStr(output_zip, "system.muimg", data) - script.WipeBlockDevice("/system") - script.WriteRawImage("/system", "system.muimg", mapfn="system.map") + # Full OTA is done as an "incremental" against an empty source + # image. This has the effect of writing new data from the package + # to the entire partition, but lets us reuse the updater code that + # writes incrementals to do it. 
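+    # (ResetFileMap below collapses the image's file map into a single
+    # "__DATA" domain covering the whole care map, so every block is
+    # emitted as "new" data instead of being diffed file by file.)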
+ system_tgt = GetImage("system", OPTIONS.input_tmp, OPTIONS.info_dict) + system_tgt.ResetFileMap() + system_diff = BlockDifference("system", system_tgt, src=None) + system_diff.WriteScript(script, output_zip) else: script.FormatPartition("/system") script.Mount("/system") @@ -606,12 +617,10 @@ else if get_stage("%(bcb_dev)s", "stage") == "3/3" then script.ShowProgress(0.1, 0) if block_based: - mapdata, data = GetImage("vendor", OPTIONS.input_tmp, OPTIONS.info_dict) - - common.ZipWriteStr(output_zip, "vendor.map", mapdata) - common.ZipWriteStr(output_zip, "vendor.muimg", data) - script.WipeBlockDevice("/vendor") - script.WriteRawImage("/vendor", "vendor.muimg", mapfn="vendor.map") + vendor_tgt = GetImage("vendor", OPTIONS.input_tmp, OPTIONS.info_dict) + vendor_tgt.ResetFileMap() + vendor_diff = BlockDifference("vendor", vendor_tgt) + vendor_diff.WriteScript(script, output_zip) else: script.FormatPartition("/vendor") script.Mount("/vendor") @@ -656,6 +665,7 @@ endif; script.AddToZip(input_zip, output_zip, input_path=OPTIONS.updater_binary) WriteMetadata(metadata, output_zip) + def WritePolicyConfig(file_context, output_zip): f = open(file_context, 'r'); basename = os.path.basename(file_context) @@ -667,6 +677,7 @@ def WriteMetadata(metadata, output_zip): "".join(["%s=%s\n" % kv for kv in sorted(metadata.iteritems())])) + def LoadPartitionFiles(z, partition): """Load all the files from the given partition in a given target-files ZipFile, and return a dict of {filename: File object}.""" @@ -688,6 +699,7 @@ def GetBuildProp(prop, info_dict): except KeyError: raise common.ExternalError("couldn't find %s in build.prop" % (prop,)) + def AddToKnownPaths(filename, known_paths): if filename[-1] == "/": return @@ -699,44 +711,6 @@ def AddToKnownPaths(filename, known_paths): known_paths.add(path) dirs.pop() -class BlockDifference: - def __init__(self, partition, output_zip): - with tempfile.NamedTemporaryFile() as src_file: - with tempfile.NamedTemporaryFile() as tgt_file: - print "building source " + partition + " image..." - src_file = tempfile.NamedTemporaryFile() - src_mapdata, src_data = GetImage(partition, - OPTIONS.source_tmp, - OPTIONS.source_info_dict) - - self.src_sha1 = sha1(src_data).hexdigest() - print "source " + partition + " sha1:", self.src_sha1 - src_file.write(src_data) - - print "building target " + partition + " image..." 
- tgt_file = tempfile.NamedTemporaryFile() - tgt_mapdata, tgt_data = GetImage(partition, - OPTIONS.target_tmp, - OPTIONS.target_info_dict) - self.tgt_sha1 = sha1(tgt_data).hexdigest() - print "target " + partition + " sha1:", self.tgt_sha1 - tgt_len = len(tgt_data) - tgt_file.write(tgt_data) - - system_type, self.device = common.GetTypeAndDevice("/" + partition, - OPTIONS.info_dict) - self.patch = common.MakePartitionPatch(src_file, tgt_file, partition) - - TestBlockPatch(src_data, src_mapdata, self.patch.data, - tgt_mapdata, self.tgt_sha1) - src_data = None - tgt_data = None - - self.patch.AddToZip(output_zip, compression=zipfile.ZIP_STORED) - self.src_mapfilename = self.patch.name + ".src.map" - common.ZipWriteStr(output_zip, self.src_mapfilename, src_mapdata) - self.tgt_mapfilename = self.patch.name + ".tgt.map" - common.ZipWriteStr(output_zip, self.tgt_mapfilename, tgt_mapdata) def WriteBlockIncrementalOTAPackage(target_zip, source_zip, output_zip): source_version = OPTIONS.source_info_dict["recovery_api_version"] @@ -784,11 +758,18 @@ def WriteBlockIncrementalOTAPackage(target_zip, source_zip, output_zip): "/tmp/recovery.img", "recovery.img", OPTIONS.target_tmp, "RECOVERY") updating_recovery = (source_recovery.data != target_recovery.data) - system_diff = BlockDifference("system", output_zip) + system_src = GetImage("system", OPTIONS.source_tmp, OPTIONS.source_info_dict) + system_tgt = GetImage("system", OPTIONS.target_tmp, OPTIONS.target_info_dict) + system_diff = BlockDifference("system", system_tgt, system_src) + if HasVendorPartition(target_zip): if not HasVendorPartition(source_zip): raise RuntimeError("can't generate incremental that adds /vendor") - vendor_diff = BlockDifference("vendor", output_zip) + vendor_src = GetImage("vendor", OPTIONS.source_tmp, OPTIONS.source_info_dict) + vendor_tgt = GetImage("vendor", OPTIONS.target_tmp, OPTIONS.target_info_dict) + vendor_diff = BlockDifference("vendor", vendor_tgt, vendor_src) + else: + vendor_diff = None oem_props = OPTIONS.target_info_dict.get("oem_fingerprint_properties") oem_dict = None @@ -886,23 +867,32 @@ else device_specific.IncrementalOTA_InstallBegin() - if HasVendorPartition(target_zip): + script.AppendExtra('if range_sha1("%s", "%s") == "%s" then' % + (system_diff.device, system_src.care_map.to_string_raw(), + system_src.TotalSha1())) + script.Print("Patching system image...") + script.ShowProgress(0.8 if vendor_diff else 0.9, 0) + system_diff.WriteScript(script, output_zip) + script.AppendExtra(('else\n' + ' (range_sha1("%s", "%s") == "%s") ||\n' + ' abort("system partition has unexpected contents");\n' + 'endif;') % + (system_diff.device, system_tgt.care_map.to_string_raw(), + system_tgt.TotalSha1())) + + if vendor_diff: + script.AppendExtra('if range_sha1("%s", "%s") == "%s" then' % + (vendor_diff.device, vendor_src.care_map.to_string_raw(), + vendor_src.TotalSha1())) script.Print("Patching vendor image...") script.ShowProgress(0.1, 0) - script.Syspatch(vendor_diff.device, - vendor_diff.tgt_mapfilename, vendor_diff.tgt_sha1, - vendor_diff.src_mapfilename, vendor_diff.src_sha1, - vendor_diff.patch.name) - sys_progress = 0.8 - else: - sys_progress = 0.9 - - script.Print("Patching system image...") - script.ShowProgress(sys_progress, 0) - script.Syspatch(system_diff.device, - system_diff.tgt_mapfilename, system_diff.tgt_sha1, - system_diff.src_mapfilename, system_diff.src_sha1, - system_diff.patch.name) + vendor_diff.WriteScript(script, output_zip) + script.AppendExtra(('else\n' + ' (range_sha1("%s", "%s") == "%s") ||\n' 
+ ' abort("vendor partition has unexpected contents");\n' + 'endif;') % + (vendor_diff.device, vendor_tgt.care_map.to_string_raw(), + vendor_tgt.TotalSha1())) if OPTIONS.two_step: common.ZipWriteStr(output_zip, "boot.img", target_boot.data) @@ -953,61 +943,6 @@ endif; script.AddToZip(target_zip, output_zip, input_path=OPTIONS.updater_binary) WriteMetadata(metadata, output_zip) -def ParseMap(map_str): - x = map_str.split() - assert int(x[0]) == 4096 - assert int(x[1]) == len(x)-2 - return int(x[0]), [int(i) for i in x[2:]] - -def TestBlockPatch(src_muimg, src_map, patch_data, tgt_map, tgt_sha1): - src_blksize, src_regions = ParseMap(src_map) - tgt_blksize, tgt_regions = ParseMap(tgt_map) - - with tempfile.NamedTemporaryFile() as src_file,\ - tempfile.NamedTemporaryFile() as patch_file,\ - tempfile.NamedTemporaryFile() as src_map_file,\ - tempfile.NamedTemporaryFile() as tgt_map_file: - - src_total = sum(src_regions) * src_blksize - src_file.truncate(src_total) - p = 0 - for i in range(0, len(src_regions), 2): - c, dc = src_regions[i:i+2] - src_file.write(src_muimg[p:(p+c*src_blksize)]) - p += c*src_blksize - src_file.seek(dc*src_blksize, 1) - assert src_file.tell() == src_total - - patch_file.write(patch_data) - - src_map_file.write(src_map) - tgt_map_file.write(tgt_map) - - src_file.flush() - src_map_file.flush() - patch_file.flush() - tgt_map_file.flush() - - p = common.Run(["syspatch_host", src_file.name, src_map_file.name, - patch_file.name, src_file.name, tgt_map_file.name], - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - stdoutdata, _ = p.communicate() - if p.returncode != 0: - print stdoutdata - raise ValueError("failed to reconstruct target system image from patch") - - h = sha1() - src_file.seek(0, 0) - for i in range(0, len(tgt_regions), 2): - c, dc = tgt_regions[i:i+2] - h.update(src_file.read(c*tgt_blksize)) - src_file.seek(dc*tgt_blksize, 1) - - if h.hexdigest() != tgt_sha1: - raise ValueError("patch reconstructed incorrect target system image") - - print "test of system image patch succeeded" - class FileDifference: def __init__(self, partition, source_zip, target_zip, output_zip): @@ -1616,8 +1551,6 @@ def main(argv): SignOutput(temp_zip_file.name, args[1]) temp_zip_file.close() - common.Cleanup() - print "done." @@ -1630,3 +1563,5 @@ if __name__ == '__main__': print " ERROR: %s" % (e,) print sys.exit(1) + finally: + common.Cleanup() diff --git a/tools/releasetools/rangelib.py b/tools/releasetools/rangelib.py new file mode 100644 index 000000000..b61714bb6 --- /dev/null +++ b/tools/releasetools/rangelib.py @@ -0,0 +1,161 @@ +from __future__ import print_function +import heapq +import itertools + +__all__ = ["RangeSet"] + +class RangeSet(object): + """A RangeSet represents a set of nonoverlapping ranges on the + integers (ie, a set of integers, but efficient when the set contains + lots of runs.""" + + def __init__(self, data=None): + if data: + self.data = tuple(self._remove_pairs(data)) + else: + self.data = () + + def __iter__(self): + for i in range(0, len(self.data), 2): + yield self.data[i:i+2] + + def __eq__(self, other): + return self.data == other.data + def __ne__(self, other): + return self.data != other.data + def __nonzero__(self): + return bool(self.data) + + def __str__(self): + if not self.data: + return "empty" + else: + return self.to_string() + + @classmethod + def parse(cls, text): + """Parse a text string consisting of a space-separated list of + blocks and ranges, eg "10-20 30 35-40". 
+    Ranges are interpreted to
+    include both their ends (so the above example represents 18
+    individual blocks).  Returns a RangeSet object.
+
+    If the input has all its blocks in increasing order, then the
+    returned RangeSet will have an extra attribute 'monotonic' that
+    is set to True.  For example the input "10-20 30" is monotonic,
+    but the input "15-20 30 10-14" is not, even though they represent
+    the same set of blocks (and the two RangeSets will compare equal
+    with ==).
+    """
+
+    data = []
+    last = -1
+    monotonic = True
+    for p in text.split():
+      if "-" in p:
+        s, e = (int(x) for x in p.split("-"))
+        data.append(s)
+        data.append(e+1)
+        if last <= s <= e:
+          last = e
+        else:
+          monotonic = False
+      else:
+        s = int(p)
+        data.append(s)
+        data.append(s+1)
+        if last <= s:
+          last = s+1
+        else:
+          monotonic = False
+    data.sort()
+    r = RangeSet(cls._remove_pairs(data))
+    r.monotonic = monotonic
+    return r
+
+  @staticmethod
+  def _remove_pairs(source):
+    last = None
+    for i in source:
+      if i == last:
+        last = None
+      else:
+        if last is not None:
+          yield last
+        last = i
+    if last is not None:
+      yield last
+
+  def to_string(self):
+    out = []
+    for i in range(0, len(self.data), 2):
+      s, e = self.data[i:i+2]
+      if e == s+1:
+        out.append(str(s))
+      else:
+        out.append(str(s) + "-" + str(e-1))
+    return " ".join(out)
+
+  def to_string_raw(self):
+    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
+
+  def union(self, other):
+    """Return a new RangeSet representing the union of this RangeSet
+    with the argument."""
+    out = []
+    z = 0
+    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
+                            zip(other.data, itertools.cycle((+1, -1)))):
+      if (z == 0 and d == 1) or (z == 1 and d == -1):
+        out.append(p)
+      z += d
+    return RangeSet(data=out)
+
+  def intersect(self, other):
+    """Return a new RangeSet representing the intersection of this
+    RangeSet with the argument."""
+    out = []
+    z = 0
+    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
+                            zip(other.data, itertools.cycle((+1, -1)))):
+      if (z == 1 and d == 1) or (z == 2 and d == -1):
+        out.append(p)
+      z += d
+    return RangeSet(data=out)
+
+  def subtract(self, other):
+    """Return a new RangeSet representing the result of subtracting
+    the argument from this RangeSet."""
+
+    out = []
+    z = 0
+    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
+                            zip(other.data, itertools.cycle((-1, +1)))):
+      if (z == 0 and d == 1) or (z == 1 and d == -1):
+        out.append(p)
+      z += d
+    return RangeSet(data=out)
+
+  def overlaps(self, other):
+    """Returns true if the argument has a nonempty overlap with this
+    RangeSet."""
+
+    # This is like intersect, but we can stop as soon as we discover the
+    # output is going to be nonempty.
+    z = 0
+    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
+                            zip(other.data, itertools.cycle((+1, -1)))):
+      if (z == 1 and d == 1) or (z == 2 and d == -1):
+        return True
+      z += d
+    return False
+
+  def size(self):
+    """Returns the total size of the RangeSet (ie, how many integers
+    are in the set)."""
+
+    total = 0
+    for i, p in enumerate(self.data):
+      if i % 2:
+        total += p
+      else:
+        total -= p
+    return total
diff --git a/tools/releasetools/simg_map.py b/tools/releasetools/simg_map.py
deleted file mode 100644
index 22dc8635d..000000000
--- a/tools/releasetools/simg_map.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright (C) 2012 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-import getopt, posixpath, signal, struct, sys
-
-def main():
-  if len(sys.argv) == 4:
-    print("No sparse_image_file specified")
-    usage(me)
-
-  sparse_fn = sys.argv[1]
-  unsparse_fn = sys.argv[2]
-  map_file = sys.argv[3]
-  mapped_unsparse_fn = sys.argv[4]
-
-  return ComputeMap(sparse_fn, unsparse_fn, map_file, mapped_unsparse_fn)
-
-
-def ComputeMap(sparse_fn, unsparse_fn, map_file, mapped_unsparse_fn):
-  care_map = []
-
-  with open(sparse_fn, "rb") as FH:
-    header_bin = FH.read(28)
-    header = struct.unpack("<I4H4I", header_bin)
diff --git a/tools/releasetools/sparse_img.py b/tools/releasetools/sparse_img.py
new file mode 100644
--- /dev/null
+++ b/tools/releasetools/sparse_img.py
@@ -0,0 +1,184 @@
+      while to_read > 0:
+        # continue with following chunks if this range spans multiple chunks.
+        idx += 1
+        chunk_start, chunk_len, filepos = self.offset_map[idx]
+        f.seek(filepos, os.SEEK_SET)
+        this_read = min(chunk_len, to_read)
+        yield f.read(this_read * self.blocksize)
+        to_read -= this_read
+
+  def LoadFileBlockMap(self, fn):
+    remaining = self.care_map
+    self.file_map = out = {}
+
+    with open(fn) as f:
+      for line in f:
+        fn, ranges = line.split(None, 1)
+        ranges = RangeSet.parse(ranges)
+        out[fn] = ranges
+        assert ranges.size() == ranges.intersect(remaining).size()
+        remaining = remaining.subtract(ranges)
+
+    # For all the remaining blocks in the care_map (ie, those that
+    # aren't part of the data for any file), divide them into blocks
+    # that are all zero and blocks that aren't.  (Zero blocks are
+    # handled specially because (1) there are usually a lot of them
+    # and (2) bsdiff handles files with long sequences of repeated
+    # bytes especially poorly.)
+
+    zero_blocks = []
+    nonzero_blocks = []
+    reference = '\0' * self.blocksize
+
+    f = self.simg_f
+    for s, e in remaining:
+      for b in range(s, e):
+        idx = bisect.bisect_right(self.offset_index, b) - 1
+        chunk_start, chunk_len, filepos = self.offset_map[idx]
+        filepos += (b-chunk_start) * self.blocksize
+        f.seek(filepos, os.SEEK_SET)
+        data = f.read(self.blocksize)
+
+        if data == reference:
+          zero_blocks.append(b)
+          zero_blocks.append(b+1)
+        else:
+          nonzero_blocks.append(b)
+          nonzero_blocks.append(b+1)
+
+    out["__ZERO"] = RangeSet(data=zero_blocks)
+    out["__NONZERO"] = RangeSet(data=nonzero_blocks)
+
+  def ResetFileMap(self):
+    """Throw away the file map and treat the entire image as
+    undifferentiated data."""
+    self.file_map = {"__DATA": self.care_map}
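
A minimal end-to-end sketch of how the pieces added above fit together
(the image and map paths are hypothetical, and the thread count is
arbitrary; the APIs are the ones introduced by this patch). Compute()
writes <prefix>.transfer.list, <prefix>.new.dat, and <prefix>.patch.dat,
the three files that BlockDifference.WriteScript() packs into the OTA
zip for the updater's block_image_update() call:

    import blockimgdiff
    import sparse_img

    # Wrap the prebuilt sparse images and the block map files emitted
    # by add_img_to_target_files into image objects.
    src = sparse_img.SparseImage("old/IMAGES/system.img", "old/IMAGES/system.map")
    tgt = sparse_img.SparseImage("new/IMAGES/system.img", "new/IMAGES/system.map")

    # Compute the zero/new/move/bsdiff/imgdiff transfers that turn the
    # source blocks into the target blocks, then emit the transfer
    # list and its data blobs under the given prefix.
    b = blockimgdiff.BlockImageDiff(tgt, src, threads=4)
    b.Compute("out/system")

Passing src=None (as WriteFullOTAPackage does) diffs against EmptyImage,
which turns the same machinery into a full-image write.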