qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)

free_cluster_index is only correct if update_refcount() was called from an allocation function, and even there it's brittle because it's used to protect unfinished allocations which still have a refcount of 0 - if it moves in the wrong place, the unfinished allocation can be corrupted. So not using it any more seems to be a good idea. Instead, use the first requested cluster to do the calculations. Return -EAGAIN if unfinished allocations could become invalid and let the caller restart its search for some free clusters. The context of creating a snapsnot is one situation where update_refcount() is called outside of a cluster allocation. For this case, the change fixes a buffer overflow if a cluster is referenced in an L2 table that cannot be represented by an existing refcount block. (new_table[refcount_table_index] was out of bounds) [Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to 11. --Stefan] Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-03-28 18:06:31 +01:00 · 2014-03-28 18:06:31 +01:00 · b106ad9185
parent 6d33e8e7dc
commit b106ad9185
6 changed files with 65 additions and 44 deletions
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@ -194,10 +194,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
     *   they can describe them themselves.
     *
     * - We need to consider that at this point we are inside update_refcounts
-     *   and doing the initial refcount increase. This means that some clusters
-     *   have already been allocated by the caller, but their refcount isn't
-     *   accurate yet. free_cluster_index tells us where this allocation ends
-     *   as long as we don't overwrite it by freeing clusters.
+     *   and potentially doing an initial refcount increase. This means that
+     *   some clusters have already been allocated by the caller, but their
+     *   refcount isn't accurate yet. If we allocate clusters for metadata, we
+     *   need to return -EAGAIN to signal the caller that it needs to restart
+     *   the search for free clusters.
     *
     * - alloc_clusters_noref and qcow2_free_clusters may load a different
     *   refcount block into the cache
@ -282,7 +283,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
        }

        s->refcount_table[refcount_table_index] = new_block;
-        return 0;
+
+        /* The new refcount block may be where the caller intended to put its
+         * data, so let it restart the search. */
+        return -EAGAIN;
    }

    ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
@ -305,8 +309,7 @@ static int alloc_refcount_block(BlockDriverState *bs,

    /* Calculate the number of refcount blocks needed so far */
    uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
-    uint64_t blocks_used = (s->free_cluster_index +
-        refcount_block_clusters - 1) / refcount_block_clusters;
+    uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);

    /* And now we need at least one block more for the new metadata */
    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
@ -339,8 +342,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
    uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
    uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));

-    assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
-
    /* Fill the new refcount table */
    memcpy(new_table, s->refcount_table,
        s->refcount_table_size * sizeof(uint64_t));
@ -403,18 +404,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;

-    /* Free old table. Remember, we must not change free_cluster_index */
-    uint64_t old_free_cluster_index = s->free_cluster_index;
+    /* Free old table. */
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);
-    s->free_cluster_index = old_free_cluster_index;

    ret = load_refcount_block(bs, new_block, (void**) refcount_block);
    if (ret < 0) {
        return ret;
    }

-    return 0;
+    /* If we were trying to do the initial refcount update for some cluster
+     * allocation, we might have used the same clusters to store newly
+     * allocated metadata. Make the caller search some new space. */
+    return -EAGAIN;

 fail_table:
    g_free(new_table);
@ -660,12 +662,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
-    offset = alloc_clusters_noref(bs, size);
-    if (offset < 0) {
-        return offset;
-    }
+    do {
+        offset = alloc_clusters_noref(bs, size);
+        if (offset < 0) {
+            return offset;
+        }
+
+        ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+    } while (ret == -EAGAIN);

-    ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
    if (ret < 0) {
        return ret;
    }
@ -678,7 +683,6 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t cluster_index;
-    uint64_t old_free_cluster_index;
    uint64_t i;
    int refcount, ret;

@ -687,30 +691,28 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
        return 0;
    }

-    /* Check how many clusters there are free */
-    cluster_index = offset >> s->cluster_bits;
-    for(i = 0; i < nb_clusters; i++) {
-        refcount = get_refcount(bs, cluster_index++);
+    do {
+        /* Check how many clusters there are free */
+        cluster_index = offset >> s->cluster_bits;
+        for(i = 0; i < nb_clusters; i++) {
+            refcount = get_refcount(bs, cluster_index++);

-        if (refcount < 0) {
-            return refcount;
-        } else if (refcount != 0) {
-            break;
+            if (refcount < 0) {
+                return refcount;
+            } else if (refcount != 0) {
+                break;
+            }
        }
-    }

-    /* And then allocate them */
-    old_free_cluster_index = s->free_cluster_index;
-    s->free_cluster_index = cluster_index + i;
+        /* And then allocate them */
+        ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
+                              QCOW2_DISCARD_NEVER);
+    } while (ret == -EAGAIN);

-    ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
-                          QCOW2_DISCARD_NEVER);
    if (ret < 0) {
        return ret;
    }

-    s->free_cluster_index = old_free_cluster_index;
-
    return i;
 }

--- a/block/qcow2.c
+++ b/block/qcow2.c
@ -1602,7 +1602,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     */
    BlockDriverState* bs;
    QCowHeader *header;
-    uint8_t* refcount_table;
+    uint64_t* refcount_table;
    Error *local_err = NULL;
    int ret;

@ -1654,9 +1654,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        goto out;
    }

-    /* Write an empty refcount table */
-    refcount_table = g_malloc0(cluster_size);
-    ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
+    /* Write a refcount table with one refcount block */
+    refcount_table = g_malloc0(2 * cluster_size);
+    refcount_table[0] = cpu_to_be64(2 * cluster_size);
+    ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
    g_free(refcount_table);

    if (ret < 0) {
@ -1681,7 +1682,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        goto out;
    }

-    ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
+    ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
--- a/tests/qemu-iotests/026.out
+++ b/tests/qemu-iotests/026.out
@ -475,7 +475,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write 
 write failed: No space left on device

-10 leaked clusters were found on the image.
+11 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 

@ -499,7 +499,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write 
 write failed: No space left on device

-10 leaked clusters were found on the image.
+11 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 

@ -523,7 +523,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write 
 write failed: No space left on device

-10 leaked clusters were found on the image.
+11 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 

--- a/tests/qemu-iotests/044.out
+++ b/tests/qemu-iotests/044.out
@ -1,6 +1,6 @@
 No errors were found on the image.
 7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters
-Image end offset: 4296448000
+Image end offset: 4296152064
 .
 ----------------------------------------------------------------------
 Ran 1 tests
--- a/tests/qemu-iotests/080
+++ b/tests/qemu-iotests/080
@ -56,6 +56,8 @@ offset_header_size=100
 offset_ext_magic=$header_size
 offset_ext_size=$((header_size + 4))

+offset_l2_table_0=$((0x40000))
+
 echo
 echo "== Huge header size =="
 _make_test_img 64M
@ -143,6 +145,15 @@ poke_file "$TEST_IMG" "$offset_backing_file_offset" "\x00\x00\x00\x00\x00\x00\x1
 poke_file "$TEST_IMG" "$offset_backing_file_size" "\xff\xff\xff\xff"
 { $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir

+echo
+echo "== Invalid L2 entry (huge physical offset) =="
+_make_test_img 64M
+{ $QEMU_IO -c "write 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+poke_file "$TEST_IMG" "$offset_l2_table_0" "\xbf\xff\xff\xff\xff\xff\x00\x00"
+{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+poke_file "$TEST_IMG" "$offset_l2_table_0" "\x80\x00\x00\xff\xff\xff\x00\x00"
+{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
--- a/tests/qemu-iotests/080.out
+++ b/tests/qemu-iotests/080.out
@ -63,4 +63,11 @@ no file open, try 'help open'
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
 qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
 no file open, try 'help open'
+
+== Invalid L2 entry (huge physical offset) ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Could not create snapshot 'test': -27 (File too large)
+qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
 *** done