qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)

free_cluster_index is only correct if update_refcount() was called from
an allocation function, and even there it's brittle because it's used to
protect unfinished allocations which still have a refcount of 0 - if it
moves in the wrong place, the unfinished allocation can be corrupted.

So not using it any more seems to be a good idea. Instead, use the
first requested cluster to do the calculations. Return -EAGAIN if
unfinished allocations could become invalid and let the caller restart
its search for some free clusters.

The context of creating a snapsnot is one situation where
update_refcount() is called outside of a cluster allocation. For this
case, the change fixes a buffer overflow if a cluster is referenced in
an L2 table that cannot be represented by an existing refcount block.
(new_table[refcount_table_index] was out of bounds)

[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
11.
--Stefan]

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Kevin Wolf 2014-03-28 18:06:31 +01:00 committed by Stefan Hajnoczi
parent 6d33e8e7dc
commit b106ad9185
6 changed files with 65 additions and 44 deletions

View File

@ -194,10 +194,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
* they can describe them themselves.
*
* - We need to consider that at this point we are inside update_refcounts
* and doing the initial refcount increase. This means that some clusters
* have already been allocated by the caller, but their refcount isn't
* accurate yet. free_cluster_index tells us where this allocation ends
* as long as we don't overwrite it by freeing clusters.
* and potentially doing an initial refcount increase. This means that
* some clusters have already been allocated by the caller, but their
* refcount isn't accurate yet. If we allocate clusters for metadata, we
* need to return -EAGAIN to signal the caller that it needs to restart
* the search for free clusters.
*
* - alloc_clusters_noref and qcow2_free_clusters may load a different
* refcount block into the cache
@ -282,7 +283,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
s->refcount_table[refcount_table_index] = new_block;
return 0;
/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
return -EAGAIN;
}
ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
@ -305,8 +309,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Calculate the number of refcount blocks needed so far */
uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
uint64_t blocks_used = (s->free_cluster_index +
refcount_block_clusters - 1) / refcount_block_clusters;
uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
/* And now we need at least one block more for the new metadata */
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
@ -339,8 +342,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
s->refcount_table_size * sizeof(uint64_t));
@ -403,18 +404,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset;
/* Free old table. Remember, we must not change free_cluster_index */
uint64_t old_free_cluster_index = s->free_cluster_index;
/* Free old table. */
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
s->free_cluster_index = old_free_cluster_index;
ret = load_refcount_block(bs, new_block, (void**) refcount_block);
if (ret < 0) {
return ret;
}
return 0;
/* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;
fail_table:
g_free(new_table);
@ -660,12 +662,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
do {
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
if (ret < 0) {
return ret;
}
@ -678,7 +683,6 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_index;
uint64_t old_free_cluster_index;
uint64_t i;
int refcount, ret;
@ -687,30 +691,28 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
return 0;
}
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
do {
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
}
}
}
/* And then allocate them */
old_free_cluster_index = s->free_cluster_index;
s->free_cluster_index = cluster_index + i;
/* And then allocate them */
ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
return ret;
}
s->free_cluster_index = old_free_cluster_index;
return i;
}

View File

@ -1602,7 +1602,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
BlockDriverState* bs;
QCowHeader *header;
uint8_t* refcount_table;
uint64_t* refcount_table;
Error *local_err = NULL;
int ret;
@ -1654,9 +1654,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
/* Write an empty refcount table */
refcount_table = g_malloc0(cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@ -1681,7 +1682,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");

View File

@ -475,7 +475,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write
write failed: No space left on device
10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@ -499,7 +499,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write
write failed: No space left on device
10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@ -523,7 +523,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write
write failed: No space left on device
10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824

View File

@ -1,6 +1,6 @@
No errors were found on the image.
7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters
Image end offset: 4296448000
Image end offset: 4296152064
.
----------------------------------------------------------------------
Ran 1 tests

View File

@ -56,6 +56,8 @@ offset_header_size=100
offset_ext_magic=$header_size
offset_ext_size=$((header_size + 4))
offset_l2_table_0=$((0x40000))
echo
echo "== Huge header size =="
_make_test_img 64M
@ -143,6 +145,15 @@ poke_file "$TEST_IMG" "$offset_backing_file_offset" "\x00\x00\x00\x00\x00\x00\x1
poke_file "$TEST_IMG" "$offset_backing_file_size" "\xff\xff\xff\xff"
{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
echo
echo "== Invalid L2 entry (huge physical offset) =="
_make_test_img 64M
{ $QEMU_IO -c "write 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
poke_file "$TEST_IMG" "$offset_l2_table_0" "\xbf\xff\xff\xff\xff\xff\x00\x00"
{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
poke_file "$TEST_IMG" "$offset_l2_table_0" "\x80\x00\x00\xff\xff\xff\x00\x00"
{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
# success, all done
echo "*** done"
rm -f $seq.full

View File

@ -63,4 +63,11 @@ no file open, try 'help open'
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
no file open, try 'help open'
== Invalid L2 entry (huge physical offset) ==
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
qemu-img: Could not create snapshot 'test': -27 (File too large)
qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
*** done