
Merge remote-tracking branch 'remotes/vsementsov/tags/pull-nbd-2022-02-09-v2' into staging

nbd: handle AioContext change correctly

v2: add my s-o-b marks to each commit

# gpg: Signature made Fri 11 Feb 2022 13:14:55 GMT
# gpg:                using RSA key 8B9C26CDB2FD147C880E86A1561F24C1F19F79FB
# gpg: Good signature from "Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 8B9C 26CD B2FD 147C 880E  86A1 561F 24C1 F19F 79FB

* remotes/vsementsov/tags/pull-nbd-2022-02-09-v2:
  iotests/281: Let NBD connection yield in iothread
  block/nbd: Move s->ioc on AioContext change
  iotests/281: Test lingering timers
  iotests.py: Add QemuStorageDaemon class
  block/nbd: Assert there are no timers when closed
  block/nbd: Delete open timer when done
  block/nbd: Delete reconnect delay timer when done

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Commit 48033ad678 by Peter Maydell, 2022-02-12 22:04:07 +00:00
4 changed files with 205 additions and 4 deletions

block/nbd.c

@@ -110,6 +110,10 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs)
     yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));

+    /* Must not leave timers behind that would access freed data */
+    assert(!s->reconnect_delay_timer);
+    assert(!s->open_timer);
+
     object_unref(OBJECT(s->tlscreds));
     qapi_free_SocketAddress(s->saddr);
     s->saddr = NULL;
@@ -381,6 +385,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
     }

     nbd_co_do_establish_connection(s->bs, NULL);
+
+    /*
+     * The reconnect attempt is done (maybe successfully, maybe not), so
+     * we no longer need this timer. Delete it so it will not outlive
+     * this I/O request (so draining removes all timers).
+     */
+    reconnect_delay_timer_del(s);
 }

 static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle)
@@ -1878,11 +1889,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }

+    /*
+     * The connect attempt is done, so we no longer need this timer.
+     * Delete it, because we do not want it to be around when this node
+     * is drained or closed.
+     */
+    open_timer_del(s);
+
     nbd_client_connection_enable_retry(s->conn);

     return 0;

 fail:
+    open_timer_del(s);
     nbd_clear_bdrvstate(bs);
     return ret;
 }
@@ -2036,6 +2055,42 @@ static void nbd_cancel_in_flight(BlockDriverState *bs)
     nbd_co_establish_connection_cancel(s->conn);
 }

+static void nbd_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    /* The open_timer is used only during nbd_open() */
+    assert(!s->open_timer);
+
+    /*
+     * The reconnect_delay_timer is scheduled in I/O paths when the
+     * connection is lost, to cancel the reconnection attempt after a
+     * given time. Once this attempt is done (successfully or not),
+     * nbd_reconnect_attempt() ensures the timer is deleted before the
+     * respective I/O request is resumed.
+     * Since the AioContext can only be changed when a node is drained,
+     * the reconnect_delay_timer cannot be active here.
+     */
+    assert(!s->reconnect_delay_timer);
+
+    if (s->ioc) {
+        qio_channel_attach_aio_context(s->ioc, new_context);
+    }
+}
+
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    assert(!s->open_timer);
+    assert(!s->reconnect_delay_timer);
+
+    if (s->ioc) {
+        qio_channel_detach_aio_context(s->ioc);
+    }
+}
+
 static BlockDriver bdrv_nbd = {
     .format_name = "nbd",
     .protocol_name = "nbd",
@@ -2059,6 +2114,9 @@ static BlockDriver bdrv_nbd = {
     .bdrv_dirname = nbd_dirname,
     .strong_runtime_opts = nbd_strong_runtime_opts,
     .bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context = nbd_attach_aio_context,
+    .bdrv_detach_aio_context = nbd_detach_aio_context,
 };

 static BlockDriver bdrv_nbd_tcp = {
@@ -2084,6 +2142,9 @@ static BlockDriver bdrv_nbd_tcp = {
     .bdrv_dirname = nbd_dirname,
     .strong_runtime_opts = nbd_strong_runtime_opts,
     .bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context = nbd_attach_aio_context,
+    .bdrv_detach_aio_context = nbd_detach_aio_context,
 };

 static BlockDriver bdrv_nbd_unix = {
@@ -2109,6 +2170,9 @@ static BlockDriver bdrv_nbd_unix = {
     .bdrv_dirname = nbd_dirname,
     .strong_runtime_opts = nbd_strong_runtime_opts,
     .bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context = nbd_attach_aio_context,
+    .bdrv_detach_aio_context = nbd_detach_aio_context,
 };

 static void bdrv_nbd_init(void)
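
The new .bdrv_attach_aio_context/.bdrv_detach_aio_context callbacks above run when the NBD node's AioContext changes, for example when the node is moved into an iothread. As an illustration only (not part of this commit), here is a minimal sketch of how such a change can be triggered from an iotest via QMP; it assumes an NBD export is already listening on the socket and reuses the node and iothread IDs from the 281 test below:

    import os
    import iotests

    # Assumes an NBD server (e.g. a qemu-storage-daemon export named 'exp')
    # is already listening on this socket.
    sock = os.path.join(iotests.sock_dir, 'nbd.sock')

    vm = iotests.VM()
    vm.add_object('iothread,id=iothr')
    vm.add_blockdev('nbd,node-name=nbd,server.type=unix,'
                    f'server.path={sock},export=exp')
    vm.launch()

    # Moving the node into the iothread drains it and switches its
    # AioContext, which invokes nbd_detach_aio_context() and then
    # nbd_attach_aio_context() with the iothread's context.
    result = vm.qmp('x-blockdev-set-iothread', node_name='nbd', iothread='iothr')
    assert result['return'] == {}

    vm.shutdown()

The diff to tests/qemu-iotests/281 below does exactly this, additionally throttling reads on the server side so that a request actually yields while the node is in the iothread.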

tests/qemu-iotests/281

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# group: rw quick
+# group: rw
 #
 # Test cases for blockdev + IOThread interactions
 #
@@ -20,8 +20,9 @@
 #

 import os
+import time

 import iotests
-from iotests import qemu_img
+from iotests import qemu_img, QemuStorageDaemon

 image_len = 64 * 1024 * 1024
@@ -243,6 +244,102 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase):
         # Hangs on failure, we expect this error.
         self.assert_qmp(result, 'error/class', 'GenericError')

+
+# Test for RHBZ#2033626
+class TestYieldingAndTimers(iotests.QMPTestCase):
+    sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+    qsd = None
+
+    def setUp(self):
+        self.create_nbd_export()
+
+        # Simple VM with an NBD block device connected to the NBD export
+        # provided by the QSD, and an (initially unused) iothread
+        self.vm = iotests.VM()
+        self.vm.add_object('iothread,id=iothr')
+        self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' +
+                             f'server.path={self.sock},export=exp,' +
+                             'reconnect-delay=1,open-timeout=1')
+
+        self.vm.launch()
+
+    def tearDown(self):
+        self.stop_nbd_export()
+        self.vm.shutdown()
+
+    def test_timers_with_blockdev_del(self):
+        # The NBD BDS will have had an active open timer, because setUp() gave
+        # a positive value for @open-timeout. It should be gone once the BDS
+        # has been opened.
+        # (But there used to be a bug where it remained active, which will
+        # become important below.)
+
+        # Stop and restart the NBD server, and do some I/O on the client to
+        # trigger a reconnect and start the reconnect delay timer
+        self.stop_nbd_export()
+        self.create_nbd_export()
+
+        result = self.vm.qmp('human-monitor-command',
+                             command_line='qemu-io nbd "write 0 512"')
+        self.assert_qmp(result, 'return', '')
+
+        # Reconnect is done, so the reconnect delay timer should be gone.
+        # (This is similar to how the open timer should be gone after open,
+        # and similarly there used to be a bug where it was not gone.)
+
+        # Delete the BDS to see whether both timers are gone. If they are not,
+        # they will remain active, fire later, and then access freed data.
+        # (Or, with "block/nbd: Assert there are no timers when closed"
+        # applied, the assertions added in that patch will fail.)
+        result = self.vm.qmp('blockdev-del', node_name='nbd')
+        self.assert_qmp(result, 'return', {})
+
+        # Give the timers some time to fire (both have a timeout of 1 s).
+        # (Sleeping in an iotest may ring some alarm bells, but note that if
+        # the timing is off here, the test will just always pass. If we kill
+        # the VM too early, then we just kill the timers before they can fire,
+        # thus not see the error, and so the test will pass.)
+        time.sleep(2)
+
+    def test_yield_in_iothread(self):
+        # Move the NBD node to the I/O thread; the NBD block driver should
+        # attach the connection's QIOChannel to that thread's AioContext, too
+        result = self.vm.qmp('x-blockdev-set-iothread',
+                             node_name='nbd', iothread='iothr')
+        self.assert_qmp(result, 'return', {})
+
+        # Do some I/O that will be throttled by the QSD, so that the network
+        # connection hopefully will yield here. When it is resumed, it must
+        # then be resumed in the I/O thread's AioContext.
+        result = self.vm.qmp('human-monitor-command',
+                             command_line='qemu-io nbd "read 0 128K"')
+        self.assert_qmp(result, 'return', '')
+
+    def create_nbd_export(self):
+        assert self.qsd is None
+
+        # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s),
+        # writes are not.
+        self.qsd = QemuStorageDaemon(
+            '--object',
+            'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536',
+            '--blockdev',
+            'null-co,node-name=null,read-zeroes=true',
+            '--blockdev',
+            'throttle,node-name=thr,file=null,throttle-group=thrgr',
+            '--nbd-server',
+            f'addr.type=unix,addr.path={self.sock}',
+            '--export',
+            'nbd,id=exp,node-name=thr,name=exp,writable=true'
+        )
+
+    def stop_nbd_export(self):
+        self.qsd.stop()
+        self.qsd = None
+
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2'],
                  supported_protocols=['file'],

tests/qemu-iotests/281.out

@@ -1,5 +1,5 @@
-....
+......
 ----------------------------------------------------------------------
-Ran 4 tests
+Ran 6 tests

 OK

tests/qemu-iotests/iotests.py

@@ -73,6 +73,8 @@
 qemu_prog = os.environ.get('QEMU_PROG', 'qemu')
 qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ')

+qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon')
+
 gdb_qemu_env = os.environ.get('GDB_OPTIONS')
 qemu_gdb = []
 if gdb_qemu_env:
@@ -345,6 +347,44 @@ def cmd(self, cmd):
         return self._read_output()


+class QemuStorageDaemon:
+    def __init__(self, *args: str, instance_id: str = 'a'):
+        assert '--pidfile' not in args
+        self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
+        all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
+
+        # Cannot use with here, we want the subprocess to stay around
+        # pylint: disable=consider-using-with
+        self._p = subprocess.Popen(all_args)
+        while not os.path.exists(self.pidfile):
+            if self._p.poll() is not None:
+                cmd = ' '.join(all_args)
+                raise RuntimeError(
+                    'qemu-storage-daemon terminated with exit code ' +
+                    f'{self._p.returncode}: {cmd}')
+
+            time.sleep(0.01)
+
+        with open(self.pidfile, encoding='utf-8') as f:
+            self._pid = int(f.read().strip())
+
+        assert self._pid == self._p.pid
+
+    def stop(self, kill_signal=15):
+        self._p.send_signal(kill_signal)
+        self._p.wait()
+        self._p = None
+
+        try:
+            os.remove(self.pidfile)
+        except OSError:
+            pass
+
+    def __del__(self):
+        if self._p is not None:
+            self.stop(kill_signal=9)
+
+
 def qemu_nbd(*args):
     '''Run qemu-nbd in daemon mode and return the parent's exit code'''
     return subprocess.call(qemu_nbd_args + ['--fork'] + list(args))
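
For reference, the QemuStorageDaemon class added above is used roughly as follows. This is a minimal illustrative sketch (not part of the commit) that mirrors the setup in test 281; the export name, node name, and socket path are example values:

    import os
    import iotests
    from iotests import QemuStorageDaemon

    sock = os.path.join(iotests.sock_dir, 'nbd.sock')

    # Start a storage daemon exporting a null block device over NBD on a
    # Unix socket; __init__ blocks until the daemon has written its pidfile.
    qsd = QemuStorageDaemon(
        '--blockdev', 'null-co,node-name=null,read-zeroes=true',
        '--nbd-server', f'addr.type=unix,addr.path={sock}',
        '--export', 'nbd,id=exp,node-name=null,name=exp,writable=true')

    # ... point an NBD client (e.g. a VM's nbd blockdev) at the socket ...

    # Send SIGTERM to the daemon, wait for it to exit, and remove the pidfile.
    qsd.stop()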