Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201218a' into staging

Monitor, virtiofsd and migration pull

HMP cleanups
Migration fixes
  Note the change in behaviour: a migration started while the VM is in
    the postmigrate state is now rejected with an error rather than crashing
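
  A rough sketch of the new behaviour (the destination URI is a placeholder
  and the exact monitor output formatting is illustrative): once a migration
  completes and the source VM sits in the postmigrate runstate, a second
  attempt now fails cleanly:

    (qemu) migrate tcp:desthost:4444
    (qemu) info status
    VM status: paused (postmigrate)
    (qemu) migrate tcp:desthost:4444
    Error: Can't migrate the vm that was paused due to previous migration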

Virtiofsd cleanups and fixes
  --thread-pool-size=0 for no thread pool (faster for some workloads)
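
  A possible invocation using the new option (socket path, shared directory
  and cache mode are placeholders for whatever the deployment uses); with
  --thread-pool-size=0 each queue's requests are processed inline on the
  queue thread instead of being handed to a worker pool:

    virtiofsd --socket-path=/tmp/vhostqemu \
        -o source=/srv/shared -o cache=auto \
        --thread-pool-size=0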

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Fri 18 Dec 2020 10:39:37 GMT
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20201218a:
  migration: Don't allow migration if vm is in POSTMIGRATE
  savevm: Delete snapshots just created in case of error
  savevm: Remove dead code in save_snapshot()
  docs/devel/migration: Improve debugging section a bit
  virtiofsd: Remove useless code about send_notify_iov
  virtiofsd: update FUSE_FORGET comment on "lo_inode.nlookup"
  virtiofsd: Check file type in lo_flush()
  virtiofsd: Disable posix_lock hash table if remote locks are not enabled
  virtiofsd: Set up posix_lock hash table for root inode
  virtiofsd: make the debug log timestamp on stderr more human-readable
  virtiofsd: Use --thread-pool-size=0 to mean no thread pool
  hmp-commands.hx: List abbreviation after command for cont, quit, print
  monitor:Don't use '#' flag of printf format ('%#') in format strings
  monitor:braces {} are necessary for all arms of this statement
  monitor:open brace '{' following struct go on the same line

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 3fb340ccf5 (Peter Maydell, 2020-12-31 15:55:11 +00:00)
9 changed files with 130 additions and 152 deletions

View File

@@ -53,22 +53,23 @@ savevm/loadvm functionality.
Debugging
=========
The migration stream can be analyzed thanks to `scripts/analyze_migration.py`.
The migration stream can be analyzed thanks to `scripts/analyze-migration.py`.
Example usage:
.. code-block:: shell
$ qemu-system-x86_64
(qemu) migrate "exec:cat > mig"
$ ./scripts/analyze_migration.py -f mig
$ qemu-system-x86_64 -display none -monitor stdio
(qemu) migrate "exec:cat > mig"
(qemu) q
$ ./scripts/analyze-migration.py -f mig
{
"ram (3)": {
"section sizes": {
"pc.ram": "0x0000000008000000",
...
See also ``analyze_migration.py -h`` help for more options.
See also ``analyze-migration.py -h`` help for more options.
Common infrastructure
=====================

View File

@@ -40,7 +40,7 @@ SRST
ERST
{
.name = "q|quit",
.name = "quit|q",
.args_type = "",
.params = "",
.help = "quit the emulator",
@@ -49,7 +49,7 @@ ERST
},
SRST
``q`` or ``quit``
``quit`` or ``q``
Quit the emulator.
ERST
@@ -401,7 +401,7 @@ SRST
ERST
{
.name = "c|cont",
.name = "cont|c",
.args_type = "",
.params = "",
.help = "resume emulation",
@@ -409,7 +409,7 @@ ERST
},
SRST
``c`` or ``cont``
``cont`` or ``c``
Resume emulation.
ERST
@@ -554,7 +554,7 @@ SRST
ERST
{
.name = "p|print",
.name = "print|p",
.args_type = "fmt:/,val:l",
.params = "/fmt expr",
.help = "print expression value (use $reg for CPU register access)",
@@ -562,7 +562,7 @@ ERST
},
SRST
``p`` or ``print/``\ *fmt* *expr*
``print`` or ``p/``\ *fmt* *expr*
Print expression value. Only the *format* part of *fmt* is
used.
ERST

View File

@@ -2102,6 +2102,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
if (runstate_check(RUN_STATE_POSTMIGRATE)) {
error_setg(errp, "Can't migrate the vm that was paused due to "
"previous migration");
return false;
}
if (migration_is_blocked(errp)) {
return false;
}

View File

@@ -2728,7 +2728,7 @@ int qemu_load_device_state(QEMUFile *f)
int save_snapshot(const char *name, Error **errp)
{
BlockDriverState *bs, *bs1;
QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
QEMUSnapshotInfo sn1, *sn = &sn1;
int ret = -1, ret2;
QEMUFile *f;
int saved_vm_running;
@@ -2797,13 +2797,7 @@ int save_snapshot(const char *name, Error **errp)
}
if (name) {
ret = bdrv_snapshot_find(bs, old_sn, name);
if (ret >= 0) {
pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
} else {
pstrcpy(sn->name, sizeof(sn->name), name);
}
pstrcpy(sn->name, sizeof(sn->name), name);
} else {
/* cast below needed for OpenBSD where tv_sec is still 'long' */
localtime_r((const time_t *)&tv.tv_sec, &tm);
@@ -2839,6 +2833,7 @@ int save_snapshot(const char *name, Error **errp)
if (ret < 0) {
error_setg(errp, "Error while creating snapshot on '%s'",
bdrv_get_device_or_node_name(bs));
bdrv_all_delete_snapshot(sn->name, &bs, NULL);
goto the_end;
}

View File

@@ -1549,8 +1549,7 @@ end:
hmp_handle_error(mon, err);
}
typedef struct HMPMigrationStatus
{
typedef struct HMPMigrationStatus {
QEMUTimer *timer;
Monitor *mon;
bool is_block_migration;

View File

@@ -492,8 +492,10 @@ static void hmp_singlestep(Monitor *mon, const QDict *qdict)
static void hmp_gdbserver(Monitor *mon, const QDict *qdict)
{
const char *device = qdict_get_try_str(qdict, "device");
if (!device)
if (!device) {
device = "tcp::" DEFAULT_GDBSTUB_PORT;
}
if (gdbserver_start(device) < 0) {
monitor_printf(mon, "Could not open gdbserver on device '%s'\n",
device);
@@ -559,10 +561,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize,
}
len = wsize * count;
if (wsize == 1)
if (wsize == 1) {
line_size = 8;
else
} else {
line_size = 16;
}
max_digits = 0;
switch(format) {
@@ -583,10 +586,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize,
}
while (len > 0) {
if (is_physical)
if (is_physical) {
monitor_printf(mon, TARGET_FMT_plx ":", addr);
else
} else {
monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr);
}
l = len;
if (l > line_size)
l = line_size;
@@ -915,7 +919,7 @@ static void hmp_ioport_read(Monitor *mon, const QDict *qdict)
suffix = 'l';
break;
}
monitor_printf(mon, "port%c[0x%04x] = %#0*x\n",
monitor_printf(mon, "port%c[0x%04x] = 0x%0*x\n",
suffix, addr, size * 2, val);
}

View File

@@ -2143,104 +2143,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
send_reply_ok(req, NULL, 0);
}
static int send_notify_iov(struct fuse_session *se, int notify_code,
struct iovec *iov, int count)
{
struct fuse_out_header out = {
.error = notify_code,
};
if (!se->got_init) {
return -ENOTCONN;
}
iov[0].iov_base = &out;
iov[0].iov_len = sizeof(struct fuse_out_header);
return fuse_send_msg(se, NULL, iov, count);
}
int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph)
{
if (ph != NULL) {
struct fuse_notify_poll_wakeup_out outarg = {
.kh = ph->kh,
};
struct iovec iov[2];
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2);
} else {
return 0;
}
}
int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
off_t off, off_t len)
{
struct fuse_notify_inval_inode_out outarg = {
.ino = ino,
.off = off,
.len = len,
};
struct iovec iov[2];
if (!se) {
return -EINVAL;
}
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2);
}
int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
const char *name, size_t namelen)
{
struct fuse_notify_inval_entry_out outarg = {
.parent = parent,
.namelen = namelen,
};
struct iovec iov[3];
if (!se) {
return -EINVAL;
}
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
iov[2].iov_base = (void *)name;
iov[2].iov_len = namelen + 1;
return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3);
}
int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
fuse_ino_t child, const char *name,
size_t namelen)
{
struct fuse_notify_delete_out outarg = {
.parent = parent,
.child = child,
.namelen = namelen,
};
struct iovec iov[3];
if (!se) {
return -EINVAL;
}
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
iov[2].iov_base = (void *)name;
iov[2].iov_len = namelen + 1;
return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3);
}
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
off_t offset, struct fuse_bufvec *bufv)
{

View File

@@ -578,13 +578,18 @@ static void *fv_queue_thread(void *opaque)
struct VuDev *dev = &qi->virtio_dev->dev;
struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
struct fuse_session *se = qi->virtio_dev->se;
GThreadPool *pool;
GThreadPool *pool = NULL;
GList *req_list = NULL;
pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, FALSE,
NULL);
if (!pool) {
fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
return NULL;
if (se->thread_pool_size) {
fuse_log(FUSE_LOG_DEBUG, "%s: Creating thread pool for Queue %d\n",
__func__, qi->qidx);
pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size,
FALSE, NULL);
if (!pool) {
fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
return NULL;
}
}
fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
@@ -659,14 +664,27 @@ static void *fv_queue_thread(void *opaque)
req->reply_sent = false;
g_thread_pool_push(pool, req, NULL);
if (!se->thread_pool_size) {
req_list = g_list_prepend(req_list, req);
} else {
g_thread_pool_push(pool, req, NULL);
}
}
pthread_mutex_unlock(&qi->vq_lock);
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
/* Process all the requests. */
if (!se->thread_pool_size && req_list != NULL) {
g_list_foreach(req_list, fv_queue_worker, qi);
g_list_free(req_list);
req_list = NULL;
}
}
g_thread_pool_free(pool, FALSE, TRUE);
if (pool) {
g_thread_pool_free(pool, FALSE, TRUE);
}
return NULL;
}

View File

@@ -101,7 +101,7 @@ struct lo_inode {
* This counter keeps the inode alive during the FUSE session.
* Incremented when the FUSE inode number is sent in a reply
* (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is
* released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc.
* released by a FUSE_FORGET request.
*
* Note that this value is untrusted because the client can manipulate
* it arbitrarily using FUSE_FORGET requests.
@@ -902,10 +902,11 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
inode->key.ino = e->attr.st_ino;
inode->key.dev = e->attr.st_dev;
inode->key.mnt_id = mnt_id;
pthread_mutex_init(&inode->plock_mutex, NULL);
inode->posix_locks = g_hash_table_new_full(
g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
if (lo->posix_lock) {
pthread_mutex_init(&inode->plock_mutex, NULL);
inode->posix_locks = g_hash_table_new_full(
g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
}
pthread_mutex_lock(&lo->mutex);
inode->fuse_ino = lo_add_inode_mapping(req, inode);
g_hash_table_insert(lo->inodes, &inode->key, inode);
@@ -1291,12 +1292,13 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
if (!inode->nlookup) {
lo_map_remove(&lo->ino_map, inode->fuse_ino);
g_hash_table_remove(lo->inodes, &inode->key);
if (g_hash_table_size(inode->posix_locks)) {
fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
if (lo->posix_lock) {
if (g_hash_table_size(inode->posix_locks)) {
fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
}
g_hash_table_destroy(inode->posix_locks);
pthread_mutex_destroy(&inode->plock_mutex);
}
g_hash_table_destroy(inode->posix_locks);
pthread_mutex_destroy(&inode->plock_mutex);
/* Drop our refcount from lo_do_lookup() */
lo_inode_put(lo, &inode);
}
@@ -1772,6 +1774,11 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start,
lock->l_len);
if (!lo->posix_lock) {
fuse_reply_err(req, ENOSYS);
return;
}
inode = lo_inode(req, ino);
if (!inode) {
fuse_reply_err(req, EBADF);
@@ -1817,6 +1824,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep,
lock->l_whence, lock->l_start, lock->l_len);
if (!lo->posix_lock) {
fuse_reply_err(req, ENOSYS);
return;
}
if (sleep) {
fuse_reply_err(req, EOPNOTSUPP);
return;
@@ -1941,6 +1953,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
int res;
(void)ino;
struct lo_inode *inode;
struct lo_data *lo = lo_data(req);
inode = lo_inode(req, ino);
if (!inode) {
@@ -1948,13 +1961,21 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
return;
}
/* An fd is going away. Cleanup associated posix locks */
pthread_mutex_lock(&inode->plock_mutex);
g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner));
pthread_mutex_unlock(&inode->plock_mutex);
if (!S_ISREG(inode->filetype)) {
lo_inode_put(lo, &inode);
fuse_reply_err(req, EBADF);
return;
}
/* An fd is going away. Cleanup associated posix locks */
if (lo->posix_lock) {
pthread_mutex_lock(&inode->plock_mutex);
g_hash_table_remove(inode->posix_locks,
GUINT_TO_POINTER(fi->lock_owner));
pthread_mutex_unlock(&inode->plock_mutex);
}
res = close(dup(lo_fi_fd(req, fi)));
lo_inode_put(lo_data(req), &inode);
lo_inode_put(lo, &inode);
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -3284,18 +3305,38 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile)
static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
g_autofree char *localfmt = NULL;
struct timespec ts;
struct tm tm;
char sec_fmt[sizeof "2020-12-07 18:17:54"];
char zone_fmt[sizeof "+0100"];
if (current_log_level < level) {
return;
}
if (current_log_level == FUSE_LOG_DEBUG) {
if (!use_syslog) {
localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
get_clock(), syscall(__NR_gettid), fmt);
} else {
if (use_syslog) {
/* no timestamp needed */
localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
fmt);
} else {
/* try formatting a broken-down timestamp */
if (clock_gettime(CLOCK_REALTIME, &ts) != -1 &&
localtime_r(&ts.tv_sec, &tm) != NULL &&
strftime(sec_fmt, sizeof sec_fmt, "%Y-%m-%d %H:%M:%S",
&tm) != 0 &&
strftime(zone_fmt, sizeof zone_fmt, "%z", &tm) != 0) {
localfmt = g_strdup_printf("[%s.%02ld%s] [ID: %08ld] %s",
sec_fmt,
ts.tv_nsec / (10L * 1000 * 1000),
zone_fmt, syscall(__NR_gettid),
fmt);
} else {
/* fall back to a flat timestamp */
localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
get_clock(), syscall(__NR_gettid),
fmt);
}
}
fmt = localfmt;
}
@@ -3360,6 +3401,11 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
root->key.mnt_id = mnt_id;
root->nlookup = 2;
g_atomic_int_set(&root->refcount, 2);
if (lo->posix_lock) {
pthread_mutex_init(&root->plock_mutex, NULL);
root->posix_locks = g_hash_table_new_full(
g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
}
}
static guint lo_key_hash(gconstpointer key)
@@ -3382,6 +3428,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo)
if (lo->inodes) {
g_hash_table_destroy(lo->inodes);
}
if (lo->root.posix_locks) {
g_hash_table_destroy(lo->root.posix_locks);
}
lo_map_destroy(&lo->fd_map);
lo_map_destroy(&lo->dirp_map);
lo_map_destroy(&lo->ino_map);
@@ -3416,6 +3466,9 @@ int main(int argc, char *argv[])
struct lo_map_elem *reserve_elem;
int ret = -1;
/* Initialize time conversion information for localtime_r(). */
tzset();
/* Don't mask creation mode, kernel already did that */
umask(0);