aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-12-31 15:55:11 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-12-31 15:55:11 +0000
commit3fb340ccf5f8385088a3d3b0e07763a8f5b85f4a (patch)
treea67fe3f2d895796d03ba7b76aa58d74fc6b3f98e
parent091774bfdee2b4f7dfd570061a200dfdc54374a6 (diff)
parent36d0fe65160d83cb065de9b6fe60114ee127d9f0 (diff)
downloadqemu-3fb340ccf5f8385088a3d3b0e07763a8f5b85f4a.zip
qemu-3fb340ccf5f8385088a3d3b0e07763a8f5b85f4a.tar.gz
qemu-3fb340ccf5f8385088a3d3b0e07763a8f5b85f4a.tar.bz2
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201218a' into staging
Monitor, virtiofsd and migration pull HMP cleanups Migration fixes Note the change in behaviour of not allowing a postmigrate migrtion rather than crashing Virtiofsd cleanups and fixes --thread-pool-size=0 for no thread pool (faster for some workloads) Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> # gpg: Signature made Fri 18 Dec 2020 10:39:37 GMT # gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7 # gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full] # Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20201218a: migration: Don't allow migration if vm is in POSTMIGRATE savevm: Delete snapshots just created in case of error savevm: Remove dead code in save_snapshot() docs/devel/migration: Improve debugging section a bit virtiofsd: Remove useless code about send_notify_iov virtiofsd: update FUSE_FORGET comment on "lo_inode.nlookup" virtiofsd: Check file type in lo_flush() virtiofsd: Disable posix_lock hash table if remote locks are not enabled virtiofsd: Set up posix_lock hash table for root inode virtiofsd: make the debug log timestamp on stderr more human-readable virtiofsd: Use --thread-pool-size=0 to mean no thread pool hmp-commands.hx: List abbreviation after command for cont, quit, print monitor:Don't use '#' flag of printf format ('%#') in format strings monitor:braces {} are necessary for all arms of this statement monitor:open brace '{' following struct go on the same line Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--docs/devel/migration.rst11
-rw-r--r--hmp-commands.hx12
-rw-r--r--migration/migration.c6
-rw-r--r--migration/savevm.c11
-rw-r--r--monitor/hmp-cmds.c3
-rw-r--r--monitor/misc.c16
-rw-r--r--tools/virtiofsd/fuse_lowlevel.c98
-rw-r--r--tools/virtiofsd/fuse_virtio.c36
-rw-r--r--tools/virtiofsd/passthrough_ll.c91
9 files changed, 131 insertions, 153 deletions
diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index 49112bb..ad381b8 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -53,22 +53,23 @@ savevm/loadvm functionality.
Debugging
=========
-The migration stream can be analyzed thanks to `scripts/analyze_migration.py`.
+The migration stream can be analyzed thanks to `scripts/analyze-migration.py`.
Example usage:
.. code-block:: shell
- $ qemu-system-x86_64
- (qemu) migrate "exec:cat > mig"
- $ ./scripts/analyze_migration.py -f mig
+ $ qemu-system-x86_64 -display none -monitor stdio
+ (qemu) migrate "exec:cat > mig"
+ (qemu) q
+ $ ./scripts/analyze-migration.py -f mig
{
"ram (3)": {
"section sizes": {
"pc.ram": "0x0000000008000000",
...
-See also ``analyze_migration.py -h`` help for more options.
+See also ``analyze-migration.py -h`` help for more options.
Common infrastructure
=====================
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 470a420..73e0832 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -40,7 +40,7 @@ SRST
ERST
{
- .name = "q|quit",
+ .name = "quit|q",
.args_type = "",
.params = "",
.help = "quit the emulator",
@@ -49,7 +49,7 @@ ERST
},
SRST
-``q`` or ``quit``
+``quit`` or ``q``
Quit the emulator.
ERST
@@ -401,7 +401,7 @@ SRST
ERST
{
- .name = "c|cont",
+ .name = "cont|c",
.args_type = "",
.params = "",
.help = "resume emulation",
@@ -409,7 +409,7 @@ ERST
},
SRST
-``c`` or ``cont``
+``cont`` or ``c``
Resume emulation.
ERST
@@ -554,7 +554,7 @@ SRST
ERST
{
- .name = "p|print",
+ .name = "print|p",
.args_type = "fmt:/,val:l",
.params = "/fmt expr",
.help = "print expression value (use $reg for CPU register access)",
@@ -562,7 +562,7 @@ ERST
},
SRST
-``p`` or ``print/``\ *fmt* *expr*
+``print`` or ``p/``\ *fmt* *expr*
Print expression value. Only the *format* part of *fmt* is
used.
ERST
diff --git a/migration/migration.c b/migration/migration.c
index e0dbde4..f5d4a52 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2102,6 +2102,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
+ if (runstate_check(RUN_STATE_POSTMIGRATE)) {
+ error_setg(errp, "Can't migrate the vm that was paused due to "
+ "previous migration");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}
diff --git a/migration/savevm.c b/migration/savevm.c
index 5f937a2..4a18c9d 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2728,7 +2728,7 @@ int qemu_load_device_state(QEMUFile *f)
int save_snapshot(const char *name, Error **errp)
{
BlockDriverState *bs, *bs1;
- QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
+ QEMUSnapshotInfo sn1, *sn = &sn1;
int ret = -1, ret2;
QEMUFile *f;
int saved_vm_running;
@@ -2797,13 +2797,7 @@ int save_snapshot(const char *name, Error **errp)
}
if (name) {
- ret = bdrv_snapshot_find(bs, old_sn, name);
- if (ret >= 0) {
- pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
- pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
- } else {
- pstrcpy(sn->name, sizeof(sn->name), name);
- }
+ pstrcpy(sn->name, sizeof(sn->name), name);
} else {
/* cast below needed for OpenBSD where tv_sec is still 'long' */
localtime_r((const time_t *)&tv.tv_sec, &tm);
@@ -2839,6 +2833,7 @@ int save_snapshot(const char *name, Error **errp)
if (ret < 0) {
error_setg(errp, "Error while creating snapshot on '%s'",
bdrv_get_device_or_node_name(bs));
+ bdrv_all_delete_snapshot(sn->name, &bs, NULL);
goto the_end;
}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 65d8ff4..79c8432 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1549,8 +1549,7 @@ end:
hmp_handle_error(mon, err);
}
-typedef struct HMPMigrationStatus
-{
+typedef struct HMPMigrationStatus {
QEMUTimer *timer;
Monitor *mon;
bool is_block_migration;
diff --git a/monitor/misc.c b/monitor/misc.c
index fde6e36..6f5ae09 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -492,8 +492,10 @@ static void hmp_singlestep(Monitor *mon, const QDict *qdict)
static void hmp_gdbserver(Monitor *mon, const QDict *qdict)
{
const char *device = qdict_get_try_str(qdict, "device");
- if (!device)
+ if (!device) {
device = "tcp::" DEFAULT_GDBSTUB_PORT;
+ }
+
if (gdbserver_start(device) < 0) {
monitor_printf(mon, "Could not open gdbserver on device '%s'\n",
device);
@@ -559,10 +561,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize,
}
len = wsize * count;
- if (wsize == 1)
+ if (wsize == 1) {
line_size = 8;
- else
+ } else {
line_size = 16;
+ }
max_digits = 0;
switch(format) {
@@ -583,10 +586,11 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize,
}
while (len > 0) {
- if (is_physical)
+ if (is_physical) {
monitor_printf(mon, TARGET_FMT_plx ":", addr);
- else
+ } else {
monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr);
+ }
l = len;
if (l > line_size)
l = line_size;
@@ -915,7 +919,7 @@ static void hmp_ioport_read(Monitor *mon, const QDict *qdict)
suffix = 'l';
break;
}
- monitor_printf(mon, "port%c[0x%04x] = %#0*x\n",
+ monitor_printf(mon, "port%c[0x%04x] = 0x%0*x\n",
suffix, addr, size * 2, val);
}
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index d4119e9..e94b711 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2143,104 +2143,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
send_reply_ok(req, NULL, 0);
}
-static int send_notify_iov(struct fuse_session *se, int notify_code,
- struct iovec *iov, int count)
-{
- struct fuse_out_header out = {
- .error = notify_code,
- };
-
- if (!se->got_init) {
- return -ENOTCONN;
- }
-
- iov[0].iov_base = &out;
- iov[0].iov_len = sizeof(struct fuse_out_header);
-
- return fuse_send_msg(se, NULL, iov, count);
-}
-
-int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph)
-{
- if (ph != NULL) {
- struct fuse_notify_poll_wakeup_out outarg = {
- .kh = ph->kh,
- };
- struct iovec iov[2];
-
- iov[1].iov_base = &outarg;
- iov[1].iov_len = sizeof(outarg);
-
- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2);
- } else {
- return 0;
- }
-}
-
-int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
- off_t off, off_t len)
-{
- struct fuse_notify_inval_inode_out outarg = {
- .ino = ino,
- .off = off,
- .len = len,
- };
- struct iovec iov[2];
-
- if (!se) {
- return -EINVAL;
- }
-
- iov[1].iov_base = &outarg;
- iov[1].iov_len = sizeof(outarg);
-
- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2);
-}
-
-int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
- const char *name, size_t namelen)
-{
- struct fuse_notify_inval_entry_out outarg = {
- .parent = parent,
- .namelen = namelen,
- };
- struct iovec iov[3];
-
- if (!se) {
- return -EINVAL;
- }
-
- iov[1].iov_base = &outarg;
- iov[1].iov_len = sizeof(outarg);
- iov[2].iov_base = (void *)name;
- iov[2].iov_len = namelen + 1;
-
- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3);
-}
-
-int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
- fuse_ino_t child, const char *name,
- size_t namelen)
-{
- struct fuse_notify_delete_out outarg = {
- .parent = parent,
- .child = child,
- .namelen = namelen,
- };
- struct iovec iov[3];
-
- if (!se) {
- return -EINVAL;
- }
-
- iov[1].iov_base = &outarg;
- iov[1].iov_len = sizeof(outarg);
- iov[2].iov_base = (void *)name;
- iov[2].iov_len = namelen + 1;
-
- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3);
-}
-
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
off_t offset, struct fuse_bufvec *bufv)
{
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index b264dcb..ddcefee 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -578,13 +578,18 @@ static void *fv_queue_thread(void *opaque)
struct VuDev *dev = &qi->virtio_dev->dev;
struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
struct fuse_session *se = qi->virtio_dev->se;
- GThreadPool *pool;
-
- pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, FALSE,
- NULL);
- if (!pool) {
- fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
- return NULL;
+ GThreadPool *pool = NULL;
+ GList *req_list = NULL;
+
+ if (se->thread_pool_size) {
+ fuse_log(FUSE_LOG_DEBUG, "%s: Creating thread pool for Queue %d\n",
+ __func__, qi->qidx);
+ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size,
+ FALSE, NULL);
+ if (!pool) {
+ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
+ return NULL;
+ }
}
fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
@@ -659,14 +664,27 @@ static void *fv_queue_thread(void *opaque)
req->reply_sent = false;
- g_thread_pool_push(pool, req, NULL);
+ if (!se->thread_pool_size) {
+ req_list = g_list_prepend(req_list, req);
+ } else {
+ g_thread_pool_push(pool, req, NULL);
+ }
}
pthread_mutex_unlock(&qi->vq_lock);
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
+
+ /* Process all the requests. */
+ if (!se->thread_pool_size && req_list != NULL) {
+ g_list_foreach(req_list, fv_queue_worker, qi);
+ g_list_free(req_list);
+ req_list = NULL;
+ }
}
- g_thread_pool_free(pool, FALSE, TRUE);
+ if (pool) {
+ g_thread_pool_free(pool, FALSE, TRUE);
+ }
return NULL;
}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 12de321..5fb36d9 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -101,7 +101,7 @@ struct lo_inode {
* This counter keeps the inode alive during the FUSE session.
* Incremented when the FUSE inode number is sent in a reply
* (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is
- * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc.
+ * released by a FUSE_FORGET request.
*
* Note that this value is untrusted because the client can manipulate
* it arbitrarily using FUSE_FORGET requests.
@@ -902,10 +902,11 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
inode->key.ino = e->attr.st_ino;
inode->key.dev = e->attr.st_dev;
inode->key.mnt_id = mnt_id;
- pthread_mutex_init(&inode->plock_mutex, NULL);
- inode->posix_locks = g_hash_table_new_full(
- g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
-
+ if (lo->posix_lock) {
+ pthread_mutex_init(&inode->plock_mutex, NULL);
+ inode->posix_locks = g_hash_table_new_full(
+ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
+ }
pthread_mutex_lock(&lo->mutex);
inode->fuse_ino = lo_add_inode_mapping(req, inode);
g_hash_table_insert(lo->inodes, &inode->key, inode);
@@ -1291,12 +1292,13 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
if (!inode->nlookup) {
lo_map_remove(&lo->ino_map, inode->fuse_ino);
g_hash_table_remove(lo->inodes, &inode->key);
- if (g_hash_table_size(inode->posix_locks)) {
- fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
+ if (lo->posix_lock) {
+ if (g_hash_table_size(inode->posix_locks)) {
+ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
+ }
+ g_hash_table_destroy(inode->posix_locks);
+ pthread_mutex_destroy(&inode->plock_mutex);
}
- g_hash_table_destroy(inode->posix_locks);
- pthread_mutex_destroy(&inode->plock_mutex);
-
/* Drop our refcount from lo_do_lookup() */
lo_inode_put(lo, &inode);
}
@@ -1772,6 +1774,11 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start,
lock->l_len);
+ if (!lo->posix_lock) {
+ fuse_reply_err(req, ENOSYS);
+ return;
+ }
+
inode = lo_inode(req, ino);
if (!inode) {
fuse_reply_err(req, EBADF);
@@ -1817,6 +1824,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep,
lock->l_whence, lock->l_start, lock->l_len);
+ if (!lo->posix_lock) {
+ fuse_reply_err(req, ENOSYS);
+ return;
+ }
+
if (sleep) {
fuse_reply_err(req, EOPNOTSUPP);
return;
@@ -1941,6 +1953,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
int res;
(void)ino;
struct lo_inode *inode;
+ struct lo_data *lo = lo_data(req);
inode = lo_inode(req, ino);
if (!inode) {
@@ -1948,13 +1961,21 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
return;
}
- /* An fd is going away. Cleanup associated posix locks */
- pthread_mutex_lock(&inode->plock_mutex);
- g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner));
- pthread_mutex_unlock(&inode->plock_mutex);
+ if (!S_ISREG(inode->filetype)) {
+ lo_inode_put(lo, &inode);
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+ /* An fd is going away. Cleanup associated posix locks */
+ if (lo->posix_lock) {
+ pthread_mutex_lock(&inode->plock_mutex);
+ g_hash_table_remove(inode->posix_locks,
+ GUINT_TO_POINTER(fi->lock_owner));
+ pthread_mutex_unlock(&inode->plock_mutex);
+ }
res = close(dup(lo_fi_fd(req, fi)));
- lo_inode_put(lo_data(req), &inode);
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -3284,18 +3305,38 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile)
static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
g_autofree char *localfmt = NULL;
+ struct timespec ts;
+ struct tm tm;
+ char sec_fmt[sizeof "2020-12-07 18:17:54"];
+ char zone_fmt[sizeof "+0100"];
if (current_log_level < level) {
return;
}
if (current_log_level == FUSE_LOG_DEBUG) {
- if (!use_syslog) {
- localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
- get_clock(), syscall(__NR_gettid), fmt);
- } else {
+ if (use_syslog) {
+ /* no timestamp needed */
localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
fmt);
+ } else {
+ /* try formatting a broken-down timestamp */
+ if (clock_gettime(CLOCK_REALTIME, &ts) != -1 &&
+ localtime_r(&ts.tv_sec, &tm) != NULL &&
+ strftime(sec_fmt, sizeof sec_fmt, "%Y-%m-%d %H:%M:%S",
+ &tm) != 0 &&
+ strftime(zone_fmt, sizeof zone_fmt, "%z", &tm) != 0) {
+ localfmt = g_strdup_printf("[%s.%02ld%s] [ID: %08ld] %s",
+ sec_fmt,
+ ts.tv_nsec / (10L * 1000 * 1000),
+ zone_fmt, syscall(__NR_gettid),
+ fmt);
+ } else {
+ /* fall back to a flat timestamp */
+ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
+ get_clock(), syscall(__NR_gettid),
+ fmt);
+ }
}
fmt = localfmt;
}
@@ -3360,6 +3401,11 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
root->key.mnt_id = mnt_id;
root->nlookup = 2;
g_atomic_int_set(&root->refcount, 2);
+ if (lo->posix_lock) {
+ pthread_mutex_init(&root->plock_mutex, NULL);
+ root->posix_locks = g_hash_table_new_full(
+ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
+ }
}
static guint lo_key_hash(gconstpointer key)
@@ -3382,6 +3428,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo)
if (lo->inodes) {
g_hash_table_destroy(lo->inodes);
}
+
+ if (lo->root.posix_locks) {
+ g_hash_table_destroy(lo->root.posix_locks);
+ }
lo_map_destroy(&lo->fd_map);
lo_map_destroy(&lo->dirp_map);
lo_map_destroy(&lo->ino_map);
@@ -3416,6 +3466,9 @@ int main(int argc, char *argv[])
struct lo_map_elem *reserve_elem;
int ret = -1;
+ /* Initialize time conversion information for localtime_r(). */
+ tzset();
+
/* Don't mask creation mode, kernel already did that */
umask(0);