From 03556ea920b23c466ce7c1283199033de33ee671 Mon Sep 17 00:00:00 2001
From: Dan Robertson
Date: Mon, 25 May 2020 10:38:03 +0200
Subject: 9pfs: include linux/limits.h for XATTR_SIZE_MAX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

linux/limits.h should be included for the XATTR_SIZE_MAX definition used
by v9fs_xattrcreate.

Fixes: 3b79ef2cf488 ("9pfs: limit xattr size in xattrcreate")
Signed-off-by: Dan Robertson
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Christian Schoenebeck
Message-Id: <20200515203015.7090-2-dan@dlrobertson.com>
Signed-off-by: Greg Kurz
---
 hw/9pfs/9p.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'hw')

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index a2a14b5..68c2df7 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -28,6 +28,7 @@
 #include "sysemu/qtest.h"
 #include "qemu/xxhash.h"
 #include <math.h>
+#include <linux/limits.h>
 
 int open_fd_hw;
 int total_open_fd;
--
cgit v1.1

From ed463454efd0ac3042ff772bfe1b1d846dc281a5 Mon Sep 17 00:00:00 2001
From: Greg Kurz
Date: Mon, 25 May 2020 10:38:03 +0200
Subject: 9p: Lock directory streams with a CoMutex

Locking was introduced in QEMU 2.7 to address the deprecation of
readdir_r(3) in glibc 2.24. It turns out that the frontend code is the
worst place to handle a critical section with a pthread mutex: the code
runs in a coroutine on behalf of the QEMU mainloop and then yields
control, waiting for the fsdev backend to process the request in a
worker thread. If the client resends another readdir request for the
same fid before the previous one finally unlocked the mutex, we're
deadlocked.

This never bit us because the linux client serializes readdir requests
for the same fid, but it is quite easy to demonstrate with a custom
client.

A good solution could be to narrow the critical section in the worker
thread code and to return a copy of the dirent to the frontend, but
this causes quite some changes in both 9p.c and codir.c. So, instead of
that, in order for people to easily backport the fix to older QEMU
versions, let's simply use a CoMutex since all the users for this sit
in coroutines.

Fixes: 7cde47d4a89d ("9p: add locking to V9fsDir")
Reviewed-by: Christian Schoenebeck
Message-Id: <158981894794.109297.3530035833368944254.stgit@bahia.lan>
Signed-off-by: Greg Kurz
---
 hw/9pfs/9p.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'hw')

diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index dd1c6cb..3ab5807 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -197,22 +197,22 @@ typedef struct V9fsXattr
 
 typedef struct V9fsDir {
     DIR *stream;
-    QemuMutex readdir_mutex;
+    CoMutex readdir_mutex;
 } V9fsDir;
 
 static inline void v9fs_readdir_lock(V9fsDir *dir)
 {
-    qemu_mutex_lock(&dir->readdir_mutex);
+    qemu_co_mutex_lock(&dir->readdir_mutex);
 }
 
 static inline void v9fs_readdir_unlock(V9fsDir *dir)
 {
-    qemu_mutex_unlock(&dir->readdir_mutex);
+    qemu_co_mutex_unlock(&dir->readdir_mutex);
 }
 
 static inline void v9fs_readdir_init(V9fsDir *dir)
 {
-    qemu_mutex_init(&dir->readdir_mutex);
+    qemu_co_mutex_init(&dir->readdir_mutex);
 }
 
 /*
--
cgit v1.1

From cf45183b718f02b1369e18c795dc51bc1821245d Mon Sep 17 00:00:00 2001
From: Stefano Stabellini
Date: Thu, 21 May 2020 12:26:25 -0700
Subject: Revert "9p: init_in_iov_from_pdu can truncate the size"

This reverts commit 16724a173049ac29c7b5ade741da93a0f46edff7.
It causes https://bugs.launchpad.net/bugs/1877688.

Signed-off-by: Stefano Stabellini
Reviewed-by: Christian Schoenebeck
Message-Id: <20200521192627.15259-1-sstabellini@kernel.org>
Signed-off-by: Greg Kurz
---
 hw/9pfs/9p.c               | 33 +++++++++++----------------------
 hw/9pfs/9p.h               |  2 +-
 hw/9pfs/virtio-9p-device.c | 11 ++++-------
 hw/9pfs/xen-9p-backend.c   | 15 ++++++---------
 4 files changed, 22 insertions(+), 39 deletions(-)

(limited to 'hw')

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 68c2df7..45a788f 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -2103,29 +2103,22 @@ out_nofid:
  * with qemu_iovec_destroy().
  */
 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
-                                    size_t skip, size_t *size,
+                                    size_t skip, size_t size,
                                     bool is_write)
 {
     QEMUIOVector elem;
     struct iovec *iov;
     unsigned int niov;
-    size_t alloc_size = *size + skip;
 
     if (is_write) {
-        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, alloc_size);
+        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
     } else {
-        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, &alloc_size);
-    }
-
-    if (alloc_size < skip) {
-        *size = 0;
-    } else {
-        *size = alloc_size - skip;
+        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
     }
 
     qemu_iovec_init_external(&elem, iov, niov);
     qemu_iovec_init(qiov, niov);
-    qemu_iovec_concat(qiov, &elem, skip, *size);
+    qemu_iovec_concat(qiov, &elem, skip, size);
 }
 
 static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
@@ -2133,14 +2126,15 @@
 {
     ssize_t err;
     size_t offset = 7;
-    size_t read_count;
+    uint64_t read_count;
     QEMUIOVector qiov_full;
 
     if (fidp->fs.xattr.len < off) {
         read_count = 0;
-    } else if (fidp->fs.xattr.len - off < max_count) {
-        read_count = fidp->fs.xattr.len - off;
     } else {
+        read_count = fidp->fs.xattr.len - off;
+    }
+    if (read_count > max_count) {
         read_count = max_count;
     }
     err = pdu_marshal(pdu, offset, "d", read_count);
@@ -2149,7 +2143,7 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
     }
     offset += err;
 
-    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &read_count, false);
+    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
     err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
                     ((char *)fidp->fs.xattr.value) + off,
                     read_count);
@@ -2278,11 +2272,9 @@ static void coroutine_fn v9fs_read(void *opaque)
         QEMUIOVector qiov_full;
         QEMUIOVector qiov;
         int32_t len;
-        size_t size = max_count;
 
-        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, &size, false);
+        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
         qemu_iovec_init(&qiov, qiov_full.niov);
-        max_count = size;
         do {
             qemu_iovec_reset(&qiov);
             qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
@@ -2533,7 +2525,6 @@ static void coroutine_fn v9fs_write(void *opaque)
     int32_t len = 0;
     int32_t total = 0;
     size_t offset = 7;
-    size_t size;
     V9fsFidState *fidp;
     V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
@@ -2546,9 +2537,7 @@ static void coroutine_fn v9fs_write(void *opaque)
         return;
     }
     offset += err;
-    size = count;
-    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &size, true);
-    count = size;
+    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
     trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
 
     fidp = get_fid(pdu, fid);
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 3ab5807..ee22716 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -436,7 +436,7 @@ struct V9fsTransport {
     ssize_t (*pdu_vunmarshal)(V9fsPDU *pdu, size_t offset, const char *fmt,
                               va_list ap);
     void (*init_in_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov,
-                                 unsigned int *pniov, size_t *size);
+                                 unsigned int *pniov, size_t size);
     void (*init_out_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov,
                                   unsigned int *pniov, size_t size);
     void (*push_and_notify)(V9fsPDU *pdu);
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index e5b4497..36f3aa9 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -147,22 +147,19 @@ static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset,
 }
 
 static void virtio_init_in_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov,
-                                        unsigned int *pniov, size_t *size)
+                                        unsigned int *pniov, size_t size)
 {
     V9fsState *s = pdu->s;
     V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
     VirtQueueElement *elem = v->elems[pdu->idx];
     size_t buf_size = iov_size(elem->in_sg, elem->in_num);
 
-    if (buf_size < P9_IOHDRSZ) {
+    if (buf_size < size) {
         VirtIODevice *vdev = VIRTIO_DEVICE(v);
 
         virtio_error(vdev,
-                     "VirtFS reply type %d needs %zu bytes, buffer has %zu, less than minimum",
-                     pdu->id + 1, *size, buf_size);
-    }
-    if (buf_size < *size) {
-        *size = buf_size;
+                     "VirtFS reply type %d needs %zu bytes, buffer has %zu",
+                     pdu->id + 1, size, buf_size);
     }
 
     *piov = elem->in_sg;
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index f04caab..fc197f6 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -188,7 +188,7 @@ static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu,
 static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
                                           struct iovec **piov,
                                           unsigned int *pniov,
-                                          size_t *size)
+                                          size_t size)
 {
     Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
     Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
@@ -198,19 +198,16 @@ static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
 
     g_free(ring->sg);
     ring->sg = g_new0(struct iovec, 2);
-    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, *size);
+    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size);
 
     buf_size = iov_size(ring->sg, num);
-    if (buf_size < P9_IOHDRSZ) {
-        xen_pv_printf(&xen_9pfs->xendev, 0, "Xen 9pfs reply type %d needs "
-                      "%zu bytes, buffer has %zu, less than minimum\n",
-                      pdu->id + 1, *size, buf_size);
+    if (buf_size < size) {
+        xen_pv_printf(&xen_9pfs->xendev, 0, "Xen 9pfs request type %d"
+                      "needs %zu bytes, buffer has %zu\n", pdu->id, size,
+                      buf_size);
         xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
         xen_9pfs_disconnect(&xen_9pfs->xendev);
     }
-    if (buf_size < *size) {
-        *size = buf_size;
-    }
 
     *piov = ring->sg;
     *pniov = num;
--
cgit v1.1

From a4c4d462729466c4756bac8a0a8d77eb63b21ef7 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini
Date: Thu, 21 May 2020 12:26:26 -0700
Subject: xen/9pfs: yield when there isn't enough room on the ring

Instead of truncating replies, which is problematic, wait until the
client reads more data and frees bytes on the reply ring.

Do that by calling qemu_coroutine_yield(). The corresponding
qemu_coroutine_enter_if_inactive() is called from xen_9pfs_bh upon
receiving the next notification from the client.

We need to be careful to avoid races in case xen_9pfs_bh and the
coroutine are both active at the same time. In xen_9pfs_bh, wait until
either the critical section is over (ring->co == NULL) or until the
coroutine becomes inactive (qemu_coroutine_yield() was called) before
continuing. Then, simply wake up the coroutine if it is inactive.

Signed-off-by: Stefano Stabellini
Reviewed-by: Christian Schoenebeck
Message-Id: <20200521192627.15259-2-sstabellini@kernel.org>
Signed-off-by: Greg Kurz
---
 hw/9pfs/xen-9p-backend.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'hw')

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index fc197f6..3c84c86 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -37,6 +37,7 @@ typedef struct Xen9pfsRing {
 
     struct iovec *sg;
     QEMUBH *bh;
+    Coroutine *co;
 
     /* local copies, so that we can read/write PDU data directly from
      * the ring */
@@ -198,16 +199,20 @@ static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
 
     g_free(ring->sg);
     ring->sg = g_new0(struct iovec, 2);
-    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size);
+    ring->co = qemu_coroutine_self();
+    /* make sure other threads see ring->co changes before continuing */
+    smp_wmb();
 
+again:
+    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size);
     buf_size = iov_size(ring->sg, num);
     if (buf_size < size) {
-        xen_pv_printf(&xen_9pfs->xendev, 0, "Xen 9pfs request type %d"
-                      "needs %zu bytes, buffer has %zu\n", pdu->id, size,
-                      buf_size);
-        xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
-        xen_9pfs_disconnect(&xen_9pfs->xendev);
+        qemu_coroutine_yield();
+        goto again;
     }
+    ring->co = NULL;
+    /* make sure other threads see ring->co changes before continuing */
+    smp_wmb();
 
     *piov = ring->sg;
     *pniov = num;
@@ -292,6 +297,20 @@ static int xen_9pfs_receive(Xen9pfsRing *ring)
 static void xen_9pfs_bh(void *opaque)
 {
     Xen9pfsRing *ring = opaque;
+    bool wait;
+
+again:
+    wait = ring->co != NULL && qemu_coroutine_entered(ring->co);
+    /* paired with the smb_wmb barriers in xen_9pfs_init_in_iov_from_pdu */
+    smp_rmb();
+    if (wait) {
+        cpu_relax();
+        goto again;
+    }
+
+    if (ring->co != NULL) {
+        qemu_coroutine_enter_if_inactive(ring->co);
+    }
     xen_9pfs_receive(ring);
 }
 
--
cgit v1.1

From 84af75577cceb195b044e2d5ba6d940206b169ca Mon Sep 17 00:00:00 2001
From: Stefano Stabellini
Date: Thu, 21 May 2020 12:26:27 -0700
Subject: xen/9pfs: increase max ring order to 9

The max order allowed by the protocol is 9. Increase the max order
supported by QEMU to 9 to increase performance.

Signed-off-by: Stefano Stabellini
Reviewed-by: Christian Schoenebeck
Message-Id: <20200521192627.15259-3-sstabellini@kernel.org>
Signed-off-by: Greg Kurz
---
 hw/9pfs/xen-9p-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'hw')

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 3c84c86..a969fcc 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -21,7 +21,7 @@
 
 #define VERSIONS "1"
 #define MAX_RINGS 8
-#define MAX_RING_ORDER 8
+#define MAX_RING_ORDER 9
 
 typedef struct Xen9pfsRing {
     struct Xen9pfsDev *priv;
--
cgit v1.1