From 0254c4d19df3e89e964f121df1e73f2d871fd46e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Jan 2023 18:55:47 +0000 Subject: hw/xen: Add xenstore wire implementation and implementation stubs This implements the basic wire protocol for the XenStore commands, punting all the actual implementation to xs_impl_* functions which all just return errors for now. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/meson.build | 1 + hw/i386/kvm/trace-events | 15 + hw/i386/kvm/xen_xenstore.c | 871 +++++++++++++++++++++++++++++++++++++++++++- hw/i386/kvm/xenstore_impl.c | 117 ++++++ hw/i386/kvm/xenstore_impl.h | 58 +++ 5 files changed, 1054 insertions(+), 8 deletions(-) create mode 100644 hw/i386/kvm/xenstore_impl.c create mode 100644 hw/i386/kvm/xenstore_impl.h (limited to 'hw') diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build index 82dd6ae..6621ba5 100644 --- a/hw/i386/kvm/meson.build +++ b/hw/i386/kvm/meson.build @@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files( 'xen_evtchn.c', 'xen_gnttab.c', 'xen_xenstore.c', + 'xenstore_impl.c', )) i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events index b83c3eb..e4c82de 100644 --- a/hw/i386/kvm/trace-events +++ b/hw/i386/kvm/trace-events @@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d" kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d" kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d" kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d" +xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u tx %u err %s" +xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s" 
+xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int offset) "tx %u path %s offset %u" +xenstore_transaction_start(unsigned int new_tx) "new_tx %u" +xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d" +xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_watch(const char *path, const char *token) "path %s token %s" +xenstore_unwatch(const char *path, const char *token) "path %s token %s" +xenstore_reset_watches(void) "" +xenstore_watch_event(const char *path, const char *token) "path %s token %s" diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 14193ef..64d8f1a 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -28,6 +28,10 @@ #include "sysemu/kvm.h" #include "sysemu/kvm_xen.h" +#include "trace.h" + +#include "xenstore_impl.h" + #include "hw/xen/interface/io/xs_wire.h" #include "hw/xen/interface/event_channel.h" @@ -47,6 +51,9 @@ struct XenXenstoreState { SysBusDevice busdev; /*< public >*/ + XenstoreImplState *impl; + GList *watch_events; + MemoryRegion xenstore_page; struct xenstore_domain_interface *xs; uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX]; @@ -64,6 +71,7 @@ struct XenXenstoreState { struct XenXenstoreState *xen_xenstore_singleton; static void xen_xenstore_event(void *opaque); +static void fire_watch_cb(void *opaque, const char *path, const char *token); static void xen_xenstore_realize(DeviceState *dev, Error **errp) { @@ -89,6 +97,8 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) } aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true, xen_xenstore_event, NULL, NULL, NULL, s); + + s->impl = xs_impl_create(); } static bool xen_xenstore_is_needed(void *opaque) @@ -213,20 +223,761 @@ static void reset_rsp(XenXenstoreState *s) 
s->rsp_offset = 0; } +static void xs_error(XenXenstoreState *s, unsigned int id, + xs_transaction_t tx_id, int errnum) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + const char *errstr = NULL; + + for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) { + struct xsd_errors *xsd_error = &xsd_errors[i]; + + if (xsd_error->errnum == errnum) { + errstr = xsd_error->errstring; + break; + } + } + assert(errstr); + + trace_xenstore_error(id, tx_id, errstr); + + rsp->type = XS_ERROR; + rsp->req_id = id; + rsp->tx_id = tx_id; + rsp->len = (uint32_t)strlen(errstr) + 1; + + memcpy(&rsp[1], errstr, rsp->len); +} + +static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id, + xs_transaction_t tx_id) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + const char *okstr = "OK"; + + rsp->type = type; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = (uint32_t)strlen(okstr) + 1; + + memcpy(&rsp[1], okstr, rsp->len); +} + +/* + * The correct request and response formats are documented in xen.git: + * docs/misc/xenstore.txt. A summary is given below for convenience. + * The '|' symbol represents a NUL character. + * + * ---------- Database read, write and permissions operations ---------- + * + * READ <path>| <value|> + * WRITE <path>|<value|> + * Store and read the octet string <value> at <path>. + * WRITE creates any missing parent paths, with empty values. + * + * MKDIR <path>| + * Ensures that the <path> exists, if necessary by creating + * it and any missing parents with empty values. If <path> + * or any parent already exists, its value is left unchanged. + * + * RM <path>| + * Ensures that the <path> does not exist, by deleting + * it and all of its children. It is not an error if <path> does + * not exist, but it _is_ an error if <path>'s immediate parent + * does not exist either. + * + * DIRECTORY <path>| <child-leaf-name>|* + * Gives a list of the immediate children of <path>, as only the + * leafnames. The resulting children are each named + * <path>/<child-leaf-name>. 
+ * + * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|* + * Same as DIRECTORY, but to be used for children lists longer than + * XENSTORE_PAYLOAD_MAX. Inputs are <path> and the byte offset into + * the list of children to return. Return values are the generation + * count <gencnt> of the node (to be used to ensure the node hasn't + * changed between two reads: <gencnt> being the same for multiple + * reads guarantees the node hasn't changed) and the list of children + * starting at the specified <offset> of the complete list. + * + * GET_PERMS <path>| <perm-as-string>|+ + * SET_PERMS <path>|<perm-as-string>|+? + * <perm-as-string> is one of the following + * w<domid> write only + * r<domid> read only + * b<domid> both read and write + * n<domid> no access + * See https://wiki.xen.org/wiki/XenBus section + * `Permissions' for details of the permissions system. + * It is possible to set permissions for the special watch paths + * "@introduceDomain" and "@releaseDomain" to enable receiving those + * watches in unprivileged domains. + * + * ---------- Watches ---------- + * + * WATCH <wpath>|<token>|? + * Adds a watch. + * + * When a <path> is modified (including path creation, removal, + * contents change or permissions change) this generates an event + * on the changed <path>. Changes made in transactions cause an + * event only if and when committed. Each occurring event is + * matched against all the watches currently set up, and each + * matching watch results in a WATCH_EVENT message (see below). + * + * The event's path matches the watch's <wpath> if it is a child + * of <wpath>. + * + * <wpath> can be a <path> to watch or @<wspecial>. In the + * latter case <wspecial> may have any syntax but it matches + * (according to the rules above) only the following special + * events which are invented by xenstored: + * @introduceDomain occurs on INTRODUCE + * @releaseDomain occurs on any domain crash or + * shutdown, and also on RELEASE + * and domain destruction + * <wspecial> events are sent to privileged callers or explicitly + * via SET_PERMS enabled domains only. + * + * When a watch is first set up it is triggered once straight + * away, with <path> equal to <wpath>. Watches may be triggered + * spuriously. 
The tx_id in a WATCH request is ignored. + * + * Watches are supposed to be restricted by the permissions + * system but in practice the implementation is imperfect. + * Applications should not rely on being sent a notification for + * paths that they cannot read; however, an application may rely + * on being sent a watch when a path which it _is_ able to read + * is deleted even if that leaves only a nonexistent unreadable + * parent. A notification may be omitted if a node's permissions + * are changed so as to make it unreadable, in which case future + * notifications may be suppressed (and if the node is later made + * readable, some notifications may have been lost). + * + * WATCH_EVENT <epath>|<token>| + * Unsolicited `reply' generated for matching modification events + * as described above. req_id and tx_id are both 0. + * + * <epath> is the event's path, ie the actual path that was + * modified; however if the event was the recursive removal of a + * parent of <wpath>, <epath> is just + * <wpath> (rather than the actual path which was removed). So + * <epath> is a child of <wpath>, regardless. + * + * Iff <wpath> for the watch was specified as a relative pathname, + * the <epath> path will also be relative (with the same base, + * obviously). + * + * UNWATCH <wpath>|<token>|? + * + * RESET_WATCHES | + * Reset all watches and transactions of the caller. + * + * ---------- Transactions ---------- + * + * TRANSACTION_START | <transid>| + * <transid> is an opaque uint32_t allocated by xenstored + * represented as unsigned decimal. After this, transaction may + * be referenced by using <transid> (as 32-bit binary) in the + * tx_id request header field. When transaction is started whole + * db is copied; reads and writes happen on the copy. + * It is not legal to send non-0 tx_id in TRANSACTION_START. + * + * TRANSACTION_END T| + * TRANSACTION_END F| + * tx_id must refer to existing transaction. After this + * request the tx_id is no longer valid and may be reused by + * xenstore. If F, the transaction is discarded. 
If T, + * it is committed: if there were any other intervening writes + * then our END gets get EAGAIN. + * + * The plan is that in the future only intervening `conflicting' + * writes cause EAGAIN, meaning only writes or other commits + * which changed paths which were read or written in the + * transaction at hand. + * + */ + +static void xs_read(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, unsigned int len) +{ + const char *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + g_autoptr(GByteArray) data = g_byte_array_new(); + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_read(tx_id, path); + err = xs_impl_read(s->impl, xen_domid, tx_id, path, data); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_READ; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + len = data->len; + if (len > XENSTORE_PAYLOAD_MAX) { + xs_error(s, req_id, tx_id, E2BIG); + return; + } + + memcpy(&rsp_data[rsp->len], data->data, len); + rsp->len += len; +} + +static void xs_write(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + const char *path; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + g_byte_array_append(data, req_data, len); + + trace_xenstore_write(tx_id, path); + err = xs_impl_write(s->impl, xen_domid, tx_id, path, data); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_WRITE, req_id, tx_id); +} + +static void xs_mkdir(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t 
tx_id, uint8_t *req_data, + unsigned int len) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + const char *path; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + trace_xenstore_mkdir(tx_id, path); + err = xs_impl_read(s->impl, xen_domid, tx_id, path, data); + if (err == ENOENT) { + err = xs_impl_write(s->impl, xen_domid, tx_id, path, data); + } + /* err == 0: node already existed or was just created; else report it */ + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_MKDIR, req_id, tx_id); +} + +static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp, + GList *strings, unsigned int start, bool truncate) +{ + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + GList *l; + + for (l = strings; l; l = l->next) { + size_t len = strlen(l->data) + 1; /* Including the NUL termination */ + char *str = l->data; + + if (rsp->len + len > XENSTORE_PAYLOAD_MAX) { + if (truncate) { + len = XENSTORE_PAYLOAD_MAX - rsp->len; + if (!len) { + return; + } + } else { + xs_error(s, rsp->req_id, rsp->tx_id, E2BIG); + return; + } + } + + if (start) { + if (start >= len) { + start -= len; + continue; + } + + str += start; + len -= start; + start = 0; + } + + memcpy(&rsp_data[rsp->len], str, len); + rsp->len += len; + } + /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */ + if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) { + rsp_data[rsp->len++] = '\0'; + } +} + +static void xs_directory(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + GList *items = NULL; + const char *path; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + trace_xenstore_directory(tx_id, path); + err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items); + if (err != 0) { + xs_error(s, req_id, tx_id, 
err); + return; + } + + rsp->type = XS_DIRECTORY; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + xs_append_strings(s, rsp, items, 0, false); + + g_list_free_full(items, g_free); +} + +static void xs_directory_part(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *offset_str, *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + char *rsp_data = (char *)&rsp[1]; + uint64_t gencnt = 0; + unsigned int offset; + GList *items = NULL; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + offset_str = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + if (len) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_directory_part(tx_id, path, offset); + err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items); + if (err != 0) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_DIRECTORY_PART; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1; + + xs_append_strings(s, rsp, items, offset, true); + + g_list_free_full(items, g_free); +} + +static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + char *rsp_data = (char *)&rsp[1]; + int err; + + if (len != 1 || req_data[0] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + rsp->type = XS_TRANSACTION_START; + rsp->req_id = 
req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + trace_xenstore_transaction_start(tx_id); + + rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id); + assert(rsp->len < XENSTORE_PAYLOAD_MAX); + rsp->len++; +} + +static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + bool commit; + int err; + + if (len != 2 || req_data[1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + switch (req_data[0]) { + case 'T': + commit = true; + break; + case 'F': + commit = false; + break; + default: + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_transaction_end(tx_id, commit); + err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_TRANSACTION_END, req_id, tx_id); +} + +static void xs_rm(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, unsigned int len) +{ + const char *path = (const char *)req_data; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_rm(tx_id, path); + err = xs_impl_rm(s->impl, xen_domid, tx_id, path); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_RM, req_id, tx_id); +} + +static void xs_get_perms(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + GList *perms = NULL; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_get_perms(tx_id, path); + err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms); + if (err) { + 
xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_GET_PERMS; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + xs_append_strings(s, rsp, perms, 0, false); + + g_list_free_full(perms, g_free); +} + +static void xs_set_perms(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *path = (const char *)req_data; + uint8_t *perm; + GList *perms = NULL; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + perm = req_data; + while (len--) { + if (*req_data++ == '\0') { + perms = g_list_append(perms, perm); + perm = req_data; + } + } + + /* + * Note that there may be trailing garbage at the end of the buffer. + * This is explicitly permitted by the '?' at the end of the definition: + * + * SET_PERMS <path>|<perm-as-string>|+? + */ + + trace_xenstore_set_perms(tx_id, path); + err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms); + g_list_free(perms); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_SET_PERMS, req_id, tx_id); +} + +static void xs_watch(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *token, *path = (const char *)req_data; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + token = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + /* + * Note that there may be trailing garbage at the end of the buffer. + * This is explicitly permitted by the '?' at the end of the definition: + * + * WATCH <wpath>|<token>|? 
+ */ + + trace_xenstore_watch(path, token); + err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_WATCH, req_id, tx_id); +} + +static void xs_unwatch(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *token, *path = (const char *)req_data; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + token = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + trace_xenstore_unwatch(path, token); + err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_UNWATCH, req_id, tx_id); +} + +static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_reset_watches(); + xs_impl_reset_watches(s->impl, xen_domid); + + xs_ok(s, XS_RESET_WATCHES, req_id, tx_id); +} + +static void xs_priv(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len) +{ + xs_error(s, req_id, tx_id, EACCES); +} + +static void xs_unimpl(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len) +{ + xs_error(s, req_id, tx_id, ENOSYS); +} + +typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len); + +struct xsd_req { + const char *name; + xs_impl fn; +}; +#define XSD_REQ(_type, _fn) \ + [_type] = { .name = #_type, .fn = _fn } 
+ +struct xsd_req xsd_reqs[] = { + XSD_REQ(XS_READ, xs_read), + XSD_REQ(XS_WRITE, xs_write), + XSD_REQ(XS_MKDIR, xs_mkdir), + XSD_REQ(XS_DIRECTORY, xs_directory), + XSD_REQ(XS_DIRECTORY_PART, xs_directory_part), + XSD_REQ(XS_TRANSACTION_START, xs_transaction_start), + XSD_REQ(XS_TRANSACTION_END, xs_transaction_end), + XSD_REQ(XS_RM, xs_rm), + XSD_REQ(XS_GET_PERMS, xs_get_perms), + XSD_REQ(XS_SET_PERMS, xs_set_perms), + XSD_REQ(XS_WATCH, xs_watch), + XSD_REQ(XS_UNWATCH, xs_unwatch), + XSD_REQ(XS_CONTROL, xs_priv), + XSD_REQ(XS_INTRODUCE, xs_priv), + XSD_REQ(XS_RELEASE, xs_priv), + XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv), + XSD_REQ(XS_RESUME, xs_priv), + XSD_REQ(XS_SET_TARGET, xs_priv), + XSD_REQ(XS_RESET_WATCHES, xs_reset_watches), +}; + static void process_req(XenXenstoreState *s) { struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data; - struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; - const char enosys[] = "ENOSYS"; + xs_impl handler = NULL; assert(req_pending(s)); assert(!s->rsp_pending); - rsp->type = XS_ERROR; - rsp->req_id = req->req_id; - rsp->tx_id = req->tx_id; - rsp->len = sizeof(enosys); - memcpy((void *)&rsp[1], enosys, sizeof(enosys)); + if (req->type < ARRAY_SIZE(xsd_reqs)) { + handler = xsd_reqs[req->type].fn; + } + if (!handler) { + handler = &xs_unimpl; + } + + handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len); s->rsp_pending = true; reset_req(s); @@ -415,6 +1166,106 @@ static unsigned int put_rsp(XenXenstoreState *s) return copylen; } +static void deliver_watch(XenXenstoreState *s, const char *path, + const char *token) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + unsigned int len; + + assert(!s->rsp_pending); + + trace_xenstore_watch_event(path, token); + + rsp->type = XS_WATCH_EVENT; + rsp->req_id = 0; + rsp->tx_id = 0; + rsp->len = 0; + + len = strlen(path); + + /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */ + 
assert(rsp->len + len < XENSTORE_PAYLOAD_MAX); + + memcpy(&rsp_data[rsp->len], path, len); + rsp->len += len; + rsp_data[rsp->len] = '\0'; + rsp->len++; + + len = strlen(token); + /* + * It is possible for the guest to have chosen a token that will + * not fit (along with the path) into a watch event. We have no + * choice but to drop the event if this is the case. + */ + if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) { + return; + } + + memcpy(&rsp_data[rsp->len], token, len); + rsp->len += len; + rsp_data[rsp->len] = '\0'; + rsp->len++; + + s->rsp_pending = true; +} + +struct watch_event { + char *path; + char *token; +}; + +static void queue_watch(XenXenstoreState *s, const char *path, + const char *token) +{ + struct watch_event *ev = g_new0(struct watch_event, 1); + + ev->path = g_strdup(path); + ev->token = g_strdup(token); + + s->watch_events = g_list_append(s->watch_events, ev); +} + +static void fire_watch_cb(void *opaque, const char *path, const char *token) +{ + XenXenstoreState *s = opaque; + + assert(qemu_mutex_iothread_locked()); + + /* + * If there's a response pending, we obviously can't scribble over + * it. But if there's a request pending, it has dibs on the buffer + * too. + * + * In the common case of a watch firing due to backend activity + * when the ring was otherwise idle, we should be able to copy the + * strings directly into the rsp_data and thence the actual ring, + * without needing to perform any allocations and queue them. + */ + if (s->rsp_pending || req_pending(s)) { + queue_watch(s, path, token); + } else { + deliver_watch(s, path, token); + /* + * If the message was queued because there was already ring activity, + * no need to wake the guest. But if not, we need to send the evtchn. 
+ */ + xen_be_evtchn_notify(s->eh, s->be_port); + } +} + +static void process_watch_events(XenXenstoreState *s) +{ + struct watch_event *ev = s->watch_events->data; + + deliver_watch(s, ev->path, ev->token); + + s->watch_events = g_list_remove(s->watch_events, ev); + g_free(ev->path); + g_free(ev->token); + g_free(ev); +} + static void xen_xenstore_event(void *opaque) { XenXenstoreState *s = opaque; @@ -433,6 +1284,10 @@ static void xen_xenstore_event(void *opaque) copied_to = copied_from = 0; processed = false; + if (!s->rsp_pending && s->watch_events) { + process_watch_events(s); + } + if (s->rsp_pending) { copied_to = put_rsp(s); } @@ -441,7 +1296,7 @@ static void xen_xenstore_event(void *opaque) copied_from = get_req(s); } - if (req_pending(s) && !s->rsp_pending) { + if (req_pending(s) && !s->rsp_pending && !s->watch_events) { process_req(s); processed = true; } diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c new file mode 100644 index 0000000..31dbc98 --- /dev/null +++ b/hw/i386/kvm/xenstore_impl.c @@ -0,0 +1,117 @@ +/* + * QEMU Xen emulation: The actual implementation of XenStore + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse , Paul Durrant + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "xen_xenstore.h" +#include "xenstore_impl.h" + +struct XenstoreImplState { +}; + +int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data) +{ + /* + * The data GByteArray shall exist, and will be freed by caller. + * Just g_byte_array_append() to it. + */ + return ENOENT; +} + +int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data) +{ + /* + * The data GByteArray shall exist, will be freed by caller. 
You are + * free to use g_byte_array_steal() and keep the data. + */ + return ENOSYS; +} + +int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, + uint64_t *gencnt, GList **items) +{ + /* + * The items are (char *) to be freed by caller. Although it's consumed + * immediately so if you want to change it to (const char *) and keep + * them, go ahead and change the caller. + */ + return ENOENT; +} + +int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t *tx_id) +{ + return ENOSYS; +} + +int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, bool commit) +{ + return ENOSYS; +} + +int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path) +{ + return ENOSYS; +} + +int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList **perms) +{ + /* + * The perms are (char *) in the wire format to be + * freed by the caller. + */ + return ENOSYS; +} + +int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList *perms) +{ + /* + * The perms are (const char *) in the wire format. 
+ */ + return ENOSYS; +} + +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque) +{ + /* + * When calling the callback @fn, note that the path should + * precisely match the relative path that the guest provided, even + * if it was a relative path which needed to be prefixed with + * /local/domain/${domid}/ + */ + return ENOSYS; +} + +int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, + xs_impl_watch_fn fn, void *opaque) +{ + /* Remove the watch that matches all four criteria */ + return ENOSYS; +} + +int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) +{ + return ENOSYS; +} + +XenstoreImplState *xs_impl_create(void) +{ + return g_new0(XenstoreImplState, 1); +} diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h new file mode 100644 index 0000000..beb7b29 --- /dev/null +++ b/hw/i386/kvm/xenstore_impl.h @@ -0,0 +1,58 @@ +/* + * QEMU Xen emulation: The actual implementation of XenStore + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_XENSTORE_IMPL_H +#define QEMU_XENSTORE_IMPL_H + +typedef uint32_t xs_transaction_t; + +#define XBT_NULL 0 + +typedef struct XenstoreImplState XenstoreImplState; + +XenstoreImplState *xs_impl_create(void); + +/* + * These functions return *positive* error numbers. This is a little + * unconventional but it helps to keep us honest because there is + * also a very limited set of error numbers that they are permitted + * to return (those in xsd_errors). 
+ */ + +int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data); +int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data); +int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, + uint64_t *gencnt, GList **items); +int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t *tx_id); +int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, bool commit); +int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path); +int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList **perms); +int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList *perms); + +/* This differs from xs_watch_fn because it has the token */ +typedef void(xs_impl_watch_fn)(void *opaque, const char *path, + const char *token); +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque); +int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, xs_impl_watch_fn fn, + void *opaque); +int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id); + +#endif /* QEMU_XENSTORE_IMPL_H */ -- cgit v1.1 From 3ef7ff83caa27d8b3bfc76805cd47bc97d23b7d7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 20 Jan 2023 01:36:38 +0000 Subject: hw/xen: Add basic XenStore tree walk and write/read/directory support This is a fairly simple implementation of a copy-on-write tree. The node walk function starts off at the root, with 'inplace == true'. 
If it ever encounters a node with a refcount greater than one (including the root node), then that node is shared with other trees, and cannot be modified in place, so the inplace flag is cleared and we copy on write from there on down. Xenstore write has 'mkdir -p' semantics and will create the intermediate nodes if they don't already exist, so in that case we flip the inplace flag back to true as we populate the newly-created nodes. We put a copy of the absolute path into the buffer in the struct walk_op, with *two* NUL terminators at the end. As xs_node_walk() goes down the tree, it replaces the next '/' separator with a NUL so that it can use the 'child name' in place. The next recursion down then puts the '/' back and repeats the exercise for the next path element... if it doesn't hit that *second* NUL termination which indicates the true end of the path. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xenstore_impl.c | 527 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 520 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c index 31dbc98..9e10a31 100644 --- a/hw/i386/kvm/xenstore_impl.c +++ b/hw/i386/kvm/xenstore_impl.c @@ -10,13 +10,470 @@ */ #include "qemu/osdep.h" +#include "qom/object.h" #include "xen_xenstore.h" #include "xenstore_impl.h" +#include "hw/xen/interface/io/xs_wire.h" + +#define XS_MAX_WATCHES 128 +#define XS_MAX_DOMAIN_NODES 1000 +#define XS_MAX_NODE_SIZE 2048 +#define XS_MAX_TRANSACTIONS 10 +#define XS_MAX_PERMS_PER_NODE 5 + +#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "0123456789-/_" + +typedef struct XsNode { + uint32_t ref; + GByteArray *content; + GHashTable *children; + uint64_t gencnt; +#ifdef XS_NODE_UNIT_TEST + gchar *name; /* debug only */ +#endif +} XsNode; + struct XenstoreImplState { + XsNode *root; + unsigned int nr_nodes; }; +static inline XsNode *xs_node_new(void) +{ + XsNode 
*n = g_new0(XsNode, 1); + n->ref = 1; + +#ifdef XS_NODE_UNIT_TEST + nr_xs_nodes++; + xs_node_list = g_list_prepend(xs_node_list, n); +#endif + return n; +} + +static inline XsNode *xs_node_ref(XsNode *n) +{ + /* With just 10 transactions, it can never get anywhere near this. */ + g_assert(n->ref < INT_MAX); + + g_assert(n->ref); + n->ref++; + return n; +} + +static inline void xs_node_unref(XsNode *n) +{ + if (!n) { + return; + } + g_assert(n->ref); + if (--n->ref) { + return; + } + + if (n->content) { + g_byte_array_unref(n->content); + } + if (n->children) { + g_hash_table_unref(n->children); + } +#ifdef XS_NODE_UNIT_TEST + g_free(n->name); + nr_xs_nodes--; + xs_node_list = g_list_remove(xs_node_list, n); +#endif + g_free(n); +} + +/* For copying from one hash table to another using g_hash_table_foreach() */ +static void do_insert(gpointer key, gpointer value, gpointer user_data) +{ + g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); +} + +static XsNode *xs_node_copy(XsNode *old) +{ + XsNode *n = xs_node_new(); + + n->gencnt = old->gencnt; + if (old->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + g_hash_table_foreach(old->children, do_insert, n->children); + } + if (old && old->content) { + n->content = g_byte_array_ref(old->content); + } + return n; +} + +/* Returns true if it made a change to the hash table */ +static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child) +{ + assert(!strchr(path_elem, '/')); + + if (!child) { + assert(n->children); + return g_hash_table_remove(n->children, path_elem); + } + +#ifdef XS_NODE_UNIT_TEST + g_free(child->name); + child->name = g_strdup(path_elem); +#endif + if (!n->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + } + + /* + * The documentation for g_hash_table_insert() says that it "returns a + * boolean value to indicate whether the newly 
added value was already + * in the hash table or not." + * + * It could perhaps be clearer that returning TRUE means it wasn't, + */ + return g_hash_table_insert(n->children, g_strdup(path_elem), child); +} + +struct walk_op { + struct XenstoreImplState *s; + char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */ + int (*op_fn)(XsNode **n, struct walk_op *op); + void *op_opaque; + void *op_opaque2; + + unsigned int dom_id; + + /* The number of nodes which will exist in the tree if this op succeeds. */ + unsigned int new_nr_nodes; + + /* + * This is maintained on the way *down* the walk to indicate + * whether nodes can be modified in place or whether COW is + * required. It starts off being true, as we're always going to + * replace the root node. If we walk into a shared subtree it + * becomes false. If we start *creating* new nodes for a write, + * it becomes true again. + * + * Do not use it on the way back up. + */ + bool inplace; + bool mutating; + bool create_dirs; +}; + +static int xs_node_add_content(XsNode **n, struct walk_op *op) +{ + GByteArray *data = op->op_opaque; + + if (op->dom_id) { + /* + * The real XenStored includes permissions and names of child nodes + * in the calculated datasize but life's too short. For a single + * tenant internal XenStore, we don't have to be quite as pedantic. + */ + if (data->len > XS_MAX_NODE_SIZE) { + return E2BIG; + } + } + /* We *are* the node to be written. Either this or a copy. 
*/ + if (!op->inplace) { + XsNode *old = *n; + *n = xs_node_copy(old); + xs_node_unref(old); + } + + if ((*n)->content) { + g_byte_array_unref((*n)->content); + } + (*n)->content = g_byte_array_ref(data); + return 0; +} + +static int xs_node_get_content(XsNode **n, struct walk_op *op) +{ + GByteArray *data = op->op_opaque; + GByteArray *node_data; + + assert(op->inplace); + assert(*n); + + node_data = (*n)->content; + if (node_data) { + g_byte_array_append(data, node_data->data, node_data->len); + } + + return 0; +} + +static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) +{ + struct walk_op *op = user_data; + XsNode *n = value; + bool this_inplace = op->inplace; + + if (n->ref != 1) { + op->inplace = 0; + } + + if (n->children) { + g_hash_table_foreach_remove(n->children, node_rm_recurse, op); + } + op->new_nr_nodes--; + + /* + * Actually deleting the child here is just an optimisation; if we + * don't then the final unref on the topmost victim will just have + * to cascade down again repeating all the g_hash_table_foreach() + * calls. + */ + return this_inplace; +} + +static int xs_node_rm(XsNode **n, struct walk_op *op) +{ + bool this_inplace = op->inplace; + + /* Keep count of the nodes in the subtree which gets deleted. */ + if ((*n)->children) { + g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); + } + op->new_nr_nodes--; + + if (this_inplace) { + xs_node_unref(*n); + } + *n = NULL; + return 0; +} + +/* + * Passed a full reference in *n which it may free if it needs to COW. + * + * When changing the tree, the op->inplace flag indicates whether this + * node may be modified in place (i.e. it and all its parents had a + * refcount of one). If walking down the tree we find a node whose + * refcount is higher, we must clear op->inplace and COW from there + * down. Unless we are creating new nodes as scaffolding for a write + * (which works like 'mkdir -p' does). 
In which case those newly + * created nodes can (and must) be modified in place again. + */ +static int xs_node_walk(XsNode **n, struct walk_op *op) +{ + char *child_name = NULL; + size_t namelen; + XsNode *old = *n, *child = NULL; + bool stole_child = false; + bool this_inplace; + int err; + + namelen = strlen(op->path); + + /* Is there a child, or do we hit the double-NUL termination? */ + if (op->path[namelen + 1]) { + char *slash; + child_name = op->path + namelen + 1; + slash = strchr(child_name, '/'); + if (slash) { + *slash = '\0'; + } + op->path[namelen] = '/'; + } + + /* If we walk into a subtree which is shared, we must COW */ + if (op->mutating && old->ref != 1) { + op->inplace = false; + } + + if (!child_name) { + /* This is the actual node on which the operation shall be performed */ + err = op->op_fn(n, op); + goto out; + } + + /* op->inplace will be further modified during the recursion */ + this_inplace = op->inplace; + + if (old && old->children) { + child = g_hash_table_lookup(old->children, child_name); + /* This is a *weak* reference to 'child', owned by the hash table */ + } + + if (child) { + xs_node_ref(child); + /* + * Now we own it too. But if we can modify inplace, that's going to + * foil the check and force it to COW. We want to be the *only* owner + * so that it can be modified in place, so remove it from the hash + * table in that case. We'll add it (or its replacement) back later. + */ + if (op->mutating && this_inplace) { + g_hash_table_remove(old->children, child_name); + stole_child = true; + } + } else if (op->create_dirs) { + if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { + err = ENOSPC; + goto out; + } + op->new_nr_nodes++; + child = xs_node_new(); + + /* + * If we're creating a new child, we can clearly modify it (and its + * children) in place from here on down. 
+ */ + op->inplace = true; + } else { + err = ENOENT; + goto out; + } + + /* + * Except for the temporary child-stealing as noted, our node has not + * changed yet. We don't yet know the overall operation will complete. + */ + err = xs_node_walk(&child, op); + if (err || !op->mutating) { + if (stole_child) { + /* Put it back as it was. */ + g_hash_table_replace(old->children, g_strdup(child_name), child); + } else { + xs_node_unref(child); + } + goto out; + } + + /* + * Now we know the operation has completed successfully and we're on + * the way back up. Make the change, substituting 'child' in the + * node at our level. + */ + if (!this_inplace) { + *n = xs_node_copy(old); + xs_node_unref(old); + } + + /* + * The child may be NULL here, for a remove operation. Either way, + * xs_node_add_child() will do the right thing and return a value + * indicating whether it changed the parent's hash table or not. + * + * We bump the parent gencnt if it adds a child that we *didn't* + * steal from it in the first place, or if child==NULL and was + * thus removed (whether we stole it earlier and didn't put it + * back, or xs_node_add_child() actually removed it now). + */ + if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) { + (*n)->gencnt++; + } + + out: + op->path[namelen] = '\0'; + if (!namelen) { + /* + * On completing the recursion back up the path walk and reaching the + * top, assign the new node count if the operation was successful. + */ + if (!err && op->mutating) { + op->s->nr_nodes = op->new_nr_nodes; + } + } + return err; +} + +static void append_directory_item(gpointer key, gpointer value, + gpointer user_data) +{ + GList **items = user_data; + + *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp); +} + +/* Populates items with char * names which caller must free. 
*/ +static int xs_node_directory(XsNode **n, struct walk_op *op) +{ + GList **items = op->op_opaque; + + assert(op->inplace); + assert(*n); + + if ((*n)->children) { + g_hash_table_foreach((*n)->children, append_directory_item, items); + } + + if (op->op_opaque2) { + *(uint64_t *)op->op_opaque2 = (*n)->gencnt; + } + + return 0; +} + +static int validate_path(char *outpath, const char *userpath, + unsigned int dom_id) +{ + size_t i, pathlen = strlen(userpath); + + /* A trailing '/' is only acceptable for the root path "/" itself */ + if (!pathlen || strstr(userpath, "//") || + (pathlen > 1 && userpath[pathlen - 1] == '/')) { + return EINVAL; + } + for (i = 0; i < pathlen; i++) { + if (!strchr(XS_VALID_CHARS, userpath[i])) { + return EINVAL; + } + } + if (userpath[0] == '/') { + if (pathlen > XENSTORE_ABS_PATH_MAX) { + return E2BIG; + } + memcpy(outpath, userpath, pathlen + 1); + } else { + if (pathlen > XENSTORE_REL_PATH_MAX) { + return E2BIG; + } + snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id, + userpath); + } + return 0; +} + + +static int init_walk_op(XenstoreImplState *s, struct walk_op *op, + xs_transaction_t tx_id, unsigned int dom_id, + const char *path, XsNode ***rootp) +{ + int ret = validate_path(op->path, path, dom_id); + if (ret) { + return ret; + } + + /* + * We use *two* NUL terminators at the end of the path, as during the walk + * we will temporarily turn each '/' into a NUL to allow us to use that + * path element for the lookup. + */ + op->path[strlen(op->path) + 1] = '\0'; + op->path[0] = '\0'; + op->inplace = true; + op->mutating = false; + op->create_dirs = false; + op->dom_id = dom_id; + op->s = s; + + if (tx_id == XBT_NULL) { + *rootp = &s->root; + op->new_nr_nodes = s->nr_nodes; + } else { + return ENOENT; + } + + return 0; +} + int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, xs_transaction_t tx_id, const char *path, GByteArray *data) { @@ -24,7 +481,17 @@ int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, * The data GByteArray shall exist, and will be freed by caller.
* Just g_byte_array_append() to it. */ - return ENOENT; + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_get_content; + op.op_opaque = data; + return xs_node_walk(n, &op); } int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, @@ -32,9 +499,21 @@ int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, { /* * The data GByteArray shall exist, will be freed by caller. You are - * free to use g_byte_array_steal() and keep the data. + * free to use g_byte_array_steal() and keep the data. Or just ref it. */ - return ENOSYS; + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_add_content; + op.op_opaque = data; + op.mutating = true; + op.create_dirs = true; + return xs_node_walk(n, &op); } int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, @@ -46,7 +525,18 @@ int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, * immediately so if you want to change it to (const char *) and keep * them, go ahead and change the caller. 
*/ - return ENOENT; + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_directory; + op.op_opaque = items; + op.op_opaque2 = gencnt; + return xs_node_walk(n, &op); } int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, @@ -64,7 +554,17 @@ int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, xs_transaction_t tx_id, const char *path) { - return ENOSYS; + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_rm; + op.mutating = true; + return xs_node_walk(n, &op); } int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, @@ -102,16 +602,29 @@ int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, const char *path, const char *token, xs_impl_watch_fn fn, void *opaque) { - /* Remove the watch that matches all four criteria */ + /* + * When calling the callback @fn, note that the path should + * precisely match the relative path that the guest provided, even + * if it was a relative path which needed to be prefixed with + * /local/domain/${domid}/ + */ return ENOSYS; } int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) { + /* Remove the watch that matches all four criteria */ return ENOSYS; } XenstoreImplState *xs_impl_create(void) { - return g_new0(XenstoreImplState, 1); + XenstoreImplState *s = g_new0(XenstoreImplState, 1); + + s->nr_nodes = 1; + s->root = xs_node_new(); +#ifdef XS_NODE_UNIT_TEST + s->root->name = g_strdup("/"); +#endif + return s; } -- cgit v1.1 From 6e1330090d361d5904587b492afaad5041e63b66 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 22 Jan 2023 18:38:23 +0000 Subject: hw/xen: Implement XenStore watches Starts out fairly simple: a hash table of watches based on the path. 
Except there can be multiple watches on the same path, so the watch ends up being a simple linked list, and the head of that list is in the hash table. Which makes removal a bit of a PITA but it's not so bad; we just special-case "I had to remove the head of the list and now I have to replace it in / remove it from the hash table". And if we don't remove the head, it's a simple linked-list operation. We do need to fire watches on *deleted* nodes, so instead of just a simple xs_node_unref() on the topmost victim, we need to recurse down and fire watches on them all. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xenstore_impl.c | 253 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 238 insertions(+), 15 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c index 9e10a31..9c23488 100644 --- a/hw/i386/kvm/xenstore_impl.c +++ b/hw/i386/kvm/xenstore_impl.c @@ -37,9 +37,20 @@ typedef struct XsNode { #endif } XsNode; +typedef struct XsWatch { + struct XsWatch *next; + xs_impl_watch_fn *cb; + void *cb_opaque; + char *token; + unsigned int dom_id; + int rel_prefix; +} XsWatch; + struct XenstoreImplState { XsNode *root; unsigned int nr_nodes; + GHashTable *watches; + unsigned int nr_domu_watches; }; static inline XsNode *xs_node_new(void) @@ -146,6 +157,7 @@ struct walk_op { void *op_opaque; void *op_opaque2; + GList *watches; unsigned int dom_id; /* The number of nodes which will exist in the tree if this op succeeds. 
*/ @@ -166,6 +178,35 @@ struct walk_op { bool create_dirs; }; +static void fire_watches(struct walk_op *op, bool parents) +{ + GList *l = NULL; + XsWatch *w; + + if (!op->mutating) { + return; + } + + if (parents) { + l = op->watches; + } + + w = g_hash_table_lookup(op->s->watches, op->path); + while (w || l) { + if (!w) { + /* Fire the parent nodes from 'op' if asked to */ + w = l->data; + l = l->next; + continue; + } + + assert(strlen(op->path) > w->rel_prefix); + w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token); + + w = w->next; + } +} + static int xs_node_add_content(XsNode **n, struct walk_op *op) { GByteArray *data = op->op_opaque; @@ -213,6 +254,8 @@ static int xs_node_get_content(XsNode **n, struct walk_op *op) static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) { struct walk_op *op = user_data; + int path_len = strlen(op->path); + int key_len = strlen(key); XsNode *n = value; bool this_inplace = op->inplace; @@ -220,12 +263,23 @@ static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) op->inplace = 0; } + assert(key_len + path_len + 2 <= sizeof(op->path)); + op->path[path_len] = '/'; + memcpy(op->path + path_len + 1, key, key_len + 1); + if (n->children) { g_hash_table_foreach_remove(n->children, node_rm_recurse, op); } op->new_nr_nodes--; /* + * Fire watches on *this* node but not the parents because they are + * going to be deleted too, so the watch will fire for them anyway. + */ + fire_watches(op, false); + op->path[path_len] = '\0'; + + /* * Actually deleting the child here is just an optimisation; if we * don't then the final unref on the topmost victim will just have * to cascade down again repeating all the g_hash_table_foreach() @@ -238,7 +292,7 @@ static int xs_node_rm(XsNode **n, struct walk_op *op) { bool this_inplace = op->inplace; - /* Keep count of the nodes in the subtree which gets deleted. 
*/ + /* Fire watches for, and count, nodes in the subtree which get deleted */ if ((*n)->children) { g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); } @@ -269,9 +323,11 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) XsNode *old = *n, *child = NULL; bool stole_child = false; bool this_inplace; + XsWatch *watch; int err; namelen = strlen(op->path); + watch = g_hash_table_lookup(op->s->watches, op->path); /* Is there a child, or do we hit the double-NUL termination? */ if (op->path[namelen + 1]) { @@ -292,6 +348,9 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) if (!child_name) { /* This is the actual node on which the operation shall be performed */ err = op->op_fn(n, op); + if (!err) { + fire_watches(op, true); + } goto out; } @@ -334,10 +393,23 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) } /* + * If there's a watch on this node, add it to the list to be fired + * (with the correct full pathname for the modified node) at the end. + */ + if (watch) { + op->watches = g_list_append(op->watches, watch); + } + + /* * Except for the temporary child-stealing as noted, our node has not * changed yet. We don't yet know the overall operation will complete. */ err = xs_node_walk(&child, op); + + if (watch) { + op->watches = g_list_remove(op->watches, watch); + } + if (err || !op->mutating) { if (stole_child) { /* Put it back as it was. */ @@ -375,6 +447,7 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) out: op->path[namelen] = '\0'; if (!namelen) { + assert(!op->watches); /* * On completing the recursion back up the path walk and reaching the * top, assign the new node count if the operation was successful. @@ -457,6 +530,7 @@ static int init_walk_op(XenstoreImplState *s, struct walk_op *op, * path element for the lookup. 
*/ op->path[strlen(op->path) + 1] = '\0'; + op->watches = NULL; op->path[0] = '\0'; op->inplace = true; op->mutating = false; @@ -589,38 +663,187 @@ int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, const char *token, xs_impl_watch_fn fn, void *opaque) { - /* - * When calling the callback @fn, note that the path should - * precisely match the relative path that the guest provided, even - * if it was a relative path which needed to be prefixed with - * /local/domain/${domid}/ - */ - return ENOSYS; + char abspath[XENSTORE_ABS_PATH_MAX + 1]; + XsWatch *w, *l; + int ret; + + ret = validate_path(abspath, path, dom_id); + if (ret) { + return ret; + } + + /* Check for duplicates */ + l = w = g_hash_table_lookup(s->watches, abspath); + while (w) { + if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque && + fn == w->cb && dom_id == w->dom_id) { + return EEXIST; + } + w = w->next; + } + + if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) { + return E2BIG; + } + + w = g_new0(XsWatch, 1); + w->token = g_strdup(token); + w->cb = fn; + w->cb_opaque = opaque; + w->dom_id = dom_id; + w->rel_prefix = strlen(abspath) - strlen(path); + + /* l was looked up above when checking for duplicates */ + if (l) { + w->next = l->next; + l->next = w; + } else { + g_hash_table_insert(s->watches, g_strdup(abspath), w); + } + if (dom_id) { + s->nr_domu_watches++; + } + + /* A new watch should fire immediately */ + fn(opaque, path, token); + + return 0; +} + +static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) +{ + XsWatch *next = w->next; + + if (w->dom_id) { + assert(s->nr_domu_watches); + s->nr_domu_watches--; + } + + g_free(w->token); + g_free(w); + + return next; } int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, const char *path, const char *token, xs_impl_watch_fn fn, void *opaque) { + char abspath[XENSTORE_ABS_PATH_MAX + 1]; + XsWatch *w, **l; + int ret; + + ret = 
validate_path(abspath, path, dom_id); + if (ret) { + return ret; + } + + w = g_hash_table_lookup(s->watches, abspath); + if (!w) { + return ENOENT; + } + /* - * When calling the callback @fn, note that the path should - * precisely match the relative path that the guest provided, even - * if it was a relative path which needed to be prefixed with - * /local/domain/${domid}/ + * The hash table contains the first element of a list of + * watches. Removing the first element in the list is a + * special case because we have to update the hash table to + * point to the next (or remove it if there's nothing left). */ - return ENOSYS; + if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque && + dom_id == w->dom_id) { + if (w->next) { + /* Insert the previous 'next' into the hash table */ + g_hash_table_insert(s->watches, g_strdup(abspath), w->next); + } else { + /* Nothing left; remove from hash table */ + g_hash_table_remove(s->watches, abspath); + } + free_watch(s, w); + return 0; + } + + /* + * We're all done messing with the hash table because the element + * it points to has survived the cull. Now it's just a simple + * linked list removal operation. + */ + for (l = &w->next; *l; l = &w->next) { + w = *l; + + if (!g_strcmp0(token, w->token) && fn == w->cb && + opaque == w->cb_opaque && dom_id == w->dom_id) { + *l = free_watch(s, w); + return 0; + } + } + + return ENOENT; } int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) { - /* Remove the watch that matches all four criteria */ - return ENOSYS; + char **watch_paths; + guint nr_watch_paths; + guint i; + + watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches, + &nr_watch_paths); + + for (i = 0; i < nr_watch_paths; i++) { + XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]); + XsWatch *w2, *w, **l; + + /* + * w1 is the original list. The hash table has this pointer. + * w2 is the head of our newly-filtered list. + * w and l are temporary for processing.
w is somewhat redundant + * with *l but makes my eyes bleed less. + */ + + w = w2 = w1; + l = &w; + while (w) { + if (w->dom_id == dom_id) { + /* If we're freeing the head of the list, bump w2 */ + if (w2 == w) { + w2 = w->next; + } + *l = free_watch(s, w); + } else { + l = &w->next; + } + w = *l; + } + /* + * If the head of the list survived the cull, we don't need to + * touch the hash table and we're done with this path. Else... + */ + if (w1 != w2) { + g_hash_table_steal(s->watches, watch_paths[i]); + + /* + * It was already freed. (Don't worry, this whole thing is + * single-threaded and nobody saw it in the meantime). And + * having *stolen* it, we now own the watch_paths[i] string + * so if we don't give it back to the hash table, we need + * to free it. + */ + if (w2) { + g_hash_table_insert(s->watches, watch_paths[i], w2); + } else { + g_free(watch_paths[i]); + } + } + } + g_free(watch_paths); + return 0; } XenstoreImplState *xs_impl_create(void) { XenstoreImplState *s = g_new0(XenstoreImplState, 1); + s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); s->nr_nodes = 1; s->root = xs_node_new(); #ifdef XS_NODE_UNIT_TEST -- cgit v1.1 From 7248b87cb0292a13c0309a4aba9f5daf7a76d297 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 22 Jan 2023 22:05:37 +0000 Subject: hw/xen: Implement XenStore transactions Given that the whole thing supported copy on write from the beginning, transactions end up being fairly simple. On starting a transaction, just take a ref of the existing root; swap it back in on a successful commit. The main tree has a transaction ID too, and we keep a record of the last transaction ID given out. if the main tree is ever modified when it isn't the latest, it gets a new transaction ID. A commit can only succeed if the main tree hasn't moved on since it was forked. 
Strictly speaking, the XenStore protocol allows a transaction to succeed as long as nothing *it* read or wrote has changed in the interim, but no implementations do that; *any* change is sufficient to abort a transaction. This does not yet fire watches on the changed nodes on a commit. That bit is more fun and will come in a follow-on commit. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xenstore_impl.c | 150 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 6 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c index 9c23488..0812e36 100644 --- a/hw/i386/kvm/xenstore_impl.c +++ b/hw/i386/kvm/xenstore_impl.c @@ -46,13 +46,56 @@ typedef struct XsWatch { int rel_prefix; } XsWatch; +typedef struct XsTransaction { + XsNode *root; + unsigned int nr_nodes; + unsigned int base_tx; + unsigned int tx_id; + unsigned int dom_id; +} XsTransaction; + struct XenstoreImplState { XsNode *root; unsigned int nr_nodes; GHashTable *watches; unsigned int nr_domu_watches; + GHashTable *transactions; + unsigned int nr_domu_transactions; + unsigned int root_tx; + unsigned int last_tx; }; + +static void nobble_tx(gpointer key, gpointer value, gpointer user_data) +{ + unsigned int *new_tx_id = user_data; + XsTransaction *tx = value; + + if (tx->base_tx == *new_tx_id) { + /* Transactions based on XBT_NULL will always fail */ + tx->base_tx = XBT_NULL; + } +} + +static inline unsigned int next_tx(struct XenstoreImplState *s) +{ + unsigned int tx_id; + + /* Find the next TX id which isn't either XBT_NULL or in use. */ + do { + tx_id = ++s->last_tx; + } while (tx_id == XBT_NULL || tx_id == s->root_tx || + g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id))); + + /* + * It is vanishingly unlikely, but ensure that no outstanding transaction + * is based on the (previous incarnation of the) newly-allocated TX id. 
+ */ + g_hash_table_foreach(s->transactions, nobble_tx, &tx_id); + + return tx_id; +} + static inline XsNode *xs_node_new(void) { XsNode *n = g_new0(XsNode, 1); @@ -159,6 +202,7 @@ struct walk_op { GList *watches; unsigned int dom_id; + unsigned int tx_id; /* The number of nodes which will exist in the tree if this op succeeds. */ unsigned int new_nr_nodes; @@ -176,6 +220,7 @@ struct walk_op { bool inplace; bool mutating; bool create_dirs; + bool in_transaction; }; static void fire_watches(struct walk_op *op, bool parents) @@ -183,7 +228,7 @@ static void fire_watches(struct walk_op *op, bool parents) GList *l = NULL; XsWatch *w; - if (!op->mutating) { + if (!op->mutating || op->in_transaction) { return; } @@ -450,10 +495,23 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) assert(!op->watches); /* * On completing the recursion back up the path walk and reaching the - * top, assign the new node count if the operation was successful. + * top, assign the new node count if the operation was successful. If + * the main tree was changed, bump its tx ID so that outstanding + * transactions correctly fail. But don't bump it every time; only + * if it makes a difference. 
*/ if (!err && op->mutating) { - op->s->nr_nodes = op->new_nr_nodes; + if (!op->in_transaction) { + if (op->s->root_tx != op->s->last_tx) { + op->s->root_tx = next_tx(op->s); + } + op->s->nr_nodes = op->new_nr_nodes; + } else { + XsTransaction *tx = g_hash_table_lookup(op->s->transactions, + GINT_TO_POINTER(op->tx_id)); + assert(tx); + tx->nr_nodes = op->new_nr_nodes; + } } } return err; @@ -535,14 +593,23 @@ static int init_walk_op(XenstoreImplState *s, struct walk_op *op, op->inplace = true; op->mutating = false; op->create_dirs = false; + op->in_transaction = false; op->dom_id = dom_id; + op->tx_id = tx_id; op->s = s; if (tx_id == XBT_NULL) { *rootp = &s->root; op->new_nr_nodes = s->nr_nodes; } else { - return ENOENT; + XsTransaction *tx = g_hash_table_lookup(s->transactions, + GINT_TO_POINTER(tx_id)); + if (!tx) { + return ENOENT; + } + *rootp = &tx->root; + op->new_nr_nodes = tx->nr_nodes; + op->in_transaction = true; } return 0; @@ -616,13 +683,71 @@ int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, xs_transaction_t *tx_id) { - return ENOSYS; + XsTransaction *tx; + + if (*tx_id != XBT_NULL) { + return EINVAL; + } + + if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) { + return ENOSPC; + } + + tx = g_new0(XsTransaction, 1); + + tx->nr_nodes = s->nr_nodes; + tx->tx_id = next_tx(s); + tx->base_tx = s->root_tx; + tx->root = xs_node_ref(s->root); + tx->dom_id = dom_id; + + g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx); + if (dom_id) { + s->nr_domu_transactions++; + } + *tx_id = tx->tx_id; + return 0; +} + +static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) +{ + if (s->root_tx != tx->base_tx) { + return EAGAIN; + } + xs_node_unref(s->root); + s->root = tx->root; + tx->root = NULL; + s->root_tx = tx->tx_id; + s->nr_nodes = tx->nr_nodes; + + /* + * XX: Walk the new root and fire watches on any node which has a + * refcount of 
one (which is therefore unique to this transaction). + */ + return 0; } int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, xs_transaction_t tx_id, bool commit) { - return ENOSYS; + int ret = 0; + XsTransaction *tx = g_hash_table_lookup(s->transactions, + GINT_TO_POINTER(tx_id)); + + if (!tx || tx->dom_id != dom_id) { + return ENOENT; + } + + if (commit) { + ret = transaction_commit(s, tx); + } + + g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id)); + if (dom_id) { + assert(s->nr_domu_transactions); + s->nr_domu_transactions--; + } + return ret; } int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, @@ -839,15 +964,28 @@ int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) return 0; } +static void xs_tx_free(void *_tx) +{ + XsTransaction *tx = _tx; + if (tx->root) { + xs_node_unref(tx->root); + } + g_free(tx); +} + XenstoreImplState *xs_impl_create(void) { XenstoreImplState *s = g_new0(XenstoreImplState, 1); s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, xs_tx_free); s->nr_nodes = 1; s->root = xs_node_new(); #ifdef XS_NODE_UNIT_TEST s->root->name = g_strdup("/"); #endif + + s->root_tx = s->last_tx = 1; return s; } -- cgit v1.1 From 7cabbdb70df64fc7b0ed05f3e6aa4e1990eadc77 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 22 Jan 2023 22:59:49 +0000 Subject: hw/xen: Watches on XenStore transactions Firing watches on the nodes that still exist is relatively easy; just walk the tree and look at the nodes with refcount of one. Firing watches on *deleted* nodes is more fun. We add 'modified_in_tx' and 'deleted_in_tx' flags to each node. Nodes with those flags cannot be shared, as they will always be unique to the transaction in which they were created. 
When xs_node_walk would need to *create* a node as scaffolding and it encounters a deleted_in_tx node, it can resurrect it simply by clearing its deleted_in_tx flag. If that node originally had any *data*, they're gone, and the modified_in_tx flag will have been set when it was first deleted. We then attempt to send appropriate watches when the transaction is committed, properly delete the deleted_in_tx nodes, and remove the modified_in_tx flag from the others. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xenstore_impl.c | 151 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c index 0812e36..60f42f6 100644 --- a/hw/i386/kvm/xenstore_impl.c +++ b/hw/i386/kvm/xenstore_impl.c @@ -32,6 +32,8 @@ typedef struct XsNode { GByteArray *content; GHashTable *children; uint64_t gencnt; + bool deleted_in_tx; + bool modified_in_tx; #ifdef XS_NODE_UNIT_TEST gchar *name; /* debug only */ #endif @@ -153,6 +155,13 @@ static XsNode *xs_node_copy(XsNode *old) XsNode *n = xs_node_new(); n->gencnt = old->gencnt; + +#ifdef XS_NODE_UNIT_TEST + if (n->name) { + n->name = g_strdup(old->name); + } +#endif + if (old->children) { n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, (GDestroyNotify)xs_node_unref); @@ -221,6 +230,9 @@ struct walk_op { bool mutating; bool create_dirs; bool in_transaction; + + /* Tracking during recursion so we know which is first. 
*/ + bool deleted_in_tx; }; static void fire_watches(struct walk_op *op, bool parents) @@ -277,6 +289,9 @@ static int xs_node_add_content(XsNode **n, struct walk_op *op) g_byte_array_unref((*n)->content); } (*n)->content = g_byte_array_ref(data); + if (op->tx_id != XBT_NULL) { + (*n)->modified_in_tx = true; + } return 0; } @@ -333,10 +348,62 @@ static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) return this_inplace; } +static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op); +static void copy_deleted_recurse(gpointer key, gpointer value, + gpointer user_data) +{ + struct walk_op *op = user_data; + GHashTable *siblings = op->op_opaque2; + XsNode *n = xs_node_copy_deleted(value, op); + + /* + * Reinsert the deleted_in_tx copy of the node into the parent's + * 'children' hash table. That table was stashed from op->op_opaque2 + * before the recursive call to xs_node_copy_deleted() scribbled + * over it. + */ + g_hash_table_insert(siblings, g_strdup(key), n); +} + +static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) +{ + XsNode *n = xs_node_new(); + + n->gencnt = old->gencnt; + +#ifdef XS_NODE_UNIT_TEST + if (old->name) { + n->name = g_strdup(old->name); + } +#endif + + if (old->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + op->op_opaque2 = n->children; + g_hash_table_foreach(old->children, copy_deleted_recurse, op); + } + n->deleted_in_tx = true; + /* If it gets resurrected we only fire a watch if it lost its content */ + if (old->content) { + n->modified_in_tx = true; + } + op->new_nr_nodes--; + return n; +} + static int xs_node_rm(XsNode **n, struct walk_op *op) { bool this_inplace = op->inplace; + if (op->tx_id != XBT_NULL) { + /* It's not trivial to do inplace handling for this one */ + XsNode *old = *n; + *n = xs_node_copy_deleted(old, op); + xs_node_unref(old); + return 0; + } + /* Fire watches for, and count, nodes in the subtree which get
deleted */ if ((*n)->children) { g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); @@ -408,6 +475,10 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) } if (child) { + if (child->deleted_in_tx) { + assert(child->ref == 1); + /* Cannot actually set child->deleted_in_tx = false until later */ + } xs_node_ref(child); /* * Now we own it too. But if we can modify inplace, that's going to @@ -476,6 +547,15 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) } /* + * If we resurrected a deleted_in_tx node, we can mark it as no longer + * deleted now that we know the overall operation has succeeded. + */ + if (op->create_dirs && child && child->deleted_in_tx) { + op->new_nr_nodes++; + child->deleted_in_tx = false; + } + + /* * The child may be NULL here, for a remove operation. Either way, * xs_node_add_child() will do the right thing and return a value * indicating whether it changed the parent's hash table or not. @@ -709,8 +789,69 @@ int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, return 0; } +static gboolean tx_commit_walk(gpointer key, gpointer value, + gpointer user_data) +{ + struct walk_op *op = user_data; + int path_len = strlen(op->path); + int key_len = strlen(key); + bool fire_parents = true; + XsWatch *watch; + XsNode *n = value; + + if (n->ref != 1) { + return false; + } + + if (n->deleted_in_tx) { + /* + * We fire watches on our parents if we are the *first* node + * to be deleted (the topmost one). This matches the behaviour + * when deleting in the live tree. 
+ */ + fire_parents = !op->deleted_in_tx; + + /* Only used on the way down so no need to clear it later */ + op->deleted_in_tx = true; + } + + assert(key_len + path_len + 2 <= sizeof(op->path)); + op->path[path_len] = '/'; + memcpy(op->path + path_len + 1, key, key_len + 1); + + watch = g_hash_table_lookup(op->s->watches, op->path); + if (watch) { + op->watches = g_list_append(op->watches, watch); + } + + if (n->children) { + g_hash_table_foreach_remove(n->children, tx_commit_walk, op); + } + + if (watch) { + op->watches = g_list_remove(op->watches, watch); + } + + /* + * Don't fire watches if this node was only copied because a + * descendent was changed. The modified_in_tx flag indicates the + * ones which were really changed. + */ + if (n->modified_in_tx || n->deleted_in_tx) { + fire_watches(op, fire_parents); + n->modified_in_tx = false; + } + op->path[path_len] = '\0'; + + /* Deleted nodes really do get expunged when we commit */ + return n->deleted_in_tx; +} + static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) { + struct walk_op op; + XsNode **n; + if (s->root_tx != tx->base_tx) { return EAGAIN; } @@ -720,10 +861,18 @@ static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) s->root_tx = tx->tx_id; s->nr_nodes = tx->nr_nodes; + init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n); + op.deleted_in_tx = false; + op.mutating = true; + /* - * XX: Walk the new root and fire watches on any node which has a + * Walk the new root and fire watches on any node which has a * refcount of one (which is therefore unique to this transaction). */ + if (s->root->children) { + g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op); + } + return 0; } -- cgit v1.1 From be1934dfefe74aa1b978c0cda64c2b6282301196 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 23 Jan 2023 16:21:16 +0000 Subject: hw/xen: Implement XenStore permissions Store perms as a GList of strings, check permissions. 
Signed-off-by: Paul Durrant Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_xenstore.c | 2 +- hw/i386/kvm/xenstore_impl.c | 259 +++++++++++++++++++++++++++++++++++++++++--- hw/i386/kvm/xenstore_impl.h | 8 +- 3 files changed, 249 insertions(+), 20 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 64d8f1a..3b409e3 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -98,7 +98,7 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true, xen_xenstore_event, NULL, NULL, NULL, s); - s->impl = xs_impl_create(); + s->impl = xs_impl_create(xen_domid); } static bool xen_xenstore_is_needed(void *opaque) diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c index 60f42f6..8a2053e 100644 --- a/hw/i386/kvm/xenstore_impl.c +++ b/hw/i386/kvm/xenstore_impl.c @@ -12,6 +12,8 @@ #include "qemu/osdep.h" #include "qom/object.h" +#include "hw/xen/xen.h" + #include "xen_xenstore.h" #include "xenstore_impl.h" @@ -30,6 +32,7 @@ typedef struct XsNode { uint32_t ref; GByteArray *content; + GList *perms; GHashTable *children; uint64_t gencnt; bool deleted_in_tx; @@ -133,6 +136,9 @@ static inline void xs_node_unref(XsNode *n) if (n->content) { g_byte_array_unref(n->content); } + if (n->perms) { + g_list_free_full(n->perms, g_free); + } if (n->children) { g_hash_table_unref(n->children); } @@ -144,8 +150,51 @@ static inline void xs_node_unref(XsNode *n) g_free(n); } +char *xs_perm_as_string(unsigned int perm, unsigned int domid) +{ + char letter; + + switch (perm) { + case XS_PERM_READ | XS_PERM_WRITE: + letter = 'b'; + break; + case XS_PERM_READ: + letter = 'r'; + break; + case XS_PERM_WRITE: + letter = 'w'; + break; + case XS_PERM_NONE: + default: + letter = 'n'; + break; + } + + return g_strdup_printf("%c%u", letter, domid); +} + +static gpointer do_perm_copy(gconstpointer src, gpointer 
user_data) +{ + return g_strdup(src); +} + +static XsNode *xs_node_create(const char *name, GList *perms) +{ + XsNode *n = xs_node_new(); + +#ifdef XS_NODE_UNIT_TEST + if (name) { + n->name = g_strdup(name); + } +#endif + + n->perms = g_list_copy_deep(perms, do_perm_copy, NULL); + + return n; +} + /* For copying from one hash table to another using g_hash_table_foreach() */ -static void do_insert(gpointer key, gpointer value, gpointer user_data) +static void do_child_insert(gpointer key, gpointer value, gpointer user_data) { g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); } @@ -162,12 +211,16 @@ static XsNode *xs_node_copy(XsNode *old) } #endif + assert(old); if (old->children) { n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, (GDestroyNotify)xs_node_unref); - g_hash_table_foreach(old->children, do_insert, n->children); + g_hash_table_foreach(old->children, do_child_insert, n->children); } - if (old && old->content) { + if (old->perms) { + n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); + } + if (old->content) { n->content = g_byte_array_ref(old->content); } return n; @@ -383,6 +436,9 @@ static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) op->op_opaque2 = n->children; g_hash_table_foreach(old->children, copy_deleted_recurse, op); } + if (old->perms) { + n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); + } n->deleted_in_tx = true; /* If it gets resurrected we only fire a watch if it lost its content */ if (old->content) { @@ -417,6 +473,104 @@ static int xs_node_rm(XsNode **n, struct walk_op *op) return 0; } +static int xs_node_get_perms(XsNode **n, struct walk_op *op) +{ + GList **perms = op->op_opaque; + + assert(op->inplace); + assert(*n); + + *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL); + return 0; +} + +static void parse_perm(const char *perm, char *letter, unsigned int *dom_id) +{ + unsigned int n = sscanf(perm, "%c%u", letter, dom_id); + + assert(n == 2); +} + 
+static bool can_access(unsigned int dom_id, GList *perms, const char *letters) +{ + unsigned int i, n; + char perm_letter; + unsigned int perm_dom_id; + bool access; + + if (dom_id == 0) { + return true; + } + + n = g_list_length(perms); + assert(n >= 1); + + /* + * The dom_id of the first perm is the owner, and the owner always has + * read-write access. + */ + parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id); + if (dom_id == perm_dom_id) { + return true; + } + + /* + * The letter of the first perm specified the default access for all other + * domains. + */ + access = !!strchr(letters, perm_letter); + for (i = 1; i < n; i++) { + parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id); + if (dom_id != perm_dom_id) { + continue; + } + access = !!strchr(letters, perm_letter); + } + + return access; +} + +static int xs_node_set_perms(XsNode **n, struct walk_op *op) +{ + GList *perms = op->op_opaque; + + if (op->dom_id) { + unsigned int perm_dom_id; + char perm_letter; + + /* A guest may not change permissions on nodes it does not own */ + if (!can_access(op->dom_id, (*n)->perms, "")) { + return EPERM; + } + + /* A guest may not change the owner of a node it owns. */ + parse_perm(perms->data, &perm_letter, &perm_dom_id); + if (perm_dom_id != op->dom_id) { + return EPERM; + } + + if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) { + return ENOSPC; + } + } + + /* We *are* the node to be written. Either this or a copy. */ + if (!op->inplace) { + XsNode *old = *n; + *n = xs_node_copy(old); + xs_node_unref(old); + } + + if ((*n)->perms) { + g_list_free_full((*n)->perms, g_free); + } + (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL); + if (op->tx_id != XBT_NULL) { + (*n)->modified_in_tx = true; + } + return 0; +} + /* * Passed a full reference in *n which it may free if it needs to COW. * @@ -458,6 +612,13 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) } if (!child_name) { + const char *letters = op->mutating ? 
"wb" : "rb"; + + if (!can_access(op->dom_id, old->perms, letters)) { + err = EACCES; + goto out; + } + /* This is the actual node on which the operation shall be performed */ err = op->op_fn(n, op); if (!err) { @@ -491,12 +652,20 @@ static int xs_node_walk(XsNode **n, struct walk_op *op) stole_child = true; } } else if (op->create_dirs) { + assert(op->mutating); + + if (!can_access(op->dom_id, old->perms, "wb")) { + err = EACCES; + goto out; + } + if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { err = ENOSPC; goto out; } + + child = xs_node_create(child_name, old->perms); op->new_nr_nodes++; - child = xs_node_new(); /* * If we're creating a new child, we can clearly modify it (and its @@ -918,20 +1087,73 @@ int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, xs_transaction_t tx_id, const char *path, GList **perms) { - /* - * The perms are (char *) in the wire format to be - * freed by the caller. - */ - return ENOSYS; + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_get_perms; + op.op_opaque = perms; + return xs_node_walk(n, &op); +} + +static void is_valid_perm(gpointer data, gpointer user_data) +{ + char *perm = data; + bool *valid = user_data; + char letter; + unsigned int dom_id; + + if (!*valid) { + return; + } + + if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) { + *valid = false; + return; + } + + switch (letter) { + case 'n': + case 'r': + case 'w': + case 'b': + break; + + default: + *valid = false; + break; + } } int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, xs_transaction_t tx_id, const char *path, GList *perms) { - /* - * The perms are (const char *) in the wire format. 
- */ - return ENOSYS; + struct walk_op op; + XsNode **n; + bool valid = true; + int ret; + + if (!g_list_length(perms)) { + return EINVAL; + } + + g_list_foreach(perms, is_valid_perm, &valid); + if (!valid) { + return EINVAL; + } + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_set_perms; + op.op_opaque = perms; + op.mutating = true; + return xs_node_walk(n, &op); } int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, @@ -1122,18 +1344,19 @@ static void xs_tx_free(void *_tx) g_free(tx); } -XenstoreImplState *xs_impl_create(void) +XenstoreImplState *xs_impl_create(unsigned int dom_id) { XenstoreImplState *s = g_new0(XenstoreImplState, 1); + GList *perms; s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, xs_tx_free); + + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0)); + s->root = xs_node_create("/", perms); + g_list_free_full(perms, g_free); s->nr_nodes = 1; - s->root = xs_node_new(); -#ifdef XS_NODE_UNIT_TEST - s->root->name = g_strdup("/"); -#endif s->root_tx = s->last_tx = 1; return s; diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h index beb7b29..2f81251 100644 --- a/hw/i386/kvm/xenstore_impl.h +++ b/hw/i386/kvm/xenstore_impl.h @@ -16,9 +16,15 @@ typedef uint32_t xs_transaction_t; #define XBT_NULL 0 +#define XS_PERM_NONE 0x00 +#define XS_PERM_READ 0x01 +#define XS_PERM_WRITE 0x02 + typedef struct XenstoreImplState XenstoreImplState; -XenstoreImplState *xs_impl_create(void); +XenstoreImplState *xs_impl_create(unsigned int dom_id); + +char *xs_perm_as_string(unsigned int perm, unsigned int domid); /* * These functions return *positive* error numbers. 
This is a little -- cgit v1.1 From 766804b101d7e452ad85995c231a5c3454f4e25b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 31 Jan 2023 15:00:54 +0000 Subject: hw/xen: Implement core serialize/deserialize methods for xenstore_impl This implements the basic migration support in the back end, with unit tests that give additional confidence in the node-counting already in the tree. However, the existing PV back ends like xen-disk don't support migration yet. They will reset the ring and fail to continue where they left off. We will fix that in future, but not in time for the 8.0 release. Since there's also an open question of whether we want to serialize the full XenStore or only the guest-owned nodes in /local/domain/${domid}, for now just mark the XenStore device as unmigratable. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_xenstore.c | 26 +- hw/i386/kvm/xenstore_impl.c | 574 +++++++++++++++++++++++++++++++++++++++++++- hw/i386/kvm/xenstore_impl.h | 5 + 3 files changed, 599 insertions(+), 6 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 3b409e3..520422b 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -66,6 +66,9 @@ struct XenXenstoreState { evtchn_port_t guest_port; evtchn_port_t be_port; struct xenevtchn_handle *eh; + + uint8_t *impl_state; + uint32_t impl_state_size; }; struct XenXenstoreState *xen_xenstore_singleton; @@ -109,16 +112,26 @@ static bool xen_xenstore_is_needed(void *opaque) static int xen_xenstore_pre_save(void *opaque) { XenXenstoreState *s = opaque; + GByteArray *save; if (s->eh) { s->guest_port = xen_be_evtchn_get_guest_port(s->eh); } + + g_free(s->impl_state); + save = xs_impl_serialize(s->impl); + s->impl_state = save->data; + s->impl_state_size = save->len; + g_byte_array_free(save, false); + return 0; } static int xen_xenstore_post_load(void *opaque, int ver) { XenXenstoreState *s = opaque; + GByteArray *save; + int 
ret; /* * As qemu/dom0, rebind to the guest's port. The Windows drivers may @@ -135,11 +148,18 @@ static int xen_xenstore_post_load(void *opaque, int ver) } s->be_port = be_port; } - return 0; + + save = g_byte_array_new_take(s->impl_state, s->impl_state_size); + s->impl_state = NULL; + s->impl_state_size = 0; + + ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s); + return ret; } static const VMStateDescription xen_xenstore_vmstate = { .name = "xen_xenstore", + .unmigratable = 1, /* The PV back ends don't migrate yet */ .version_id = 1, .minimum_version_id = 1, .needed = xen_xenstore_is_needed, @@ -155,6 +175,10 @@ static const VMStateDescription xen_xenstore_vmstate = { VMSTATE_BOOL(rsp_pending, XenXenstoreState), VMSTATE_UINT32(guest_port, XenXenstoreState), VMSTATE_BOOL(fatal_error, XenXenstoreState), + VMSTATE_UINT32(impl_state_size, XenXenstoreState), + VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState, + impl_state_size, 0, + vmstate_info_uint8, uint8_t), VMSTATE_END_OF_LIST() } }; diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c index 8a2053e..305fe75 100644 --- a/hw/i386/kvm/xenstore_impl.c +++ b/hw/i386/kvm/xenstore_impl.c @@ -37,6 +37,7 @@ typedef struct XsNode { uint64_t gencnt; bool deleted_in_tx; bool modified_in_tx; + unsigned int serialized_tx; #ifdef XS_NODE_UNIT_TEST gchar *name; /* debug only */ #endif @@ -68,6 +69,7 @@ struct XenstoreImplState { unsigned int nr_domu_transactions; unsigned int root_tx; unsigned int last_tx; + bool serialized; }; @@ -1156,8 +1158,10 @@ int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, return xs_node_walk(n, &op); } -int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, - const char *token, xs_impl_watch_fn fn, void *opaque) +static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, + xs_impl_watch_fn fn, void *opaque) + { char abspath[XENSTORE_ABS_PATH_MAX + 1]; XsWatch *w, 
*l; @@ -1200,12 +1204,22 @@ int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, s->nr_domu_watches++; } - /* A new watch should fire immediately */ - fn(opaque, path, token); - return 0; } +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque) +{ + int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque); + + if (!ret) { + /* A new watch should fire immediately */ + fn(opaque, path, token); + } + + return ret; +} + static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) { XsWatch *next = w->next; @@ -1361,3 +1375,553 @@ XenstoreImplState *xs_impl_create(unsigned int dom_id) s->root_tx = s->last_tx = 1; return s; } + + +static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque) +{ + XsNode *n = value; + + n->serialized_tx = XBT_NULL; + if (n->children) { + g_hash_table_foreach(n->children, clear_serialized_tx, NULL); + } +} + +static void clear_tx_serialized_tx(gpointer key, gpointer value, + gpointer opaque) +{ + XsTransaction *t = value; + + clear_serialized_tx(NULL, t->root, NULL); +} + +static void write_be32(GByteArray *save, uint32_t val) +{ + uint32_t be = htonl(val); + g_byte_array_append(save, (void *)&be, sizeof(be)); +} + + +struct save_state { + GByteArray *bytes; + unsigned int tx_id; +}; + +#define MODIFIED_IN_TX (1U << 0) +#define DELETED_IN_TX (1U << 1) +#define NODE_REF (1U << 2) + +static void save_node(gpointer key, gpointer value, gpointer opaque) +{ + struct save_state *ss = opaque; + XsNode *n = value; + char *name = key; + uint8_t flag = 0; + + /* Child nodes (i.e. anything but the root) have a name */ + if (name) { + g_byte_array_append(ss->bytes, key, strlen(key) + 1); + } + + /* + * If we already wrote this node, refer to the previous copy. + * There's no rename/move in XenStore, so all we need to find + * it is the tx_id of the transaction in which it exists. Which + * may be the root tx.
+ */ + if (n->serialized_tx != XBT_NULL) { + flag = NODE_REF; + g_byte_array_append(ss->bytes, &flag, 1); + write_be32(ss->bytes, n->serialized_tx); + } else { + GList *l; + n->serialized_tx = ss->tx_id; + + if (n->modified_in_tx) { + flag |= MODIFIED_IN_TX; + } + if (n->deleted_in_tx) { + flag |= DELETED_IN_TX; + } + g_byte_array_append(ss->bytes, &flag, 1); + + if (n->content) { + write_be32(ss->bytes, n->content->len); + g_byte_array_append(ss->bytes, n->content->data, n->content->len); + } else { + write_be32(ss->bytes, 0); + } + + for (l = n->perms; l; l = l->next) { + g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1); + } + /* NUL termination after perms */ + g_byte_array_append(ss->bytes, (void *)"", 1); + + if (n->children) { + g_hash_table_foreach(n->children, save_node, ss); + } + /* NUL termination after children (child name is NUL) */ + g_byte_array_append(ss->bytes, (void *)"", 1); + } +} + +static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root) +{ + write_be32(ss->bytes, tx_id); + ss->tx_id = tx_id; + save_node(NULL, root, ss); +} + +static void save_tx(gpointer key, gpointer value, gpointer opaque) +{ + uint32_t tx_id = GPOINTER_TO_INT(key); + struct save_state *ss = opaque; + XsTransaction *n = value; + + write_be32(ss->bytes, n->base_tx); + write_be32(ss->bytes, n->dom_id); + + save_tree(ss, tx_id, n->root); +} + +static void save_watch(gpointer key, gpointer value, gpointer opaque) +{ + struct save_state *ss = opaque; + XsWatch *w = value; + + /* We only save the *guest* watches. 
*/ + if (w->dom_id) { + gpointer relpath = key + w->rel_prefix; + g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1); + g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1); + } +} + +GByteArray *xs_impl_serialize(XenstoreImplState *s) +{ + struct save_state ss; + + ss.bytes = g_byte_array_new(); + + /* + * node = flags [ real_node / node_ref ] + * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF) + * node_ref = tx_id (in which the original version of this node exists) + * real_node = content perms child* NUL + * content = len data + * len = uint32_t + * data = uint8_t{len} + * perms = perm* NUL + * perm = asciiz + * child = name node + * name = asciiz + * + * tree = tx_id node + * tx_id = uint32_t + * + * transaction = base_tx_id dom_id tree + * base_tx_id = uint32_t + * dom_id = uint32_t + * + * tx_list = tree transaction* XBT_NULL + * + * watch = path token + * path = asciiz + * token = asciiz + * + * watch_list = watch* NUL + * + * xs_serialize_stream = last_tx tx_list watch_list + * last_tx = uint32_t + */ + + /* Clear serialized_tx in every node. 
*/ + if (s->serialized) { + clear_serialized_tx(NULL, s->root, NULL); + g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL); + } + + s->serialized = true; + + write_be32(ss.bytes, s->last_tx); + save_tree(&ss, s->root_tx, s->root); + g_hash_table_foreach(s->transactions, save_tx, &ss); + + write_be32(ss.bytes, XBT_NULL); + + g_hash_table_foreach(s->watches, save_watch, &ss); + g_byte_array_append(ss.bytes, (void *)"", 1); + + return ss.bytes; +} + +struct unsave_state { + char path[XENSTORE_ABS_PATH_MAX + 1]; + XenstoreImplState *s; + GByteArray *bytes; + uint8_t *d; + size_t l; + bool root_walk; +}; + +static int consume_be32(struct unsave_state *us, unsigned int *val) +{ + uint32_t d; + + if (us->l < sizeof(d)) { + return -EINVAL; + } + memcpy(&d, us->d, sizeof(d)); + *val = ntohl(d); + us->d += sizeof(d); + us->l -= sizeof(d); + return 0; +} + +static int consume_string(struct unsave_state *us, char **str, size_t *len) +{ + size_t l; + + if (!us->l) { + return -EINVAL; + } + + l = strnlen((void *)us->d, us->l); + if (l == us->l) { + return -EINVAL; + } + + if (str) { + *str = (void *)us->d; + } + if (len) { + *len = l; + } + + us->d += l + 1; + us->l -= l + 1; + return 0; +} + +static XsNode *lookup_node(XsNode *n, char *path) +{ + char *slash = strchr(path, '/'); + XsNode *child; + + if (path[0] == '\0') { + return n; + } + + if (slash) { + *slash = '\0'; + } + + if (!n->children) { + return NULL; + } + child = g_hash_table_lookup(n->children, path); + if (!slash) { + return child; + } + + *slash = '/'; + if (!child) { + return NULL; + } + return lookup_node(child, slash + 1); +} + +static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id) +{ + XsTransaction *t; + if (tx_id == us->s->root_tx) { + return lookup_node(us->s->root, us->path + 1); + } + + t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id)); + if (!t) { + return NULL; + } + g_assert(t->root); + return lookup_node(t->root, us->path + 1); +} + +static 
void count_child_nodes(gpointer key, gpointer value, gpointer user_data) +{ + unsigned int *nr_nodes = user_data; + XsNode *n = value; + + (*nr_nodes)++; + + if (n->children) { + g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); + } +} + +/* + * Consume one serialized node, and recursively its children, from the + * stream. A NODE_REF record resolves to the node at the current path in + * the tree of the referenced transaction and takes a reference on it; any + * other record builds a new node from its content, permissions and + * children. Returns 0 with '*nodep' set, or a negative errno value if the + * stream is truncated or malformed. + */ +static int consume_node(struct unsave_state *us, XsNode **nodep, + unsigned int *nr_nodes) +{ + XsNode *n = NULL; + uint8_t flags; + int ret; + + if (us->l < 1) { + return -EINVAL; + } + flags = us->d[0]; + us->d++; + us->l--; + + if (flags == NODE_REF) { + unsigned int tx; + + ret = consume_be32(us, &tx); + if (ret) { + return ret; + } + + n = lookup_tx_node(us, tx); + if (!n) { + return -EINVAL; + } + n->ref++; + if (n->children) { + g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); + } + } else { + uint32_t datalen; + + if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) { + return -EINVAL; + } + n = xs_node_new(); + + if (flags & DELETED_IN_TX) { + n->deleted_in_tx = true; + } + if (flags & MODIFIED_IN_TX) { + n->modified_in_tx = true; + } + ret = consume_be32(us, &datalen); + if (ret) { + xs_node_unref(n); + return -EINVAL; + } + if (datalen) { + if (datalen > us->l) { + xs_node_unref(n); + return -EINVAL; + } + + GByteArray *node_data = g_byte_array_new(); + g_byte_array_append(node_data, us->d, datalen); + us->d += datalen; + us->l -= datalen; + n->content = node_data; + + if (us->root_walk) { + n->modified_in_tx = true; + } + } + while (1) { + char *perm = NULL; + size_t permlen = 0; + + ret = consume_string(us, &perm, &permlen); + if (ret) { + xs_node_unref(n); + return ret; + } + + if (!permlen) { + break; + } + + n->perms = g_list_append(n->perms, g_strdup(perm)); + } + + /* Now children */ + while (1) { + size_t childlen; + char *childname; + char *pathend; + XsNode *child = NULL; + + ret = consume_string(us, &childname, &childlen); + if (ret) { + xs_node_unref(n); + return ret; + } + + if (!childlen) { + break; + } + + /* + * Append "/<childname>" to the current path. Check the space left + * first: the size argument of strncat() bounds the bytes appended, + * not the destination, so it cannot protect us->path from a stream + * whose nesting is too deep or whose names are too long. + */ + pathend = us->path + strlen(us->path); + if (childlen + 1 >= sizeof(us->path) - (size_t)(pathend - us->path)) { + xs_node_unref(n); + return -EINVAL; + } + *pathend = '/'; + memcpy(pathend + 1, childname, childlen + 1); + + ret = consume_node(us, &child, nr_nodes); + *pathend = '\0'; + if (ret) { + xs_node_unref(n); + return ret; + } + g_assert(child); + xs_node_add_child(n, childname, child); + } + + /* + * If the node has no data and no children we still want to fire + * a watch on it. + */ + if (us->root_walk && !n->children) { + n->modified_in_tx = true; + } + } + + if (!n->deleted_in_tx) { + (*nr_nodes)++; + } + + *nodep = n; + return 0; +}
+ +static int consume_tree(struct unsave_state *us, XsTransaction *t) +{ + int ret; + + ret = consume_be32(us, &t->tx_id); + if (ret) { + return ret; + } + + if (t->tx_id > us->s->last_tx) { + return -EINVAL; + } + + us->path[0] = '\0'; + + return consume_node(us, &t->root, &t->nr_nodes); +} + +int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, + unsigned int dom_id, xs_impl_watch_fn watch_fn, + void *watch_opaque) +{ + struct unsave_state us; + XsTransaction base_t = { 0 }; + int ret; + + us.s = s; + us.bytes = bytes; + us.d = bytes->data; + us.l = bytes->len; + + xs_impl_reset_watches(s, dom_id); + g_hash_table_remove_all(s->transactions); + + xs_node_unref(s->root); + s->root = NULL; + s->root_tx = s->last_tx = XBT_NULL; + + ret = consume_be32(&us, &s->last_tx); + if (ret) { + return ret; + } + + /* + * Consume the base tree into a transaction so that watches can be + * fired as we commit it. By setting us.root_walk we cause the nodes + * to be marked as 'modified_in_tx' as they are created, so that the + * watches are triggered on them. + */ + base_t.dom_id = dom_id; + base_t.base_tx = XBT_NULL; + us.root_walk = true; + ret = consume_tree(&us, &base_t); + if (ret) { + return ret; + } + us.root_walk = false; + + /* + * Commit the transaction now while the refcount on all nodes is 1. + * Note that we haven't yet reinstated the *guest* watches but that's + * OK because we don't want the guest to see any changes.
Even any + * backend nodes which get recreated should be *precisely* as they + * were before the migration. Back ends may have been instantiated + * already, and will see the frontend magically blink into existence + * now (well, from the aio_bh which fires the watches). It's their + * responsibility to rebuild everything precisely as it was before. + */ + ret = transaction_commit(s, &base_t); + if (ret) { + return ret; + } + + while (1) { + unsigned int base_tx; + XsTransaction *t; + + ret = consume_be32(&us, &base_tx); + if (ret) { + return ret; + } + if (base_tx == XBT_NULL) { + break; + } + + t = g_new0(XsTransaction, 1); + t->base_tx = base_tx; + + ret = consume_be32(&us, &t->dom_id); + if (!ret) { + ret = consume_tree(&us, t); + } + if (ret) { + g_free(t); + return ret; + } + g_assert(t->root); + if (t->dom_id) { + s->nr_domu_transactions++; + } + g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t); + } + + while (1) { + char *path, *token; + size_t pathlen, toklen; + + ret = consume_string(&us, &path, &pathlen); + if (ret) { + return ret; + } + if (!pathlen) { + break; + } + + ret = consume_string(&us, &token, &toklen); + if (ret) { + return ret; + } + + if (!watch_fn) { + continue; + } + + ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque); + if (ret) { + return ret; + } + } + + if (us.l) { + return -EINVAL; + } + + return 0; +} diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h index 2f81251..bbe2391 100644 --- a/hw/i386/kvm/xenstore_impl.h +++ b/hw/i386/kvm/xenstore_impl.h @@ -61,4 +61,9 @@ int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, void *opaque); int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id); +GByteArray *xs_impl_serialize(XenstoreImplState *s); +int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, + unsigned int dom_id, xs_impl_watch_fn watch_fn, + void *watch_opaque); + #endif /* QEMU_XENSTORE_IMPL_H */ -- cgit v1.1 From 
831b0db8abda1d837a299893c4e3027942c8ac49 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 24 Jan 2023 09:34:06 +0000 Subject: hw/xen: Create initial XenStore nodes Signed-off-by: Paul Durrant Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_xenstore.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'hw') diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 520422b..fb3648a 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -76,9 +76,39 @@ struct XenXenstoreState *xen_xenstore_singleton; static void xen_xenstore_event(void *opaque); static void fire_watch_cb(void *opaque, const char *path, const char *token); +static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s, + GList *perms, + const char *relpath, + const char *fmt, ...) +{ + gchar *abspath; + gchar *value; + va_list args; + GByteArray *data; + int err; + + abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath); + va_start(args, fmt); + value = g_strdup_vprintf(fmt, args); + va_end(args); + + data = g_byte_array_new_take((void *)value, strlen(value)); + + err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data); + assert(!err); + + g_byte_array_unref(data); + + err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms); + assert(!err); + + g_free(abspath); +} + static void xen_xenstore_realize(DeviceState *dev, Error **errp) { XenXenstoreState *s = XEN_XENSTORE(dev); + GList *perms; if (xen_mode != XEN_EMULATE) { error_setg(errp, "Xen xenstore support is for Xen emulation"); @@ -102,6 +132,46 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) xen_xenstore_event, NULL, NULL, NULL, s); s->impl = xs_impl_create(xen_domid); + + /* Populate the default nodes */ + + /* Nodes owned by 'dom0' but readable by the guest */ + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU)); + perms = g_list_append(perms, 
xs_perm_as_string(XS_PERM_READ, xen_domid)); + + relpath_printf(s, perms, "", "%s", ""); + + relpath_printf(s, perms, "domid", "%u", xen_domid); + + relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1); + relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1); + + relpath_printf(s, perms, "platform/acpi", "%u", 1); + relpath_printf(s, perms, "platform/acpi_s3", "%u", 1); + relpath_printf(s, perms, "platform/acpi_s4", "%u", 1); + relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0); + + g_list_free_full(perms, g_free); + + /* Nodes owned by the guest */ + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid)); + + relpath_printf(s, perms, "attr", "%s", ""); + + relpath_printf(s, perms, "control/shutdown", "%s", ""); + relpath_printf(s, perms, "control/feature-poweroff", "%u", 1); + relpath_printf(s, perms, "control/feature-reboot", "%u", 1); + relpath_printf(s, perms, "control/feature-suspend", "%u", 1); + relpath_printf(s, perms, "control/feature-s3", "%u", 1); + relpath_printf(s, perms, "control/feature-s4", "%u", 1); + + relpath_printf(s, perms, "data", "%s", ""); + relpath_printf(s, perms, "device", "%s", ""); + relpath_printf(s, perms, "drivers", "%s", ""); + relpath_printf(s, perms, "error", "%s", ""); + relpath_printf(s, perms, "feature", "%s", ""); + + g_list_free_full(perms, g_free); } static bool xen_xenstore_is_needed(void *opaque) -- cgit v1.1 From b6cacfea0b38300e3ea5fd6d486d5085122554eb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 1 Jan 2023 17:54:41 +0000 Subject: hw/xen: Add evtchn operations to allow redirection to internal emulation The existing implementation calling into the real libxenevtchn moves to a new file hw/xen/xen-operations.c, and is called via a function table which in a subsequent commit will also be able to invoke the emulated event channel support. 
Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/9pfs/xen-9p-backend.c | 24 +++++++-------- hw/i386/xen/xen-hvm.c | 27 +++++++++-------- hw/xen/meson.build | 1 + hw/xen/xen-bus.c | 22 +++++++------- hw/xen/xen-legacy-backend.c | 8 ++--- hw/xen/xen-operations.c | 71 +++++++++++++++++++++++++++++++++++++++++++++ hw/xen/xen_pvdev.c | 12 ++++---- 7 files changed, 120 insertions(+), 45 deletions(-) create mode 100644 hw/xen/xen-operations.c (limited to 'hw') diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 65c4979..864bdaf 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -241,7 +241,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu) xen_wmb(); ring->inprogress = false; - xenevtchn_notify(ring->evtchndev, ring->local_port); + qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port); qemu_bh_schedule(ring->bh); } @@ -324,8 +324,8 @@ static void xen_9pfs_evtchn_event(void *opaque) Xen9pfsRing *ring = opaque; evtchn_port_t port; - port = xenevtchn_pending(ring->evtchndev); - xenevtchn_unmask(ring->evtchndev, port); + port = qemu_xen_evtchn_pending(ring->evtchndev); + qemu_xen_evtchn_unmask(ring->evtchndev, port); qemu_bh_schedule(ring->bh); } @@ -337,10 +337,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev) for (i = 0; i < xen_9pdev->num_rings; i++) { if (xen_9pdev->rings[i].evtchndev != NULL) { - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - NULL, NULL, NULL); - xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, - xen_9pdev->rings[i].local_port); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev), + NULL, NULL, NULL); + qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev, + xen_9pdev->rings[i].local_port); xen_9pdev->rings[i].evtchndev = NULL; } } @@ -447,12 +447,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) xen_9pdev->rings[i].inprogress = false; - xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0); + 
xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open(); if (xen_9pdev->rings[i].evtchndev == NULL) { goto out; } - qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev)); - xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain + qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev)); + xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain (xen_9pdev->rings[i].evtchndev, xendev->dom, xen_9pdev->rings[i].evtchn); @@ -463,8 +463,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) goto out; } xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev), + xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]); } xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model"); diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index e5a1dd1..cb1d24f 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -761,7 +761,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state) int i; evtchn_port_t port; - port = xenevtchn_pending(state->xce_handle); + port = qemu_xen_evtchn_pending(state->xce_handle); if (port == state->bufioreq_local_port) { timer_mod(state->buffered_io_timer, BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); @@ -780,7 +780,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state) } /* unmask the wanted port again */ - xenevtchn_unmask(state->xce_handle, port); + qemu_xen_evtchn_unmask(state->xce_handle, port); /* get the io packet from shared memory */ state->send_vcpu = i; @@ -1147,7 +1147,7 @@ static void handle_buffered_io(void *opaque) BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); } else { timer_del(state->buffered_io_timer); - xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port); + qemu_xen_evtchn_unmask(state->xce_handle, 
state->bufioreq_local_port); } } @@ -1196,8 +1196,8 @@ static void cpu_handle_ioreq(void *opaque) } req->state = STATE_IORESP_READY; - xenevtchn_notify(state->xce_handle, - state->ioreq_local_port[state->send_vcpu]); + qemu_xen_evtchn_notify(state->xce_handle, + state->ioreq_local_port[state->send_vcpu]); } } @@ -1206,7 +1206,7 @@ static void xen_main_loop_prepare(XenIOState *state) int evtchn_fd = -1; if (state->xce_handle != NULL) { - evtchn_fd = xenevtchn_fd(state->xce_handle); + evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle); } state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io, @@ -1249,7 +1249,7 @@ static void xen_exit_notifier(Notifier *n, void *data) xenforeignmemory_unmap_resource(xen_fmem, state->fres); } - xenevtchn_close(state->xce_handle); + qemu_xen_evtchn_close(state->xce_handle); xs_daemon_close(state->xenstore); } @@ -1397,9 +1397,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) xen_pfn_t ioreq_pfn; XenIOState *state; + setup_xen_backend_ops(); + state = g_new0(XenIOState, 1); - state->xce_handle = xenevtchn_open(NULL, 0); + state->xce_handle = qemu_xen_evtchn_open(); if (state->xce_handle == NULL) { perror("xen: event channel open"); goto err; @@ -1463,8 +1465,9 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) /* FIXME: how about if we overflow the page here? 
*/ for (i = 0; i < max_cpus; i++) { - rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid, - xen_vcpu_eport(state->shared_page, i)); + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + xen_vcpu_eport(state->shared_page, + i)); if (rc == -1) { error_report("shared evtchn %d bind error %d", i, errno); goto err; @@ -1472,8 +1475,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) state->ioreq_local_port[i] = rc; } - rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid, - state->bufioreq_remote_port); + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + state->bufioreq_remote_port); if (rc == -1) { error_report("buffered evtchn bind error %d", errno); goto err; diff --git a/hw/xen/meson.build b/hw/xen/meson.build index ae0ace3..f195bbd 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -5,6 +5,7 @@ softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( 'xen-legacy-backend.c', 'xen_devconfig.c', 'xen_pvdev.c', + 'xen-operations.c', )) xen_specific_ss = ss.source_set() diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index df3f6b9..d0b1ae9 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -1095,12 +1095,12 @@ static bool xen_device_poll(void *opaque) static void xen_device_event(void *opaque) { XenEventChannel *channel = opaque; - unsigned long port = xenevtchn_pending(channel->xeh); + unsigned long port = qemu_xen_evtchn_pending(channel->xeh); if (port == channel->local_port) { xen_device_poll(channel); - xenevtchn_unmask(channel->xeh, port); + qemu_xen_evtchn_unmask(channel->xeh, port); } } @@ -1115,11 +1115,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev, } if (channel->ctx) - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, NULL, NULL, NULL, NULL, NULL); channel->ctx = ctx; - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + 
aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, xen_device_event, NULL, xen_device_poll, NULL, channel); } @@ -1131,13 +1131,13 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, XenEventChannel *channel = g_new0(XenEventChannel, 1); xenevtchn_port_or_error_t local_port; - channel->xeh = xenevtchn_open(NULL, 0); + channel->xeh = qemu_xen_evtchn_open(); if (!channel->xeh) { error_setg_errno(errp, errno, "failed xenevtchn_open"); goto fail; } - local_port = xenevtchn_bind_interdomain(channel->xeh, + local_port = qemu_xen_evtchn_bind_interdomain(channel->xeh, xendev->frontend_id, port); if (local_port < 0) { @@ -1160,7 +1160,7 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, fail: if (channel->xeh) { - xenevtchn_close(channel->xeh); + qemu_xen_evtchn_close(channel->xeh); } g_free(channel); @@ -1177,7 +1177,7 @@ void xen_device_notify_event_channel(XenDevice *xendev, return; } - if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) { + if (qemu_xen_evtchn_notify(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_notify failed"); } } @@ -1193,14 +1193,14 @@ void xen_device_unbind_event_channel(XenDevice *xendev, QLIST_REMOVE(channel, list); - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, NULL, NULL, NULL, NULL, NULL); - if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) { + if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_unbind failed"); } - xenevtchn_close(channel->xeh); + qemu_xen_evtchn_close(channel->xeh); g_free(channel); } diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index afba71f..9ce3dc2 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -294,13 +294,13 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, xendev->debug 
= debug; xendev->local_port = -1; - xendev->evtchndev = xenevtchn_open(NULL, 0); + xendev->evtchndev = qemu_xen_evtchn_open(); if (xendev->evtchndev == NULL) { xen_pv_printf(NULL, 0, "can't open evtchn device\n"); qdev_unplug(DEVICE(xendev), NULL); return NULL; } - qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); + qemu_set_cloexec(qemu_xen_evtchn_fd(xendev->evtchndev)); xen_pv_insert_xendev(xendev); @@ -751,14 +751,14 @@ int xen_be_bind_evtchn(struct XenLegacyDevice *xendev) if (xendev->local_port != -1) { return 0; } - xendev->local_port = xenevtchn_bind_interdomain + xendev->local_port = qemu_xen_evtchn_bind_interdomain (xendev->evtchndev, xendev->dom, xendev->remote_port); if (xendev->local_port == -1) { xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n"); return -1; } xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); - qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), + qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), xen_pv_evtchn_event, NULL, xendev); return 0; } diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c new file mode 100644 index 0000000..1a959d8 --- /dev/null +++ b/hw/xen/xen-operations.c @@ -0,0 +1,71 @@ +/* + * QEMU Xen backend support: Operations for true Xen + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "hw/xen/xen_backend_ops.h" +#include "hw/xen/xen_common.h" + +/* + * If we have new enough libxenctrl then we do not want/need these compat + * interfaces, despite what the user supplied cflags might say. They + * must be undefined before including xenctrl.h + */ +#undef XC_WANT_COMPAT_EVTCHN_API + +#include + +/* + * We don't support Xen prior to 4.2.0. 
+ */ + +/* Xen 4.2 through 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +typedef xc_evtchn xenevtchn_handle; +typedef evtchn_port_or_error_t xenevtchn_port_or_error_t; + +#define xenevtchn_open(l, f) xc_evtchn_open(l, f); +#define xenevtchn_close(h) xc_evtchn_close(h) +#define xenevtchn_fd(h) xc_evtchn_fd(h) +#define xenevtchn_pending(h) xc_evtchn_pending(h) +#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p) +#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p) +#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p) +#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p) + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +#include + +#endif + +static xenevtchn_handle *libxenevtchn_backend_open(void) +{ + return xenevtchn_open(NULL, 0); +} + +struct evtchn_backend_ops libxenevtchn_backend_ops = { + .open = libxenevtchn_backend_open, + .close = xenevtchn_close, + .bind_interdomain = xenevtchn_bind_interdomain, + .unbind = xenevtchn_unbind, + .get_fd = xenevtchn_fd, + .notify = xenevtchn_notify, + .unmask = xenevtchn_unmask, + .pending = xenevtchn_pending, +}; + +void setup_xen_backend_ops(void) +{ + xen_evtchn_ops = &libxenevtchn_backend_ops; +} diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c index 1a5177b..86a2c8e 100644 --- a/hw/xen/xen_pvdev.c +++ b/hw/xen/xen_pvdev.c @@ -238,14 +238,14 @@ void xen_pv_evtchn_event(void *opaque) struct XenLegacyDevice *xendev = opaque; evtchn_port_t port; - port = xenevtchn_pending(xendev->evtchndev); + port = qemu_xen_evtchn_pending(xendev->evtchndev); if (port != xendev->local_port) { xen_pv_printf(xendev, 0, "xenevtchn_pending returned %d (expected %d)\n", port, xendev->local_port); return; } - xenevtchn_unmask(xendev->evtchndev, port); + qemu_xen_evtchn_unmask(xendev->evtchndev, port); if (xendev->ops->event) { xendev->ops->event(xendev); @@ -257,15 +257,15 @@ void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev) if (xendev->local_port == -1) { return; } - 
qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL); - xenevtchn_unbind(xendev->evtchndev, xendev->local_port); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL); + qemu_xen_evtchn_unbind(xendev->evtchndev, xendev->local_port); xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); xendev->local_port = -1; } int xen_pv_send_notify(struct XenLegacyDevice *xendev) { - return xenevtchn_notify(xendev->evtchndev, xendev->local_port); + return qemu_xen_evtchn_notify(xendev->evtchndev, xendev->local_port); } /* ------------------------------------------------------------- */ @@ -306,7 +306,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev) } if (xendev->evtchndev != NULL) { - xenevtchn_close(xendev->evtchndev); + qemu_xen_evtchn_close(xendev->evtchndev); } if (xendev->gnttabdev != NULL) { xengnttab_close(xendev->gnttabdev); -- cgit v1.1 From c412ba47b2ec4c75e1ef84f39f898cfdec0630ad Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 1 Jan 2023 21:31:37 +0000 Subject: hw/xen: Add gnttab operations to allow redirection to internal emulation Move the existing code using libxengnttab to xen-operations.c and allow the operations to be redirected so that we can add emulation of grant table mapping for backend drivers. In emulation, mapping more than one grant ref to be virtually contiguous would be fairly difficult. The best way to do it might be to make the ram_block mappings actually backed by a file (shmem or a deleted file, perhaps) so that we can have multiple *shared* mappings of it. But that would be fairly intrusive. Making the backend drivers cope with page *lists* instead of expecting the mapping to be contiguous is also non-trivial, since some structures would actually *cross* page boundaries (e.g. the 32-bit blkif responses which are 12 bytes). So for now, we'll support only single-page mappings in emulation. 
Add a XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE flag to indicate that the native Xen implementation *does* support multi-page maps, and a helper function to query it. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/xen/xen-bus.c | 112 +++---------------------------- hw/xen/xen-legacy-backend.c | 125 ++++------------------------------- hw/xen/xen-operations.c | 157 ++++++++++++++++++++++++++++++++++++++++++++ hw/xen/xen_pvdev.c | 2 +- 4 files changed, 177 insertions(+), 219 deletions(-) (limited to 'hw') diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index d0b1ae9..b247e86 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -947,7 +947,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev) void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, Error **errp) { - if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) { + if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_set_max_grants failed"); } } @@ -956,9 +956,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, unsigned int nr_refs, int prot, Error **errp) { - void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs, - xendev->frontend_id, refs, - prot); + void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs, + xendev->frontend_id, refs, prot); if (!map) { error_setg_errno(errp, errno, @@ -971,109 +970,17 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, unsigned int nr_refs, Error **errp) { - if (xengnttab_unmap(xendev->xgth, map, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->xgth, map, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_unmap failed"); } } -static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain, - XenDeviceGrantCopySegment segs[], - unsigned int nr_segs, Error **errp) -{ - uint32_t *refs = g_new(uint32_t, nr_segs); - int prot = to_domain ? 
PROT_WRITE : PROT_READ; - void *map; - unsigned int i; - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - - refs[i] = to_domain ? seg->dest.foreign.ref : - seg->source.foreign.ref; - } - - map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs, - xendev->frontend_id, refs, - prot); - if (!map) { - error_setg_errno(errp, errno, - "xengnttab_map_domain_grant_refs failed"); - goto done; - } - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - void *page = map + (i * XC_PAGE_SIZE); - - if (to_domain) { - memcpy(page + seg->dest.foreign.offset, seg->source.virt, - seg->len); - } else { - memcpy(seg->dest.virt, page + seg->source.foreign.offset, - seg->len); - } - } - - if (xengnttab_unmap(xendev->xgth, map, nr_segs)) { - error_setg_errno(errp, errno, "xengnttab_unmap failed"); - } - -done: - g_free(refs); -} - void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain, XenDeviceGrantCopySegment segs[], unsigned int nr_segs, Error **errp) { - xengnttab_grant_copy_segment_t *xengnttab_segs; - unsigned int i; - - if (!xendev->feature_grant_copy) { - compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp); - return; - } - - xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (to_domain) { - xengnttab_seg->flags = GNTCOPY_dest_gref; - xengnttab_seg->dest.foreign.domid = xendev->frontend_id; - xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; - xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; - xengnttab_seg->source.virt = seg->source.virt; - } else { - xengnttab_seg->flags = GNTCOPY_source_gref; - xengnttab_seg->source.foreign.domid = xendev->frontend_id; - xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; - xengnttab_seg->source.foreign.offset = - seg->source.foreign.offset; - 
xengnttab_seg->dest.virt = seg->dest.virt; - } - - xengnttab_seg->len = seg->len; - } - - if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) { - error_setg_errno(errp, errno, "xengnttab_grant_copy failed"); - goto done; - } - - for (i = 0; i < nr_segs; i++) { - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (xengnttab_seg->status != GNTST_okay) { - error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); - break; - } - } - -done: - g_free(xengnttab_segs); + qemu_xen_gnttab_grant_copy(xendev->xgth, to_domain, xendev->frontend_id, + (XenGrantCopySegment *)segs, nr_segs, errp); } struct XenEventChannel { @@ -1235,7 +1142,7 @@ static void xen_device_unrealize(DeviceState *dev) xen_device_backend_destroy(xendev); if (xendev->xgth) { - xengnttab_close(xendev->xgth); + qemu_xen_gnttab_close(xendev->xgth); xendev->xgth = NULL; } @@ -1298,15 +1205,12 @@ static void xen_device_realize(DeviceState *dev, Error **errp) xendev->watch_list = watch_list_create(xendev->xsh); - xendev->xgth = xengnttab_open(NULL, 0); + xendev->xgth = qemu_xen_gnttab_open(); if (!xendev->xgth) { error_setg_errno(errp, errno, "failed xengnttab_open"); goto unrealize; } - xendev->feature_grant_copy = - (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0); - xen_device_backend_create(xendev, errp); if (*errp) { goto unrealize; diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index 9ce3dc2..1e9a28f 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -43,7 +43,6 @@ struct xs_handle *xenstore; const char *xen_protocol; /* private */ -static bool xen_feature_grant_copy; static int debug; int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node, @@ -113,7 +112,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev, { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) { + if (qemu_xen_gnttab_set_max_grants(xendev->gnttabdev, 
nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", strerror(errno)); } @@ -126,8 +125,8 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs, - xen_domid, refs, prot); + ptr = qemu_xen_gnttab_map_refs(xendev->gnttabdev, nr_refs, xen_domid, refs, + prot); if (!ptr) { xen_pv_printf(xendev, 0, "xengnttab_map_domain_grant_refs failed: %s\n", @@ -142,119 +141,27 @@ void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } } -static int compat_copy_grant_refs(struct XenLegacyDevice *xendev, - bool to_domain, - XenGrantCopySegment segs[], - unsigned int nr_segs) -{ - uint32_t *refs = g_new(uint32_t, nr_segs); - int prot = to_domain ? PROT_WRITE : PROT_READ; - void *pages; - unsigned int i; - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - - refs[i] = to_domain ? 
- seg->dest.foreign.ref : seg->source.foreign.ref; - } - - pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs, - xen_domid, refs, prot); - if (!pages) { - xen_pv_printf(xendev, 0, - "xengnttab_map_domain_grant_refs failed: %s\n", - strerror(errno)); - g_free(refs); - return -1; - } - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - void *page = pages + (i * XC_PAGE_SIZE); - - if (to_domain) { - memcpy(page + seg->dest.foreign.offset, seg->source.virt, - seg->len); - } else { - memcpy(seg->dest.virt, page + seg->source.foreign.offset, - seg->len); - } - } - - if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) { - xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", - strerror(errno)); - } - - g_free(refs); - return 0; -} - int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev, bool to_domain, XenGrantCopySegment segs[], unsigned int nr_segs) { - xengnttab_grant_copy_segment_t *xengnttab_segs; - unsigned int i; int rc; assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (!xen_feature_grant_copy) { - return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs); - } - - xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (to_domain) { - xengnttab_seg->flags = GNTCOPY_dest_gref; - xengnttab_seg->dest.foreign.domid = xen_domid; - xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; - xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; - xengnttab_seg->source.virt = seg->source.virt; - } else { - xengnttab_seg->flags = GNTCOPY_source_gref; - xengnttab_seg->source.foreign.domid = xen_domid; - xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; - xengnttab_seg->source.foreign.offset = - seg->source.foreign.offset; - xengnttab_seg->dest.virt = seg->dest.virt; - } - - xengnttab_seg->len = seg->len; - } - - rc = 
xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs); - + rc = qemu_xen_gnttab_grant_copy(xendev->gnttabdev, to_domain, xen_domid, + segs, nr_segs, NULL); if (rc) { - xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n", - strerror(errno)); - } - - for (i = 0; i < nr_segs; i++) { - xengnttab_grant_copy_segment_t *xengnttab_seg = - &xengnttab_segs[i]; - - if (xengnttab_seg->status != GNTST_okay) { - xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i, - xengnttab_seg->status); - rc = -1; - } + xen_pv_printf(xendev, 0, "xengnttab_grant_copy failed: %s\n", + strerror(-rc)); } - - g_free(xengnttab_segs); return rc; } @@ -466,7 +373,7 @@ static int xen_be_try_initialise(struct XenLegacyDevice *xendev) } if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { - xendev->gnttabdev = xengnttab_open(NULL, 0); + xendev->gnttabdev = qemu_xen_gnttab_open(); if (xendev->gnttabdev == NULL) { xen_pv_printf(NULL, 0, "can't open gnttab device\n"); return -1; @@ -524,7 +431,7 @@ static void xen_be_disconnect(struct XenLegacyDevice *xendev, xendev->ops->disconnect(xendev); } if (xendev->gnttabdev) { - xengnttab_close(xendev->gnttabdev); + qemu_xen_gnttab_close(xendev->gnttabdev); xendev->gnttabdev = NULL; } if (xendev->be_state != state) { @@ -687,8 +594,6 @@ static void xen_set_dynamic_sysbus(void) void xen_be_init(void) { - xengnttab_handle *gnttabdev; - xenstore = xs_daemon_open(); if (!xenstore) { xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); @@ -697,19 +602,11 @@ void xen_be_init(void) qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL); - if (xen_xc == NULL || xen_fmem == NULL) { + if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) { xen_pv_printf(NULL, 0, "Xen operations not set up\n"); exit(1); } - gnttabdev = xengnttab_open(NULL, 0); - if (gnttabdev != NULL) { - if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) { - xen_feature_grant_copy = true; - } - xengnttab_close(gnttabdev); - } - xen_sysdev = qdev_new(TYPE_XENSYSDEV); 
sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal); xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c index 1a959d8..2e74a28 100644 --- a/hw/xen/xen-operations.c +++ b/hw/xen/xen-operations.c @@ -21,6 +21,7 @@ * must be undefined before including xenctrl.h */ #undef XC_WANT_COMPAT_EVTCHN_API +#undef XC_WANT_COMPAT_GNTTAB_API #include @@ -43,12 +44,141 @@ typedef evtchn_port_or_error_t xenevtchn_port_or_error_t; #define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p) #define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p) +typedef xc_gnttab xengnttab_handle; + +#define xengnttab_open(l, f) xc_gnttab_open(l, f) +#define xengnttab_close(h) xc_gnttab_close(h) +#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n) +#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p) +#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n) +#define xengnttab_map_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_grant_refs(h, c, d, r, p) +#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_domain_grant_refs(h, c, d, r, p) + #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ #include +#include #endif +/* Xen before 4.8 */ + +static int libxengnttab_fallback_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment segs[], + unsigned int nr_segs, Error **errp) +{ + uint32_t *refs = g_new(uint32_t, nr_segs); + int prot = to_domain ? PROT_WRITE : PROT_READ; + void *map; + unsigned int i; + int rc = 0; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + + refs[i] = to_domain ? 
seg->dest.foreign.ref : + seg->source.foreign.ref; + } + map = xengnttab_map_domain_grant_refs(xgt, nr_segs, domid, refs, prot); + if (!map) { + if (errp) { + error_setg_errno(errp, errno, + "xengnttab_map_domain_grant_refs failed"); + } + rc = -errno; + goto done; + } + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page = map + (i * XEN_PAGE_SIZE); + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + } + + if (xengnttab_unmap(xgt, map, nr_segs)) { + if (errp) { + error_setg_errno(errp, errno, "xengnttab_unmap failed"); + } + rc = -errno; + } + +done: + g_free(refs); + return rc; +} + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 + +static int libxengnttab_backend_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + xengnttab_grant_copy_segment_t *xengnttab_segs; + unsigned int i; + int rc; + + xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (to_domain) { + xengnttab_seg->flags = GNTCOPY_dest_gref; + xengnttab_seg->dest.foreign.domid = domid; + xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; + xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; + xengnttab_seg->source.virt = seg->source.virt; + } else { + xengnttab_seg->flags = GNTCOPY_source_gref; + xengnttab_seg->source.foreign.domid = domid; + xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; + xengnttab_seg->source.foreign.offset = + seg->source.foreign.offset; + xengnttab_seg->dest.virt = seg->dest.virt; + } + + xengnttab_seg->len = seg->len; + } + + if (xengnttab_grant_copy(xgt, nr_segs, xengnttab_segs)) { + if (errp) { + error_setg_errno(errp, errno, 
"xengnttab_grant_copy failed"); + } + rc = -errno; + goto done; + } + + rc = 0; + for (i = 0; i < nr_segs; i++) { + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (xengnttab_seg->status != GNTST_okay) { + if (errp) { + error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); + } + rc = -EIO; + break; + } + } + +done: + g_free(xengnttab_segs); + return rc; +} +#endif + static xenevtchn_handle *libxenevtchn_backend_open(void) { return xenevtchn_open(NULL, 0); @@ -65,7 +195,34 @@ struct evtchn_backend_ops libxenevtchn_backend_ops = { .pending = xenevtchn_pending, }; +static xengnttab_handle *libxengnttab_backend_open(void) +{ + return xengnttab_open(NULL, 0); +} + + +static struct gnttab_backend_ops libxengnttab_backend_ops = { + .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE, + .open = libxengnttab_backend_open, + .close = xengnttab_close, + .grant_copy = libxengnttab_fallback_grant_copy, + .set_max_grants = xengnttab_set_max_grants, + .map_refs = xengnttab_map_domain_grant_refs, + .unmap = xengnttab_unmap, +}; + void setup_xen_backend_ops(void) { +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 + xengnttab_handle *xgt = xengnttab_open(NULL, 0); + + if (xgt) { + if (xengnttab_grant_copy(xgt, 0, NULL) == 0) { + libxengnttab_backend_ops.grant_copy = libxengnttab_backend_grant_copy; + } + xengnttab_close(xgt); + } +#endif xen_evtchn_ops = &libxenevtchn_backend_ops; + xen_gnttab_ops = &libxengnttab_backend_ops; } diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c index 86a2c8e..d8582cc 100644 --- a/hw/xen/xen_pvdev.c +++ b/hw/xen/xen_pvdev.c @@ -309,7 +309,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev) qemu_xen_evtchn_close(xendev->evtchndev); } if (xendev->gnttabdev != NULL) { - xengnttab_close(xendev->gnttabdev); + qemu_xen_gnttab_close(xendev->gnttabdev); } QTAILQ_REMOVE(&xendevs, xendev, next); -- cgit v1.1 From f80fad16afa5aebb8cce919e87f6c58fa03d16e6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 10 Jan 2023 
00:03:49 +0000 Subject: hw/xen: Pass grant ref to gnttab unmap operation The previous commit introduced redirectable gnttab operations fairly much like-for-like, with the exception of the extra arguments to the ->open() call which were always NULL/0 anyway. This *changes* the arguments to the ->unmap() operation to include the original ref# that was mapped. Under real Xen it isn't necessary; all we need to do from QEMU is munmap(), then the kernel will release the grant, and Xen does the tracking/refcounting for the guest. When we have emulated grant tables though, we need to do all that for ourselves. So let's have the back ends keep track of what they mapped and pass it in to the ->unmap() method for us. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/9pfs/xen-9p-backend.c | 7 ++++--- hw/block/dataplane/xen-block.c | 1 + hw/char/xen_console.c | 2 +- hw/net/xen_nic.c | 13 ++++++++----- hw/usb/xen-usb.c | 21 ++++++++++++++++----- hw/xen/xen-bus.c | 4 ++-- hw/xen/xen-legacy-backend.c | 4 ++-- hw/xen/xen-operations.c | 9 ++++++++- 8 files changed, 42 insertions(+), 19 deletions(-) (limited to 'hw') diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 864bdaf..d8bb0e8 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -359,12 +359,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev) if (xen_9pdev->rings[i].data != NULL) { xen_be_unmap_grant_refs(&xen_9pdev->xendev, xen_9pdev->rings[i].data, + xen_9pdev->rings[i].intf->ref, (1 << xen_9pdev->rings[i].ring_order)); } if (xen_9pdev->rings[i].intf != NULL) { - xen_be_unmap_grant_refs(&xen_9pdev->xendev, - xen_9pdev->rings[i].intf, - 1); + xen_be_unmap_grant_ref(&xen_9pdev->xendev, + xen_9pdev->rings[i].intf, + xen_9pdev->rings[i].ref); } if (xen_9pdev->rings[i].bh != NULL) { qemu_bh_delete(xen_9pdev->rings[i].bh); diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 2785b9e..e55b713 100644 --- a/hw/block/dataplane/xen-block.c +++ 
b/hw/block/dataplane/xen-block.c @@ -705,6 +705,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) Error *local_err = NULL; xen_device_unmap_grant_refs(xendev, dataplane->sring, + dataplane->ring_ref, dataplane->nr_ring_ref, &local_err); dataplane->sring = NULL; diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index 63153df..19ad6c9 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -271,7 +271,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev) if (!xendev->dev) { xenforeignmemory_unmap(xen_fmem, con->sring, 1); } else { - xen_be_unmap_grant_ref(xendev, con->sring); + xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref); } con->sring = NULL; } diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 7d92c2d..166d037 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev) qemu_send_packet(qemu_get_queue(netdev->nic), page + txreq.offset, txreq.size); } - xen_be_unmap_grant_ref(&netdev->xendev, page); + xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref); net_tx_response(netdev, &txreq, NETIF_RSP_OKAY); } if (!netdev->tx_work) { @@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size return -1; } memcpy(page + NET_IP_ALIGN, buf, size); - xen_be_unmap_grant_ref(&netdev->xendev, page); + xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref); net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0); return size; @@ -343,7 +343,8 @@ static int net_connect(struct XenLegacyDevice *xendev) netdev->rx_ring_ref, PROT_READ | PROT_WRITE); if (!netdev->rxs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs, + netdev->tx_ring_ref); netdev->txs = NULL; return -1; } @@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev) xen_pv_unbind_evtchn(&netdev->xendev); if (netdev->txs) { - 
xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs, + netdev->tx_ring_ref); netdev->txs = NULL; } if (netdev->rxs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs, + netdev->rx_ring_ref); netdev->rxs = NULL; } } diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 0f7369e..a770a64 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -101,6 +101,8 @@ struct usbback_hotplug { struct usbback_info { struct XenLegacyDevice xendev; /* must be first */ USBBus bus; + uint32_t urb_ring_ref; + uint32_t conn_ring_ref; void *urb_sring; void *conn_sring; struct usbif_urb_back_ring urb_ring; @@ -277,10 +279,11 @@ static int usbback_init_packet(struct usbback_req *usbback_req) static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, int32_t actual_length, int32_t error_count) { + uint32_t ref[USBIF_MAX_SEGMENTS_PER_REQUEST]; struct usbback_info *usbif; struct usbif_urb_response *res; struct XenLegacyDevice *xendev; - unsigned int notify; + unsigned int notify, i; usbif = usbback_req->usbif; xendev = &usbif->xendev; @@ -293,13 +296,19 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, } if (usbback_req->buffer) { - xen_be_unmap_grant_refs(xendev, usbback_req->buffer, + for (i = 0; i < usbback_req->nr_buffer_segs; i++) { + ref[i] = usbback_req->req.seg[i].gref; + } + xen_be_unmap_grant_refs(xendev, usbback_req->buffer, ref, usbback_req->nr_buffer_segs); usbback_req->buffer = NULL; } if (usbback_req->isoc_buffer) { - xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, + for (i = 0; i < usbback_req->nr_extra_segs; i++) { + ref[i] = usbback_req->req.seg[i + usbback_req->req.nr_buffer_segs].gref; + } + xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, ref, usbback_req->nr_extra_segs); usbback_req->isoc_buffer = NULL; } @@ -832,11 +841,11 @@ static void usbback_disconnect(struct 
XenLegacyDevice *xendev) xen_pv_unbind_evtchn(xendev); if (usbif->urb_sring) { - xen_be_unmap_grant_ref(xendev, usbif->urb_sring); + xen_be_unmap_grant_ref(xendev, usbif->urb_sring, usbif->urb_ring_ref); usbif->urb_sring = NULL; } if (usbif->conn_sring) { - xen_be_unmap_grant_ref(xendev, usbif->conn_sring); + xen_be_unmap_grant_ref(xendev, usbif->conn_sring, usbif->conn_ring_ref); usbif->conn_sring = NULL; } @@ -889,6 +898,8 @@ static int usbback_connect(struct XenLegacyDevice *xendev) return -1; } + usbif->urb_ring_ref = urb_ring_ref; + usbif->conn_ring_ref = conn_ring_ref; urb_sring = usbif->urb_sring; conn_sring = usbif->conn_sring; BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE); diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index b247e86..aee6a8c 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -967,10 +967,10 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, return map; } -void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, +void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs, unsigned int nr_refs, Error **errp) { - if (qemu_xen_gnttab_unmap(xendev->xgth, map, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->xgth, map, refs, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_unmap failed"); } } diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index 1e9a28f..a48a25a 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -137,11 +137,11 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, } void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, - unsigned int nr_refs) + uint32_t *refs, unsigned int nr_refs) { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, refs, nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } diff --git 
a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c index 2e74a28..c5956d2 100644 --- a/hw/xen/xen-operations.c +++ b/hw/xen/xen-operations.c @@ -200,6 +200,13 @@ static xengnttab_handle *libxengnttab_backend_open(void) return xengnttab_open(NULL, 0); } +static int libxengnttab_backend_unmap(xengnttab_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + return xengnttab_unmap(xgt, start_address, count); +} + static struct gnttab_backend_ops libxengnttab_backend_ops = { .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE, @@ -208,7 +215,7 @@ static struct gnttab_backend_ops libxengnttab_backend_ops = { .grant_copy = libxengnttab_fallback_grant_copy, .set_max_grants = xengnttab_set_max_grants, .map_refs = xengnttab_map_domain_grant_refs, - .unmap = xengnttab_unmap, + .unmap = libxengnttab_backend_unmap, }; void setup_xen_backend_ops(void) -- cgit v1.1 From 15e283c5b684c2e502e9327186eb89eb69c68812 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 2 Jan 2023 01:13:46 +0000 Subject: hw/xen: Add foreignmem operations to allow redirection to internal emulation Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/char/xen_console.c | 8 ++++---- hw/display/xenfb.c | 20 +++++++++++--------- hw/xen/xen-operations.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 13 deletions(-) (limited to 'hw') diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index 19ad6c9..e9cef3e 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -237,9 +237,9 @@ static int con_initialise(struct XenLegacyDevice *xendev) if (!xendev->dev) { xen_pfn_t mfn = con->ring_ref; - con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom, - PROT_READ | PROT_WRITE, - 1, &mfn, NULL); + con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL, + PROT_READ | PROT_WRITE, + 1, &mfn, NULL); } else { con->sring = xen_be_map_grant_ref(xendev, con->ring_ref, PROT_READ | PROT_WRITE); @@ -269,7 +269,7 @@ static void 
con_disconnect(struct XenLegacyDevice *xendev) if (con->sring) { if (!xendev->dev) { - xenforeignmemory_unmap(xen_fmem, con->sring, 1); + qemu_xen_foreignmem_unmap(con->sring, 1); } else { xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref); } diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 260eb38..2c4016f 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -98,8 +98,9 @@ static int common_bind(struct common *c) if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1) return -1; - c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom, - PROT_READ | PROT_WRITE, 1, &mfn, NULL); + c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL, + PROT_READ | PROT_WRITE, 1, &mfn, + NULL); if (c->page == NULL) return -1; @@ -115,7 +116,7 @@ static void common_unbind(struct common *c) { xen_pv_unbind_evtchn(&c->xendev); if (c->page) { - xenforeignmemory_unmap(xen_fmem, c->page, 1); + qemu_xen_foreignmem_unmap(c->page, 1); c->page = NULL; } } @@ -500,15 +501,16 @@ static int xenfb_map_fb(struct XenFB *xenfb) fbmfns = g_new0(xen_pfn_t, xenfb->fbpages); xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd); - map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom, - PROT_READ, n_fbdirs, pgmfns, NULL); + map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ, + n_fbdirs, pgmfns, NULL); if (map == NULL) goto out; xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map); - xenforeignmemory_unmap(xen_fmem, map, n_fbdirs); + qemu_xen_foreignmem_unmap(map, n_fbdirs); - xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom, - PROT_READ, xenfb->fbpages, fbmfns, NULL); + xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, + PROT_READ, xenfb->fbpages, + fbmfns, NULL); if (xenfb->pixels == NULL) goto out; @@ -927,7 +929,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev) * Replacing the framebuffer with anonymous shared memory * instead. This releases the guest pages and keeps qemu happy. 
*/ - xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages); + qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages); fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c index c5956d2..440e566 100644 --- a/hw/xen/xen-operations.c +++ b/hw/xen/xen-operations.c @@ -22,6 +22,7 @@ */ #undef XC_WANT_COMPAT_EVTCHN_API #undef XC_WANT_COMPAT_GNTTAB_API +#undef XC_WANT_COMPAT_MAP_FOREIGN_API #include @@ -56,10 +57,13 @@ typedef xc_gnttab xengnttab_handle; #define xengnttab_map_domain_grant_refs(h, c, d, r, p) \ xc_gnttab_map_domain_grant_refs(h, c, d, r, p) +typedef xc_interface xenforeignmemory_handle; + #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ #include #include +#include #endif @@ -218,6 +222,46 @@ static struct gnttab_backend_ops libxengnttab_backend_ops = { .unmap = libxengnttab_backend_unmap, }; +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot, + size_t pages, xfn_pfn_t *pfns, + int *errs) +{ + if (errs) { + return xc_map_foreign_bulk(xen_xc, dom, prot, pfns, errs, pages); + } else { + return xc_map_foreign_pages(xen_xc, dom, prot, pfns, pages); + } +} + +static int libxenforeignmem_backend_unmap(void *addr, size_t pages) +{ + return munmap(addr, pages * XC_PAGE_SIZE); +} + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot, + size_t pages, xen_pfn_t *pfns, + int *errs) +{ + return xenforeignmemory_map2(xen_fmem, dom, addr, prot, 0, pages, pfns, + errs); +} + +static int libxenforeignmem_backend_unmap(void *addr, size_t pages) +{ + return xenforeignmemory_unmap(xen_fmem, addr, pages); +} + +#endif + +struct foreignmem_backend_ops libxenforeignmem_backend_ops = { + .map = libxenforeignmem_backend_map, + .unmap = libxenforeignmem_backend_unmap, +}; + void 
setup_xen_backend_ops(void) { #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 @@ -232,4 +276,5 @@ void setup_xen_backend_ops(void) #endif xen_evtchn_ops = &libxenevtchn_backend_ops; xen_gnttab_ops = &libxengnttab_backend_ops; + xen_foreignmem_ops = &libxenforeignmem_backend_ops; } -- cgit v1.1 From ba2a92db1ff682c16730b1d7f156bac61928f04d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 2 Jan 2023 11:05:16 +0000 Subject: hw/xen: Add xenstore operations to allow redirection to internal emulation Signed-off-by: Paul Durrant Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/char/xen_console.c | 2 +- hw/i386/kvm/xen_xenstore.c | 3 - hw/i386/kvm/xenstore_impl.h | 8 +- hw/xen/xen-bus-helper.c | 62 +++++------ hw/xen/xen-bus.c | 261 +++++++------------------------------------- hw/xen/xen-legacy-backend.c | 119 ++++++++++---------- hw/xen/xen-operations.c | 198 +++++++++++++++++++++++++++++++++ hw/xen/xen_devconfig.c | 4 +- hw/xen/xen_pt_graphics.c | 1 - hw/xen/xen_pvdev.c | 49 +-------- 10 files changed, 333 insertions(+), 374 deletions(-) (limited to 'hw') diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index e9cef3e..ad8638a 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -181,7 +181,7 @@ static int con_init(struct XenLegacyDevice *xendev) const char *output; /* setup */ - dom = xs_get_domain_path(xenstore, con->xendev.dom); + dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom); if (!xendev->dev) { snprintf(con->console, sizeof(con->console), "%s/console", dom); } else { diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index fb3648a..35898e9 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -38,9 +38,6 @@ #define TYPE_XEN_XENSTORE "xen-xenstore" OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE) -#define XEN_PAGE_SHIFT 12 -#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) - #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) #define 
ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t)) diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h index bbe2391..0df2a91 100644 --- a/hw/i386/kvm/xenstore_impl.h +++ b/hw/i386/kvm/xenstore_impl.h @@ -12,13 +12,7 @@ #ifndef QEMU_XENSTORE_IMPL_H #define QEMU_XENSTORE_IMPL_H -typedef uint32_t xs_transaction_t; - -#define XBT_NULL 0 - -#define XS_PERM_NONE 0x00 -#define XS_PERM_READ 0x01 -#define XS_PERM_WRITE 0x02 +#include "hw/xen/xen_backend_ops.h" typedef struct XenstoreImplState XenstoreImplState; diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c index 5a1e12b..b2b2cc9 100644 --- a/hw/xen/xen-bus-helper.c +++ b/hw/xen/xen-bus-helper.c @@ -10,6 +10,7 @@ #include "hw/xen/xen-bus.h" #include "hw/xen/xen-bus-helper.h" #include "qapi/error.h" +#include "trace.h" #include @@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state) return "INVALID"; } -void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid, - const char *node, struct xs_permissions perms[], - unsigned int nr_perms, Error **errp) +void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid, + const char *node, unsigned int owner, unsigned int domid, + unsigned int perms, Error **errp) { trace_xs_node_create(node); - if (!xs_write(xsh, tid, node, "", 0)) { + if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) { error_setg_errno(errp, errno, "failed to create node '%s'", node); - return; - } - - if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) { - error_setg_errno(errp, errno, "failed to set node '%s' permissions", - node); } } -void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, Error **errp) { trace_xs_node_destroy(node); - if (!xs_rm(xsh, tid, node)) { + if (!qemu_xen_xs_destroy(h, tid, node)) { error_setg_errno(errp, errno, "failed to destroy node '%s'", node); } } -void xs_node_vprintf(struct xs_handle *xsh, 
xs_transaction_t tid, +void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { @@ -86,7 +81,7 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, trace_xs_node_vprintf(path, value); - if (!xs_write(xsh, tid, path, value, len)) { + if (!qemu_xen_xs_write(h, tid, path, value, len)) { error_setg_errno(errp, errno, "failed to write '%s' to '%s'", value, path); } @@ -95,18 +90,18 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, g_free(path); } -void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap); + xs_node_vprintf(h, tid, node, key, errp, fmt, ap); va_end(ap); } -int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { @@ -115,7 +110,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : g_strdup(key); - value = xs_read(xsh, tid, path, NULL); + value = qemu_xen_xs_read(h, tid, path, NULL); trace_xs_node_vscanf(path, value); @@ -133,7 +128,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, return rc; } -int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) 
{ @@ -141,42 +136,35 @@ int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, int rc; va_start(ap, fmt); - rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap); + rc = xs_node_vscanf(h, tid, node, key, errp, fmt, ap); va_end(ap); return rc; } -void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key, - char *token, Error **errp) +struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, + const char *key, xs_watch_fn fn, + void *opaque, Error **errp) { char *path; + struct qemu_xs_watch *w; path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : g_strdup(key); trace_xs_node_watch(path); - if (!xs_watch(xsh, path, token)) { + w = qemu_xen_xs_watch(h, path, fn, opaque); + if (!w) { error_setg_errno(errp, errno, "failed to watch node '%s'", path); } g_free(path); + + return w; } -void xs_node_unwatch(struct xs_handle *xsh, const char *node, - const char *key, const char *token, Error **errp) +void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w) { - char *path; - - path = (strlen(node) != 0) ? 
g_strdup_printf("%s/%s", node, key) : - g_strdup(key); - - trace_xs_node_unwatch(path); - - if (!xs_unwatch(xsh, path, token)) { - error_setg_errno(errp, errno, "failed to unwatch node '%s'", path); - } - - g_free(path); + qemu_xen_xs_unwatch(h, w); } diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index aee6a8c..9fe5496 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -62,7 +62,7 @@ static void xen_device_unplug(XenDevice *xendev, Error **errp) /* Mimic the way the Xen toolstack does an unplug */ again: - tid = xs_transaction_start(xenbus->xsh); + tid = qemu_xen_xs_transaction_start(xenbus->xsh); if (tid == XBT_NULL) { error_setg_errno(errp, errno, "failed xs_transaction_start"); return; @@ -80,7 +80,7 @@ again: goto abort; } - if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) { if (errno == EAGAIN) { goto again; } @@ -95,7 +95,7 @@ abort: * We only abort if there is already a failure so ignore any error * from ending the transaction. 
*/ - xs_transaction_end(xenbus->xsh, tid, true); + qemu_xen_xs_transaction_end(xenbus->xsh, tid, true); } static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent) @@ -111,143 +111,6 @@ static char *xen_bus_get_dev_path(DeviceState *dev) return xen_device_get_backend_path(XEN_DEVICE(dev)); } -struct XenWatch { - char *node, *key; - char *token; - XenWatchHandler handler; - void *opaque; - Notifier notifier; -}; - -static void watch_notify(Notifier *n, void *data) -{ - XenWatch *watch = container_of(n, XenWatch, notifier); - const char *token = data; - - if (!strcmp(watch->token, token)) { - watch->handler(watch->opaque); - } -} - -static XenWatch *new_watch(const char *node, const char *key, - XenWatchHandler handler, void *opaque) -{ - XenWatch *watch = g_new0(XenWatch, 1); - QemuUUID uuid; - - qemu_uuid_generate(&uuid); - - watch->token = qemu_uuid_unparse_strdup(&uuid); - watch->node = g_strdup(node); - watch->key = g_strdup(key); - watch->handler = handler; - watch->opaque = opaque; - watch->notifier.notify = watch_notify; - - return watch; -} - -static void free_watch(XenWatch *watch) -{ - g_free(watch->token); - g_free(watch->key); - g_free(watch->node); - - g_free(watch); -} - -struct XenWatchList { - struct xs_handle *xsh; - NotifierList notifiers; -}; - -static void watch_list_event(void *opaque) -{ - XenWatchList *watch_list = opaque; - char **v; - const char *token; - - v = xs_check_watch(watch_list->xsh); - if (!v) { - return; - } - - token = v[XS_WATCH_TOKEN]; - - notifier_list_notify(&watch_list->notifiers, (void *)token); - - free(v); -} - -static XenWatchList *watch_list_create(struct xs_handle *xsh) -{ - XenWatchList *watch_list = g_new0(XenWatchList, 1); - - g_assert(xsh); - - watch_list->xsh = xsh; - notifier_list_init(&watch_list->notifiers); - qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL, - watch_list); - - return watch_list; -} - -static void watch_list_destroy(XenWatchList *watch_list) -{ - 
g_assert(notifier_list_empty(&watch_list->notifiers)); - qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL); - g_free(watch_list); -} - -static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node, - const char *key, XenWatchHandler handler, - void *opaque, Error **errp) -{ - ERRP_GUARD(); - XenWatch *watch = new_watch(node, key, handler, opaque); - - notifier_list_add(&watch_list->notifiers, &watch->notifier); - - xs_node_watch(watch_list->xsh, node, key, watch->token, errp); - if (*errp) { - notifier_remove(&watch->notifier); - free_watch(watch); - - return NULL; - } - - return watch; -} - -static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch, - Error **errp) -{ - xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token, - errp); - - notifier_remove(&watch->notifier); - free_watch(watch); -} - -static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node, - const char *key, XenWatchHandler handler, - Error **errp) -{ - trace_xen_bus_add_watch(node, key); - - return watch_list_add(xenbus->watch_list, node, key, handler, xenbus, - errp); -} - -static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch, - Error **errp) -{ - trace_xen_bus_remove_watch(watch->node, watch->key); - - watch_list_remove(xenbus->watch_list, watch, errp); -} - static void xen_bus_backend_create(XenBus *xenbus, const char *type, const char *name, char *path, Error **errp) @@ -261,15 +124,15 @@ static void xen_bus_backend_create(XenBus *xenbus, const char *type, trace_xen_bus_backend_create(type, path); again: - tid = xs_transaction_start(xenbus->xsh); + tid = qemu_xen_xs_transaction_start(xenbus->xsh); if (tid == XBT_NULL) { error_setg(errp, "failed xs_transaction_start"); return; } - key = xs_directory(xenbus->xsh, tid, path, &n); + key = qemu_xen_xs_directory(xenbus->xsh, tid, path, &n); if (!key) { - if (!xs_transaction_end(xenbus->xsh, tid, true)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, true)) { 
error_setg_errno(errp, errno, "failed xs_transaction_end"); } return; @@ -300,7 +163,7 @@ again: free(key); - if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) { qobject_unref(opts); if (errno == EAGAIN) { @@ -327,7 +190,7 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type) trace_xen_bus_type_enumerate(type); - backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); + backend = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); if (!backend) { goto out; } @@ -372,7 +235,7 @@ static void xen_bus_enumerate(XenBus *xenbus) trace_xen_bus_enumerate(); - type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); + type = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); if (!type) { return; } @@ -415,7 +278,7 @@ static void xen_bus_cleanup(XenBus *xenbus) } } -static void xen_bus_backend_changed(void *opaque) +static void xen_bus_backend_changed(void *opaque, const char *path) { XenBus *xenbus = opaque; @@ -434,7 +297,7 @@ static void xen_bus_unrealize(BusState *bus) for (i = 0; i < xenbus->backend_types; i++) { if (xenbus->backend_watch[i]) { - xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL); + xs_node_unwatch(xenbus->xsh, xenbus->backend_watch[i]); } } @@ -442,13 +305,8 @@ static void xen_bus_unrealize(BusState *bus) xenbus->backend_watch = NULL; } - if (xenbus->watch_list) { - watch_list_destroy(xenbus->watch_list); - xenbus->watch_list = NULL; - } - if (xenbus->xsh) { - xs_close(xenbus->xsh); + qemu_xen_xs_close(xenbus->xsh); } } @@ -463,7 +321,7 @@ static void xen_bus_realize(BusState *bus, Error **errp) trace_xen_bus_realize(); - xenbus->xsh = xs_open(0); + xenbus->xsh = qemu_xen_xs_open(); if (!xenbus->xsh) { error_setg_errno(errp, errno, "failed xs_open"); goto fail; @@ -476,19 +334,18 @@ static void xen_bus_realize(BusState *bus, Error **errp) xenbus->backend_id = 0; /* Assume lack of node means dom0 */ } - xenbus->watch_list = 
watch_list_create(xenbus->xsh); - module_call_init(MODULE_INIT_XEN_BACKEND); type = xen_backend_get_types(&xenbus->backend_types); - xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types); + xenbus->backend_watch = g_new(struct qemu_xs_watch *, + xenbus->backend_types); for (i = 0; i < xenbus->backend_types; i++) { char *node = g_strdup_printf("backend/%s", type[i]); xenbus->backend_watch[i] = - xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed, - &local_err); + xs_node_watch(xenbus->xsh, node, key, xen_bus_backend_changed, + xenbus, &local_err); if (local_err) { /* This need not be treated as a hard error so don't propagate */ error_reportf_err(local_err, @@ -631,7 +488,7 @@ static bool xen_device_frontend_is_active(XenDevice *xendev) } } -static void xen_device_backend_changed(void *opaque) +static void xen_device_backend_changed(void *opaque, const char *path) { XenDevice *xendev = opaque; const char *type = object_get_typename(OBJECT(xendev)); @@ -685,66 +542,35 @@ static void xen_device_backend_changed(void *opaque) } } -static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node, - const char *key, - XenWatchHandler handler, - Error **errp) -{ - const char *type = object_get_typename(OBJECT(xendev)); - - trace_xen_device_add_watch(type, xendev->name, node, key); - - return watch_list_add(xendev->watch_list, node, key, handler, xendev, - errp); -} - -static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch, - Error **errp) -{ - const char *type = object_get_typename(OBJECT(xendev)); - - trace_xen_device_remove_watch(type, xendev->name, watch->node, - watch->key); - - watch_list_remove(xendev->watch_list, watch, errp); -} - - static void xen_device_backend_create(XenDevice *xendev, Error **errp) { ERRP_GUARD(); XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - struct xs_permissions perms[2]; xendev->backend_path = xen_device_get_backend_path(xendev); - perms[0].id = xenbus->backend_id; - 
perms[0].perms = XS_PERM_NONE; - perms[1].id = xendev->frontend_id; - perms[1].perms = XS_PERM_READ; - g_assert(xenbus->xsh); - xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms, - ARRAY_SIZE(perms), errp); + xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, + xenbus->backend_id, xendev->frontend_id, XS_PERM_READ, errp); if (*errp) { error_prepend(errp, "failed to create backend: "); return; } xendev->backend_state_watch = - xen_device_add_watch(xendev, xendev->backend_path, - "state", xen_device_backend_changed, - errp); + xs_node_watch(xendev->xsh, xendev->backend_path, + "state", xen_device_backend_changed, xendev, + errp); if (*errp) { error_prepend(errp, "failed to watch backend state: "); return; } xendev->backend_online_watch = - xen_device_add_watch(xendev, xendev->backend_path, - "online", xen_device_backend_changed, - errp); + xs_node_watch(xendev->xsh, xendev->backend_path, + "online", xen_device_backend_changed, xendev, + errp); if (*errp) { error_prepend(errp, "failed to watch backend online: "); return; @@ -757,12 +583,12 @@ static void xen_device_backend_destroy(XenDevice *xendev) Error *local_err = NULL; if (xendev->backend_online_watch) { - xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL); + xs_node_unwatch(xendev->xsh, xendev->backend_online_watch); xendev->backend_online_watch = NULL; } if (xendev->backend_state_watch) { - xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL); + xs_node_unwatch(xendev->xsh, xendev->backend_state_watch); xendev->backend_state_watch = NULL; } @@ -837,7 +663,7 @@ static void xen_device_frontend_set_state(XenDevice *xendev, } } -static void xen_device_frontend_changed(void *opaque) +static void xen_device_frontend_changed(void *opaque, const char *path) { XenDevice *xendev = opaque; XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); @@ -885,7 +711,6 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) { ERRP_GUARD(); XenBus 
*xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - struct xs_permissions perms[2]; xendev->frontend_path = xen_device_get_frontend_path(xendev); @@ -894,15 +719,11 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) * toolstack. */ if (!xen_device_frontend_exists(xendev)) { - perms[0].id = xendev->frontend_id; - perms[0].perms = XS_PERM_NONE; - perms[1].id = xenbus->backend_id; - perms[1].perms = XS_PERM_READ | XS_PERM_WRITE; - g_assert(xenbus->xsh); - xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms, - ARRAY_SIZE(perms), errp); + xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, + xendev->frontend_id, xenbus->backend_id, + XS_PERM_READ | XS_PERM_WRITE, errp); if (*errp) { error_prepend(errp, "failed to create frontend: "); return; @@ -910,8 +731,8 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) } xendev->frontend_state_watch = - xen_device_add_watch(xendev, xendev->frontend_path, "state", - xen_device_frontend_changed, errp); + xs_node_watch(xendev->xsh, xendev->frontend_path, "state", + xen_device_frontend_changed, xendev, errp); if (*errp) { error_prepend(errp, "failed to watch frontend state: "); } @@ -923,8 +744,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev) Error *local_err = NULL; if (xendev->frontend_state_watch) { - xen_device_remove_watch(xendev, xendev->frontend_state_watch, - NULL); + xs_node_unwatch(xendev->xsh, xendev->frontend_state_watch); xendev->frontend_state_watch = NULL; } @@ -1146,13 +966,8 @@ static void xen_device_unrealize(DeviceState *dev) xendev->xgth = NULL; } - if (xendev->watch_list) { - watch_list_destroy(xendev->watch_list); - xendev->watch_list = NULL; - } - if (xendev->xsh) { - xs_close(xendev->xsh); + qemu_xen_xs_close(xendev->xsh); xendev->xsh = NULL; } @@ -1197,14 +1012,12 @@ static void xen_device_realize(DeviceState *dev, Error **errp) trace_xen_device_realize(type, xendev->name); - xendev->xsh = xs_open(0); + xendev->xsh = 
qemu_xen_xs_open(); if (!xendev->xsh) { error_setg_errno(errp, errno, "failed xs_open"); goto unrealize; } - xendev->watch_list = watch_list_create(xendev->xsh); - xendev->xgth = qemu_xen_gnttab_open(); if (!xendev->xgth) { error_setg_errno(errp, errno, "failed xengnttab_open"); diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index a48a25a..4ded3ce 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -39,7 +39,7 @@ BusState *xen_sysbus; /* ------------------------------------------------------------- */ /* public */ -struct xs_handle *xenstore; +struct qemu_xs_handle *xenstore; const char *xen_protocol; /* private */ @@ -274,6 +274,25 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, } } +static void xenstore_update_fe(void *opaque, const char *watch) +{ + struct XenLegacyDevice *xendev = opaque; + const char *node; + unsigned int len; + + len = strlen(xendev->fe); + if (strncmp(xendev->fe, watch, len) != 0) { + return; + } + if (watch[len] != '/') { + return; + } + node = watch + len + 1; + + xen_be_frontend_changed(xendev, node); + xen_be_check_state(xendev); +} + /* ------------------------------------------------------------- */ /* Check for possible state transitions and perform them. 
*/ @@ -287,7 +306,6 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, */ static int xen_be_try_setup(struct XenLegacyDevice *xendev) { - char token[XEN_BUFSIZE]; int be_state; if (xenstore_read_be_int(xendev, "state", &be_state) == -1) { @@ -308,8 +326,9 @@ static int xen_be_try_setup(struct XenLegacyDevice *xendev) } /* setup frontend watch */ - snprintf(token, sizeof(token), "fe:%p", xendev); - if (!xs_watch(xenstore, xendev->fe, token)) { + xendev->watch = qemu_xen_xs_watch(xenstore, xendev->fe, xenstore_update_fe, + xendev); + if (!xendev->watch) { xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n", xendev->fe); return -1; @@ -498,46 +517,20 @@ void xen_be_check_state(struct XenLegacyDevice *xendev) /* ------------------------------------------------------------- */ -static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) -{ - struct XenLegacyDevice *xendev; - char path[XEN_BUFSIZE], token[XEN_BUFSIZE]; - char **dev = NULL; - unsigned int cdev, j; - - /* setup watch */ - snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops); - snprintf(path, sizeof(path), "backend/%s/%d", type, dom); - if (!xs_watch(xenstore, path, token)) { - xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", - path); - return -1; - } - - /* look for backends */ - dev = xs_directory(xenstore, 0, path, &cdev); - if (!dev) { - return 0; - } - for (j = 0; j < cdev; j++) { - xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); - if (xendev == NULL) { - continue; - } - xen_be_check_state(xendev); - } - free(dev); - return 0; -} +struct xenstore_be { + const char *type; + int dom; + struct XenDevOps *ops; +}; -void xenstore_update_be(char *watch, char *type, int dom, - struct XenDevOps *ops) +static void xenstore_update_be(void *opaque, const char *watch) { + struct xenstore_be *be = opaque; struct XenLegacyDevice *xendev; char path[XEN_BUFSIZE], *bepath; unsigned int len, dev; - len = snprintf(path, sizeof(path), 
"backend/%s/%d", type, dom); + len = snprintf(path, sizeof(path), "backend/%s/%d", be->type, be->dom); if (strncmp(path, watch, len) != 0) { return; } @@ -551,9 +544,9 @@ void xenstore_update_be(char *watch, char *type, int dom, return; } - xendev = xen_be_get_xendev(type, dom, dev, ops); + xendev = xen_be_get_xendev(be->type, be->dom, dev, be->ops); if (xendev != NULL) { - bepath = xs_read(xenstore, 0, xendev->be, &len); + bepath = qemu_xen_xs_read(xenstore, 0, xendev->be, &len); if (bepath == NULL) { xen_pv_del_xendev(xendev); } else { @@ -564,23 +557,41 @@ void xenstore_update_be(char *watch, char *type, int dom, } } -void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev) +static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) { - char *node; - unsigned int len; + struct XenLegacyDevice *xendev; + char path[XEN_BUFSIZE]; + struct xenstore_be *be = g_new0(struct xenstore_be, 1); + char **dev = NULL; + unsigned int cdev, j; - len = strlen(xendev->fe); - if (strncmp(xendev->fe, watch, len) != 0) { - return; - } - if (watch[len] != '/') { - return; + /* setup watch */ + be->type = type; + be->dom = dom; + be->ops = ops; + snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + if (!qemu_xen_xs_watch(xenstore, path, xenstore_update_be, be)) { + xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", + path); + return -1; } - node = watch + len + 1; - xen_be_frontend_changed(xendev, node); - xen_be_check_state(xendev); + /* look for backends */ + dev = qemu_xen_xs_directory(xenstore, 0, path, &cdev); + if (!dev) { + return 0; + } + for (j = 0; j < cdev; j++) { + xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); + if (xendev == NULL) { + continue; + } + xen_be_check_state(xendev); + } + free(dev); + return 0; } + /* -------------------------------------------------------------------- */ static void xen_set_dynamic_sysbus(void) @@ -594,14 +605,12 @@ static void xen_set_dynamic_sysbus(void) void 
xen_be_init(void) { - xenstore = xs_daemon_open(); + xenstore = qemu_xen_xs_open(); if (!xenstore) { xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); exit(1); } - qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL); - if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) { xen_pv_printf(NULL, 0, "Xen operations not set up\n"); exit(1); diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c index 440e566..abed812 100644 --- a/hw/xen/xen-operations.c +++ b/hw/xen/xen-operations.c @@ -10,6 +10,7 @@ */ #include "qemu/osdep.h" +#include "qemu/uuid.h" #include "qapi/error.h" #include "hw/xen/xen_backend_ops.h" @@ -262,6 +263,202 @@ struct foreignmem_backend_ops libxenforeignmem_backend_ops = { .unmap = libxenforeignmem_backend_unmap, }; +struct qemu_xs_handle { + struct xs_handle *xsh; + NotifierList notifiers; +}; + +static void watch_event(void *opaque) +{ + struct qemu_xs_handle *h = opaque; + + for (;;) { + char **v = xs_check_watch(h->xsh); + + if (!v) { + break; + } + + notifier_list_notify(&h->notifiers, v); + free(v); + } +} + +static struct qemu_xs_handle *libxenstore_open(void) +{ + struct xs_handle *xsh = xs_open(0); + struct qemu_xs_handle *h = g_new0(struct qemu_xs_handle, 1); + + if (!xsh) { + return NULL; + } + + h = g_new0(struct qemu_xs_handle, 1); + h->xsh = xsh; + + notifier_list_init(&h->notifiers); + qemu_set_fd_handler(xs_fileno(h->xsh), watch_event, NULL, h); + + return h; +} + +static void libxenstore_close(struct qemu_xs_handle *h) +{ + g_assert(notifier_list_empty(&h->notifiers)); + qemu_set_fd_handler(xs_fileno(h->xsh), NULL, NULL, NULL); + xs_close(h->xsh); + g_free(h); +} + +static char *libxenstore_get_domain_path(struct qemu_xs_handle *h, + unsigned int domid) +{ + return xs_get_domain_path(h->xsh, domid); +} + +static char **libxenstore_directory(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path, + unsigned int *num) +{ + return xs_directory(h->xsh, t, path, num); +} + +static void 
*libxenstore_read(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + return xs_read(h->xsh, t, path, len); +} + +static bool libxenstore_write(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, + unsigned int len) +{ + return xs_write(h->xsh, t, path, data, len); +} + +static bool libxenstore_create(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path) +{ + struct xs_permissions perms_list[] = { + { + .id = owner, + .perms = XS_PERM_NONE, + }, + { + .id = domid, + .perms = perms, + }, + }; + + if (!xs_mkdir(h->xsh, t, path)) { + return false; + } + + return xs_set_permissions(h->xsh, t, path, perms_list, + ARRAY_SIZE(perms_list)); +} + +static bool libxenstore_destroy(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path) +{ + return xs_rm(h->xsh, t, path); +} + +struct qemu_xs_watch { + char *path; + char *token; + xs_watch_fn fn; + void *opaque; + Notifier notifier; +}; + +static void watch_notify(Notifier *n, void *data) +{ + struct qemu_xs_watch *w = container_of(n, struct qemu_xs_watch, notifier); + const char **v = data; + + if (!strcmp(w->token, v[XS_WATCH_TOKEN])) { + w->fn(w->opaque, v[XS_WATCH_PATH]); + } +} + +static struct qemu_xs_watch *new_watch(const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1); + QemuUUID uuid; + + qemu_uuid_generate(&uuid); + + w->token = qemu_uuid_unparse_strdup(&uuid); + w->path = g_strdup(path); + w->fn = fn; + w->opaque = opaque; + w->notifier.notify = watch_notify; + + return w; +} + +static void free_watch(struct qemu_xs_watch *w) +{ + g_free(w->token); + g_free(w->path); + + g_free(w); +} + +static struct qemu_xs_watch *libxenstore_watch(struct qemu_xs_handle *h, + const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = new_watch(path, fn, opaque); + + notifier_list_add(&h->notifiers, 
&w->notifier); + + if (!xs_watch(h->xsh, path, w->token)) { + notifier_remove(&w->notifier); + free_watch(w); + return NULL; + } + + return w; +} + +static void libxenstore_unwatch(struct qemu_xs_handle *h, + struct qemu_xs_watch *w) +{ + xs_unwatch(h->xsh, w->path, w->token); + notifier_remove(&w->notifier); + free_watch(w); +} + +static xs_transaction_t libxenstore_transaction_start(struct qemu_xs_handle *h) +{ + return xs_transaction_start(h->xsh); +} + +static bool libxenstore_transaction_end(struct qemu_xs_handle *h, + xs_transaction_t t, bool abort) +{ + return xs_transaction_end(h->xsh, t, abort); +} + +struct xenstore_backend_ops libxenstore_backend_ops = { + .open = libxenstore_open, + .close = libxenstore_close, + .get_domain_path = libxenstore_get_domain_path, + .directory = libxenstore_directory, + .read = libxenstore_read, + .write = libxenstore_write, + .create = libxenstore_create, + .destroy = libxenstore_destroy, + .watch = libxenstore_watch, + .unwatch = libxenstore_unwatch, + .transaction_start = libxenstore_transaction_start, + .transaction_end = libxenstore_transaction_end, +}; + void setup_xen_backend_ops(void) { #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 @@ -277,4 +474,5 @@ void setup_xen_backend_ops(void) xen_evtchn_ops = &libxenevtchn_backend_ops; xen_gnttab_ops = &libxengnttab_backend_ops; xen_foreignmem_ops = &libxenforeignmem_backend_ops; + xen_xenstore_ops = &libxenstore_backend_ops; } diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c index 46ee4a7..9b7304e 100644 --- a/hw/xen/xen_devconfig.c +++ b/hw/xen/xen_devconfig.c @@ -11,11 +11,11 @@ static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev, { char *dom; - dom = xs_get_domain_path(xenstore, xen_domid); + dom = qemu_xen_xs_get_domain_path(xenstore, xen_domid); snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev); free(dom); - dom = xs_get_domain_path(xenstore, 0); + dom = qemu_xen_xs_get_domain_path(xenstore, 0); snprintf(be, len, 
"%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev); free(dom); diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c index f303f67..0aed3bb 100644 --- a/hw/xen/xen_pt_graphics.c +++ b/hw/xen/xen_pt_graphics.c @@ -5,7 +5,6 @@ #include "qapi/error.h" #include "xen_pt.h" #include "xen-host-pci-device.h" -#include "hw/xen/xen-legacy-backend.h" static unsigned long igd_guest_opregion; static unsigned long igd_host_opregion; diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c index d8582cc..be1504b 100644 --- a/hw/xen/xen_pvdev.c +++ b/hw/xen/xen_pvdev.c @@ -54,31 +54,17 @@ void xen_config_cleanup(void) struct xs_dirs *d; QTAILQ_FOREACH(d, &xs_cleanup, list) { - xs_rm(xenstore, 0, d->xs_dir); + qemu_xen_xs_destroy(xenstore, 0, d->xs_dir); } } int xenstore_mkdir(char *path, int p) { - struct xs_permissions perms[2] = { - { - .id = 0, /* set owner: dom0 */ - }, { - .id = xen_domid, - .perms = p, - } - }; - - if (!xs_mkdir(xenstore, 0, path)) { + if (!qemu_xen_xs_create(xenstore, 0, 0, xen_domid, p, path)) { xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path); return -1; } xenstore_cleanup_dir(g_strdup(path)); - - if (!xs_set_permissions(xenstore, 0, path, perms, 2)) { - xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path); - return -1; - } return 0; } @@ -87,7 +73,7 @@ int xenstore_write_str(const char *base, const char *node, const char *val) char abspath[XEN_BUFSIZE]; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { + if (!qemu_xen_xs_write(xenstore, 0, abspath, val, strlen(val))) { return -1; } return 0; @@ -100,7 +86,7 @@ char *xenstore_read_str(const char *base, const char *node) char *str, *ret = NULL; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - str = xs_read(xenstore, 0, abspath, &len); + str = qemu_xen_xs_read(xenstore, 0, abspath, &len); if (str != NULL) { /* move to qemu-allocated memory to make sure * callers can savely g_free() stuff. 
*/ @@ -152,29 +138,6 @@ int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval) return rc; } -void xenstore_update(void *unused) -{ - char **vec = NULL; - intptr_t type, ops, ptr; - unsigned int dom, count; - - vec = xs_read_watch(xenstore, &count); - if (vec == NULL) { - goto cleanup; - } - - if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, - &type, &dom, &ops) == 3) { - xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops); - } - if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { - xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr); - } - -cleanup: - free(vec); -} - const char *xenbus_strstate(enum xenbus_state state) { static const char *const name[] = { @@ -299,9 +262,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev) } if (xendev->fe) { - char token[XEN_BUFSIZE]; - snprintf(token, sizeof(token), "fe:%p", xendev); - xs_unwatch(xenstore, xendev->fe, token); + qemu_xen_xs_unwatch(xenstore, xendev->watch); g_free(xendev->fe); } -- cgit v1.1 From 7a8a749da7d30b420291fa0b11e3eda7f72d9b83 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 7 Jan 2023 16:17:51 +0000 Subject: hw/xen: Move xenstore_store_pv_console_info to xen_console.c There's no need for this to be in the Xen accel code, and as we want to use the Xen console support with KVM-emulated Xen we'll want to have a platform-agnostic version of it. Make it use GString to build up the path while we're at it. 
Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/char/xen_console.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index ad8638a..c7a19c0 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con) /* -------------------------------------------------------------------- */ +static int store_con_info(struct XenConsole *con) +{ + Chardev *cs = qemu_chr_fe_get_driver(&con->chr); + char *pts = NULL; + char *dom_path; + GString *path; + int ret = -1; + + /* Only continue if we're talking to a pty. */ + if (!CHARDEV_IS_PTY(cs)) { + return 0; + } + pts = cs->filename + 4; + + dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid); + if (!dom_path) { + return 0; + } + + path = g_string_new(dom_path); + free(dom_path); + + if (con->xendev.dev) { + g_string_append_printf(path, "/device/console/%d", con->xendev.dev); + } else { + g_string_append(path, "/console"); + } + g_string_append(path, "/tty"); + + if (xenstore_write_str(con->console, path->str, pts)) { + fprintf(stderr, "xenstore_write_str for '%s' fail", path->str); + goto out; + } + ret = 0; + +out: + g_string_free(path, true); + free(path); + + return ret; +} + static int con_init(struct XenLegacyDevice *xendev) { struct XenConsole *con = container_of(xendev, struct XenConsole, xendev); @@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev) &error_abort); } - xenstore_store_pv_console_info(con->xendev.dev, - qemu_chr_fe_get_driver(&con->chr)); + store_con_info(con); out: g_free(type); -- cgit v1.1 From a9ae1418b36b20ab06fb760b1108f61f49a76164 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 7 Jan 2023 16:47:43 +0000 Subject: hw/xen: Use XEN_PAGE_SIZE in PV backend drivers XC_PAGE_SIZE comes from the actual Xen libraries, while XEN_PAGE_SIZE is provided by QEMU itself in 
xen_backend_ops.h. For backends which may be built for emulation mode, use the latter. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/block/dataplane/xen-block.c | 8 ++++---- hw/display/xenfb.c | 12 ++++++------ hw/net/xen_nic.c | 12 ++++++------ hw/usb/xen-usb.c | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'hw') diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index e55b713..8322a1d 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -101,9 +101,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane) * re-use requests, allocate the memory once here. It will be freed * xen_block_dataplane_destroy() when the request list is freed. */ - request->buf = qemu_memalign(XC_PAGE_SIZE, + request->buf = qemu_memalign(XEN_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * - XC_PAGE_SIZE); + XEN_PAGE_SIZE); dataplane->requests_total++; qemu_iovec_init(&request->v, 1); } else { @@ -185,7 +185,7 @@ static int xen_block_parse_request(XenBlockRequest *request) goto err; } if (request->req.seg[i].last_sect * dataplane->sector_size >= - XC_PAGE_SIZE) { + XEN_PAGE_SIZE) { error_report("error: page crossing"); goto err; } @@ -740,7 +740,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, dataplane->protocol = protocol; - ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref; + ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref; switch (dataplane->protocol) { case BLKIF_PROTOCOL_NATIVE: { diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 2c4016f..0074a9b 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -489,13 +489,13 @@ static int xenfb_map_fb(struct XenFB *xenfb) } if (xenfb->pixels) { - munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE); + munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE); xenfb->pixels = NULL; } - xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE); + xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, 
XEN_PAGE_SIZE); n_fbdirs = xenfb->fbpages * mode / 8; - n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE); + n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE); pgmfns = g_new0(xen_pfn_t, n_fbdirs); fbmfns = g_new0(xen_pfn_t, xenfb->fbpages); @@ -528,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, { size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]); size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz; - size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz; - size_t fb_len_max = fb_pages * XC_PAGE_SIZE; + size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz; + size_t fb_len_max = fb_pages * XEN_PAGE_SIZE; int max_width, max_height; if (fb_len_lim > fb_len_max) { @@ -930,7 +930,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev) * instead. This releases the guest pages and keeps qemu happy. */ qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages); - fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE, + fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); if (fb->pixels == MAP_FAILED) { diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 166d037..9bbf659 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev) continue; } - if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { + if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) { xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n"); net_tx_error(netdev, &txreq, rc); continue; @@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev) if (txreq.flags & NETTXF_csum_blank) { /* have read-only mapping -> can't fill checksum in-place */ if (!tmpbuf) { - tmpbuf = g_malloc(XC_PAGE_SIZE); + tmpbuf = g_malloc(XEN_PAGE_SIZE); } memcpy(tmpbuf, page + txreq.offset, txreq.size); net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL); @@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size if 
(rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) { return 0; } - if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { + if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) { xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", - (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); + (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN); return -1; } @@ -348,8 +348,8 @@ static int net_connect(struct XenLegacyDevice *xendev) netdev->txs = NULL; return -1; } - BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE); - BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE); + BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE); + BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE); xen_be_bind_evtchn(&netdev->xendev); diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index a770a64..66cb3f7 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -161,7 +161,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < nr_segs; i++) { if ((unsigned)usbback_req->req.seg[i].offset + - (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) { + (unsigned)usbback_req->req.seg[i].length > XEN_PAGE_SIZE) { xen_pv_printf(xendev, 0, "segment crosses page boundary\n"); return -EINVAL; } @@ -185,7 +185,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < usbback_req->nr_buffer_segs; i++) { seg = usbback_req->req.seg + i; - addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset; + addr = usbback_req->buffer + i * XEN_PAGE_SIZE + seg->offset; qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length); } } @@ -902,8 +902,8 @@ static int usbback_connect(struct XenLegacyDevice *xendev) usbif->conn_ring_ref = conn_ring_ref; urb_sring = usbif->urb_sring; conn_sring = usbif->conn_sring; - BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE); - BACK_RING_INIT(&usbif->conn_ring, conn_sring, XC_PAGE_SIZE); + BACK_RING_INIT(&usbif->urb_ring, urb_sring, XEN_PAGE_SIZE); + BACK_RING_INIT(&usbif->conn_ring, 
conn_sring, XEN_PAGE_SIZE); xen_be_bind_evtchn(xendev); -- cgit v1.1 From e2abfe5ec67b69fb310fbeaacf7e68d61d16609e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 2 Jan 2023 00:39:13 +0000 Subject: hw/xen: Rename xen_common.h to xen_native.h This header is now only for native Xen code, not PV backends that may be used in Xen emulation. Since the toolstack libraries may depend on the specific version of Xen headers that they pull in (and will set the __XEN_TOOLS__ macro to enable internal definitions that they depend on), the rule is that xen_native.h (and thus the toolstack library headers) must be included *before* any of the headers in include/hw/xen/interface. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/9pfs/xen-9p-backend.c | 1 + hw/block/dataplane/xen-block.c | 3 ++- hw/block/xen-block.c | 1 - hw/i386/pc_piix.c | 4 ++-- hw/i386/xen/xen-hvm.c | 11 ++++++----- hw/i386/xen/xen-mapcache.c | 2 +- hw/i386/xen/xen_platform.c | 7 ++++--- hw/xen/trace-events | 2 +- hw/xen/xen-operations.c | 2 +- hw/xen/xen_pt.c | 2 +- hw/xen/xen_pt.h | 2 +- hw/xen/xen_pt_config_init.c | 2 +- hw/xen/xen_pt_msi.c | 4 ++-- 13 files changed, 23 insertions(+), 20 deletions(-) (limited to 'hw') diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index d8bb0e8..74f3a05 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -22,6 +22,7 @@ #include "qemu/config-file.h" #include "qemu/main-loop.h" #include "qemu/option.h" +#include "qemu/iov.h" #include "fsdev/qemu-fsdev.h" #define VERSIONS "1" diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 8322a1d..734da42 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -23,8 +23,9 @@ #include "qemu/main-loop.h" #include "qemu/memalign.h" #include "qapi/error.h" -#include "hw/xen/xen_common.h" +#include "hw/xen/xen.h" #include "hw/block/xen_blkif.h" +#include "hw/xen/interface/io/ring.h" #include "sysemu/block-backend.h" #include 
"sysemu/iothread.h" #include "xen-block.h" diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 345b284..8729961 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -19,7 +19,6 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" #include "qom/object_interfaces.h" -#include "hw/xen/xen_common.h" #include "hw/block/xen_blkif.h" #include "hw/qdev-properties.h" #include "hw/xen/xen-block.h" diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 4bf15f9..30eedd6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -47,8 +47,6 @@ #include "hw/kvm/clock.h" #include "hw/sysbus.h" #include "hw/i2c/smbus_eeprom.h" -#include "hw/xen/xen-x86.h" -#include "hw/xen/xen.h" #include "exec/memory.h" #include "hw/acpi/acpi.h" #include "hw/acpi/piix4.h" @@ -60,6 +58,8 @@ #include #include "hw/xen/xen_pt.h" #endif +#include "hw/xen/xen-x86.h" +#include "hw/xen/xen.h" #include "migration/global_state.h" #include "migration/misc.h" #include "sysemu/numa.h" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index cb1d24f..56641a5 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -18,7 +18,7 @@ #include "hw/irq.h" #include "hw/hw.h" #include "hw/i386/apic-msidef.h" -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_native.h" #include "hw/xen/xen-legacy-backend.h" #include "hw/xen/xen-bus.h" #include "hw/xen/xen-x86.h" @@ -52,10 +52,11 @@ static bool xen_in_migration; /* Compatibility with older version */ -/* This allows QEMU to build on a system that has Xen 4.5 or earlier - * installed. This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h - * needs to be included before this block and hw/xen/xen_common.h needs to - * be included before xen/hvm/ioreq.h +/* + * This allows QEMU to build on a system that has Xen 4.5 or earlier installed. 
+ * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to + * be included before this block and hw/xen/xen_native.h needs to be included + * before xen/hvm/ioreq.h */ #ifndef IOREQ_TYPE_VMWARE_PORT #define IOREQ_TYPE_VMWARE_PORT 3 diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 1d0879d..f7d9746 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -14,7 +14,7 @@ #include -#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_native.h" #include "qemu/bitmap.h" #include "sysemu/runstate.h" diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 539f7da..57f1d74 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -28,7 +28,6 @@ #include "hw/ide/pci.h" #include "hw/pci/pci.h" #include "migration/vmstate.h" -#include "hw/xen/xen.h" #include "net/net.h" #include "trace.h" #include "sysemu/xen.h" @@ -38,10 +37,12 @@ #include "qom/object.h" #ifdef CONFIG_XEN -#include "hw/xen/xen_common.h" -#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_native.h" #endif +/* The rule is that xen_native.h must come first */ +#include "hw/xen/xen.h" + //#define DEBUG_PLATFORM #ifdef DEBUG_PLATFORM diff --git a/hw/xen/trace-events b/hw/xen/trace-events index 3da3fd8..55c9e1d 100644 --- a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -1,6 +1,6 @@ # See docs/devel/tracing.rst for syntax documentation. 
-# ../../include/hw/xen/xen_common.h +# ../../include/hw/xen/xen_native.h xen_default_ioreq_server(void) "" xen_ioreq_server_create(uint32_t id) "id: %u" xen_ioreq_server_destroy(uint32_t id) "id: %u" diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c index abed812..4b78fbf 100644 --- a/hw/xen/xen-operations.c +++ b/hw/xen/xen-operations.c @@ -13,8 +13,8 @@ #include "qemu/uuid.h" #include "qapi/error.h" +#include "hw/xen/xen_native.h" #include "hw/xen/xen_backend_ops.h" -#include "hw/xen/xen_common.h" /* * If we have new enough libxenctrl then we do not want/need these compat diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 85c93cf..2d33d17 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -60,9 +60,9 @@ #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "xen_pt.h" #include "hw/xen/xen.h" #include "hw/xen/xen-legacy-backend.h" -#include "xen_pt.h" #include "qemu/range.h" static bool has_igd_gfx_passthru; diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index e184699..b20744f 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -1,7 +1,7 @@ #ifndef XEN_PT_H #define XEN_PT_H -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_native.h" #include "xen-host-pci-device.h" #include "qom/object.h" diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 8b9b554..2b8680b 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -15,8 +15,8 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/timer.h" -#include "hw/xen/xen-legacy-backend.h" #include "xen_pt.h" +#include "hw/xen/xen-legacy-backend.h" #define XEN_PT_MERGE_VALUE(value, data, val_mask) \ (((value) & (val_mask)) | ((data) & ~(val_mask))) diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index b71563f..09cca4e 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -11,9 +11,9 @@ #include "qemu/osdep.h" -#include "hw/xen/xen-legacy-backend.h" -#include "xen_pt.h" #include 
"hw/i386/apic-msidef.h" +#include "xen_pt.h" +#include "hw/xen/xen-legacy-backend.h" #define XEN_PT_AUTO_ASSIGN -1 -- cgit v1.1 From 4ca8cf092dabf934a32968c917f0d0682053cd4e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 2 Jan 2023 01:26:04 +0000 Subject: hw/xen: Build PV backend drivers for CONFIG_XEN_BUS Now that we have the redirectable Xen backend operations we can build the PV backends even without the Xen libraries. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/9pfs/meson.build | 2 +- hw/block/dataplane/meson.build | 2 +- hw/block/meson.build | 2 +- hw/char/meson.build | 2 +- hw/display/meson.build | 2 +- hw/usb/meson.build | 2 +- hw/xen/meson.build | 5 ++++- 7 files changed, 10 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build index 12443b6..fd37b7a 100644 --- a/hw/9pfs/meson.build +++ b/hw/9pfs/meson.build @@ -15,7 +15,7 @@ fs_ss.add(files( )) fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c')) fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c')) -fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c')) +fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c')) softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss) specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c')) diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build index 12c6a26..78d7ac1 100644 --- a/hw/block/dataplane/meson.build +++ b/hw/block/dataplane/meson.build @@ -1,2 +1,2 @@ specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) -specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) +specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) diff --git a/hw/block/meson.build b/hw/block/meson.build index b434d56..cc2a75c 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: 
files('pflash_cfi02.c')) softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c')) diff --git a/hw/char/meson.build b/hw/char/meson.build index 7b594f5..e02c60d 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c')) softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c')) softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c')) softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c')) diff --git a/hw/display/meson.build b/hw/display/meson.build index f470179..4191694 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c')) softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c')) softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c')) softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c')) softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c')) softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c')) 
diff --git a/hw/usb/meson.build b/hw/usb/meson.build index bdf34cb..599dc24 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -84,6 +84,6 @@ if libusb.found() hw_usb_modules += {'host': usbhost_ss} endif -softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c')) +softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: files('xen-usb.c')) modules += { 'hw-usb': hw_usb_modules } diff --git a/hw/xen/meson.build b/hw/xen/meson.build index f195bbd..19c6aab 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -1,10 +1,13 @@ -softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( +softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files( 'xen-backend.c', 'xen-bus-helper.c', 'xen-bus.c', 'xen-legacy-backend.c', 'xen_devconfig.c', 'xen_pvdev.c', +)) + +softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( 'xen-operations.c', )) -- cgit v1.1 From 240cc11369fc692c037a6ec46b358e75a55df894 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 30 Jan 2023 14:35:28 +0000 Subject: hw/xen: Avoid crash when backend watch fires too early The xen-block code ends up calling aio_poll() through blkconf_geometry(), which means we see watch events during the indirect call to xendev_class->realize() in xen_device_realize(). Unfortunately this call is made before populating the initial frontend and backend device nodes in xenstore and hence xen_block_frontend_changed() (which is called from a watch event) fails to read the frontend's 'state' node, and hence believes the device is being torn down. This in-turn sets the backend state to XenbusStateClosed and causes the device to be deleted before it is fully set up, leading to the crash. By simply moving the call to xendev_class->realize() after the initial xenstore nodes are populated, this sorry state of affairs is avoided. 
Reported-by: David Woodhouse Signed-off-by: Paul Durrant Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/xen/xen-bus.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index 9fe5496..c59850b 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -1034,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp) goto unrealize; } - if (xendev_class->realize) { - xendev_class->realize(xendev, errp); - if (*errp) { - goto unrealize; - } - } - xen_device_backend_printf(xendev, "frontend", "%s", xendev->frontend_path); xen_device_backend_printf(xendev, "frontend-id", "%u", @@ -1059,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error **errp) xen_device_frontend_set_state(xendev, XenbusStateInitialising, true); } + if (xendev_class->realize) { + xendev_class->realize(xendev, errp); + if (*errp) { + goto unrealize; + } + } + xendev->exit.notify = xen_device_exit; qemu_add_exit_notifier(&xendev->exit); return; -- cgit v1.1 From 072519037dde8957cc8c519caad21e7816b46129 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 30 Jan 2023 17:27:07 +0100 Subject: hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it When emulating Xen, multi-page grants are distinctly non-trivial and we have elected not to support them for the time being. Don't advertise them to the guest.
Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/block/xen-block.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 8729961..f5a7445 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -83,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) g_free(ring_ref); return; } - } else if (order <= blockdev->props.max_ring_page_order) { + } else if (qemu_xen_gnttab_can_map_multi() && + order <= blockdev->props.max_ring_page_order) { unsigned int i; nr_ring_ref = 1 << order; @@ -255,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) } xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1); - xen_device_backend_printf(xendev, "max-ring-page-order", "%u", - blockdev->props.max_ring_page_order); + + if (qemu_xen_gnttab_can_map_multi()) { + xen_device_backend_printf(xendev, "max-ring-page-order", "%u", + blockdev->props.max_ring_page_order); + } + xen_device_backend_printf(xendev, "info", "%u", blockdev->info); xen_device_frontend_printf(xendev, "virtual-device", "%lu", -- cgit v1.1 From 4dfd5fb178f93c3636a20684e7378427f067ce35 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 1 Jan 2023 23:49:25 +0000 Subject: hw/xen: Hook up emulated implementation for event channel operations We provided the backend-facing evtchn functions very early on as part of the core Xen platform support, since things like timers and xenstore need to use them. By what may or may not be an astonishing coincidence, those functions just *happen* all to have exactly the right function prototypes to slot into the evtchn_backend_ops table and be called by the PV backends. 
Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_evtchn.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'hw') diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 886fbf6..98a7b85 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -34,6 +34,7 @@ #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/irq.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_evtchn.h" #include "xen_overlay.h" @@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = { .class_init = xen_evtchn_class_init, }; +static struct evtchn_backend_ops emu_evtchn_backend_ops = { + .open = xen_be_evtchn_open, + .bind_interdomain = xen_be_evtchn_bind_interdomain, + .unbind = xen_be_evtchn_unbind, + .close = xen_be_evtchn_close, + .get_fd = xen_be_evtchn_fd, + .notify = xen_be_evtchn_notify, + .unmask = xen_be_evtchn_unmask, + .pending = xen_be_evtchn_pending, +}; + static void gsi_assert_bh(void *opaque) { struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0); @@ -318,6 +330,9 @@ void xen_evtchn_create(void) s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64); s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words); s->pirq = g_new0(struct pirq_info, s->nr_pirqs); + + /* Set event channel functions for backend drivers to use */ + xen_evtchn_ops = &emu_evtchn_backend_ops; } void xen_evtchn_connect_gsis(qemu_irq *system_gsis) -- cgit v1.1 From b08d88e30f061d5d8ae080a453a078214d4b462a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 6 Jan 2023 09:59:28 +0000 Subject: hw/xen: Add emulated implementation of grant table operations This is limited to mapping a single grant at a time, because under Xen the pages are mapped *contiguously* into qemu's address space, and that's very hard to do when those pages actually come from anonymous mappings in qemu in the first place. 
Eventually perhaps we can look at using shared mappings of actual objects for system RAM, and then we can make new mappings of the same backing store (be it deleted files, shmem, whatever). But for now let's stick to a page at a time. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_gnttab.c | 299 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 296 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 1e691de..2bf91d3 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -22,6 +22,7 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_overlay.h" #include "xen_gnttab.h" @@ -34,11 +35,10 @@ #define TYPE_XEN_GNTTAB "xen-gnttab" OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB) -#define XEN_PAGE_SHIFT 12 -#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) - #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) +static struct gnttab_backend_ops emu_gnttab_backend_ops; + struct XenGnttabState { /*< private >*/ SysBusDevice busdev; @@ -57,6 +57,8 @@ struct XenGnttabState { MemoryRegion gnt_frames; MemoryRegion *gnt_aliases; uint64_t *gnt_frame_gpas; + + uint8_t *map_track; }; struct XenGnttabState *xen_gnttab_singleton; @@ -88,9 +90,15 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) s->gnt_frame_gpas[i] = INVALID_GPA; } + s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; + s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); qemu_mutex_init(&s->gnt_lock); xen_gnttab_singleton = s; + + s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1); + + xen_gnttab_ops = &emu_gnttab_backend_ops; } static int xen_gnttab_post_load(void *opaque, int version_id) @@ -230,3 +238,288 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size) size->max_nr_frames = s->max_frames; return 0; } + +/* Track per-open refs, to allow 
close() to clean up. */ +struct active_ref { + MemoryRegionSection mrs; + void *virtaddr; + uint32_t refcnt; + int prot; +}; + +static void gnt_unref(XenGnttabState *s, grant_ref_t ref, + MemoryRegionSection *mrs, int prot) +{ + if (mrs && mrs->mr) { + if (prot & PROT_WRITE) { + memory_region_set_dirty(mrs->mr, mrs->offset_within_region, + XEN_PAGE_SIZE); + } + memory_region_unref(mrs->mr); + mrs->mr = NULL; + } + assert(s->map_track[ref] != 0); + + if (--s->map_track[ref] == 0) { + grant_entry_v1_t *gnt_p = &s->entries.v1[ref]; + qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing)); + } +} + +static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot) +{ + uint16_t mask = GTF_type_mask | GTF_sub_page; + grant_entry_v1_t gnt, *gnt_p; + int retries = 0; + + if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 || + s->map_track[ref] == UINT8_MAX) { + return INVALID_GPA; + } + + if (prot & PROT_WRITE) { + mask |= GTF_readonly; + } + + gnt_p = &s->entries.v1[ref]; + + /* + * The guest can legitimately be changing the GTF_readonly flag. Allow + * that, but don't let a malicious guest cause a livelock. 
+ */ + for (retries = 0; retries < 5; retries++) { + uint16_t new_flags; + + /* Read the entry before an atomic operation on its flags */ + gnt = *(volatile grant_entry_v1_t *)gnt_p; + + if ((gnt.flags & mask) != GTF_permit_access || + gnt.domid != DOMID_QEMU) { + return INVALID_GPA; + } + + new_flags = gnt.flags | GTF_reading; + if (prot & PROT_WRITE) { + new_flags |= GTF_writing; + } + + if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) { + return (uint64_t)gnt.frame << XEN_PAGE_SHIFT; + } + } + + return INVALID_GPA; +} + +struct xengntdev_handle { + GHashTable *active_maps; +}; + +static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt, + uint32_t nr_grants) +{ + return 0; +} + +static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, + uint32_t count, uint32_t domid, + uint32_t *refs, int prot) +{ + XenGnttabState *s = xen_gnttab_singleton; + struct active_ref *act; + + if (!s) { + errno = ENOTSUP; + return NULL; + } + + if (domid != xen_domid) { + errno = EINVAL; + return NULL; + } + + if (!count || count > 4096) { + errno = EINVAL; + return NULL; + } + + /* + * Making a contiguous mapping from potentially discontiguous grant + * references would be... distinctly non-trivial. We don't support it. + * Even changing the API to return an array of pointers, one per page, + * wouldn't be simple to use in PV backends because some structures + * actually cross page boundaries (e.g. 32-bit blkif_response ring + * entries are 12 bytes). 
+ */ + if (count != 1) { + errno = EINVAL; + return NULL; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); + if (act) { + if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) { + if (gnt_ref(s, refs[0], prot) == INVALID_GPA) { + return NULL; + } + act->prot |= PROT_WRITE; + } + act->refcnt++; + } else { + uint64_t gpa = gnt_ref(s, refs[0], prot); + if (gpa == INVALID_GPA) { + errno = EINVAL; + return NULL; + } + + act = g_new0(struct active_ref, 1); + act->prot = prot; + act->refcnt = 1; + act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE); + + if (act->mrs.mr && + !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) && + memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) { + act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block, + act->mrs.offset_within_region); + } + if (!act->virtaddr) { + gnt_unref(s, refs[0], &act->mrs, 0); + g_free(act); + errno = EINVAL; + return NULL; + } + + s->map_track[refs[0]]++; + g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act); + } + + return act->virtaddr; +} + +static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data) +{ + XenGnttabState *s = user_data; + grant_ref_t gref = GPOINTER_TO_INT(key); + struct active_ref *act = value; + + gnt_unref(s, gref, &act->mrs, act->prot); + g_free(act); + return true; +} + +static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + XenGnttabState *s = xen_gnttab_singleton; + struct active_ref *act; + + if (!s) { + return -ENOTSUP; + } + + if (count != 1) { + return -EINVAL; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); + if (!act) { + return -ENOENT; + } + + if (act->virtaddr != start_address) { + return -EINVAL; + } + + if (!--act->refcnt) { + do_unmap(GINT_TO_POINTER(refs[0]), act, s); + g_hash_table_remove(xgt->active_maps, 
GINT_TO_POINTER(refs[0])); + } + + return 0; +} + +/* + * This looks a bit like the one for true Xen in xen-operations.c but + * in emulation we don't support multi-page mappings. And under Xen we + * *want* the multi-page mappings so we have fewer bounces through the + * kernel and the hypervisor. So the code paths end up being similar, + * but different. + */ +static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain, + uint32_t domid, XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + int prot = to_domain ? PROT_WRITE : PROT_READ; + unsigned int i; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page; + uint32_t ref = to_domain ? seg->dest.foreign.ref : + seg->source.foreign.ref; + + page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot); + if (!page) { + if (errp) { + error_setg_errno(errp, errno, + "xen_be_gnttab_map_refs failed"); + } + return -errno; + } + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + + if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) { + if (errp) { + error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed"); + } + return -errno; + } + } + + return 0; +} + +static struct xengntdev_handle *xen_be_gnttab_open(void) +{ + struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1); + + xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal); + return xgt; +} + +static int xen_be_gnttab_close(struct xengntdev_handle *xgt) +{ + XenGnttabState *s = xen_gnttab_singleton; + + if (!s) { + return -ENOTSUP; + } + + g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s); + g_hash_table_destroy(xgt->active_maps); + g_free(xgt); + return 0; +} + +static struct gnttab_backend_ops emu_gnttab_backend_ops = { + .open = xen_be_gnttab_open, + .close = xen_be_gnttab_close, + .grant_copy = xen_be_gnttab_copy, + .set_max_grants = 
xen_be_gnttab_set_max_grants, + .map_refs = xen_be_gnttab_map_refs, + .unmap = xen_be_gnttab_unmap, +}; + -- cgit v1.1 From 032475127225e5949c021dcb1dfcc0ffec400157 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 19 Jan 2023 00:04:31 +0000 Subject: hw/xen: Add emulated implementation of XenStore operations Now that we have an internal implementation of XenStore, we can populate the xenstore_backend_ops to allow PV backends to talk to it. Watches can't be processed with immediate callbacks because that would call back into XenBus code recursively. Defer them to a QEMUBH to be run as appropriate from the main loop. We use a QEMUBH per XS handle, and it walks all the watches (there shouldn't be many per handle) to fire any which have pending events. We *could* have done it differently but this allows us to use the same struct watch_event as we have for the guest side, and keeps things relatively simple. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_xenstore.c | 273 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 269 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 35898e9..bf466c7 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -49,7 +49,7 @@ struct XenXenstoreState { /*< public >*/ XenstoreImplState *impl; - GList *watch_events; + GList *watch_events; /* for the guest */ MemoryRegion xenstore_page; struct xenstore_domain_interface *xs; @@ -73,6 +73,8 @@ struct XenXenstoreState *xen_xenstore_singleton; static void xen_xenstore_event(void *opaque); static void fire_watch_cb(void *opaque, const char *path, const char *token); +static struct xenstore_backend_ops emu_xenstore_backend_ops; + static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s, GList *perms, const char *relpath, @@ -169,6 +171,8 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) relpath_printf(s, perms, "feature", "%s", 
""); g_list_free_full(perms, g_free); + + xen_xenstore_ops = &emu_xenstore_backend_ops; } static bool xen_xenstore_is_needed(void *opaque) @@ -1306,6 +1310,15 @@ struct watch_event { char *token; }; +static void free_watch_event(struct watch_event *ev) +{ + if (ev) { + g_free(ev->path); + g_free(ev->token); + g_free(ev); + } +} + static void queue_watch(XenXenstoreState *s, const char *path, const char *token) { @@ -1352,9 +1365,7 @@ static void process_watch_events(XenXenstoreState *s) deliver_watch(s, ev->path, ev->token); s->watch_events = g_list_remove(s->watch_events, ev); - g_free(ev->path); - g_free(ev->token); - g_free(ev); + free_watch_event(ev); } static void xen_xenstore_event(void *opaque) @@ -1444,3 +1455,257 @@ int xen_xenstore_reset(void) return 0; } + +struct qemu_xs_handle { + XenstoreImplState *impl; + GList *watches; + QEMUBH *watch_bh; +}; + +struct qemu_xs_watch { + struct qemu_xs_handle *h; + char *path; + xs_watch_fn fn; + void *opaque; + GList *events; +}; + +static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid) +{ + return g_strdup_printf("/local/domain/%u", domid); +} + +static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *num) +{ + GList *items = NULL, *l; + unsigned int i = 0; + char **items_ret; + int err; + + err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items); + if (err) { + errno = err; + return NULL; + } + + items_ret = g_new0(char *, g_list_length(items) + 1); + *num = 0; + for (l = items; l; l = l->next) { + items_ret[i++] = l->data; + (*num)++; + } + g_list_free(items); + return items_ret; +} + +static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + GByteArray *data = g_byte_array_new(); + bool free_segment = false; + int err; + + err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data); + if (err) { + free_segment = true; + errno = err; + } else { + if (len) { + *len = 
data->len; + } + /* The xen-bus-helper code expects to get NUL terminated string! */ + g_byte_array_append(data, (void *)"", 1); + } + + return g_byte_array_free(data, free_segment); +} + +static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, unsigned int len) +{ + GByteArray *gdata = g_byte_array_new(); + int err; + + g_byte_array_append(gdata, data, len); + err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata); + g_byte_array_unref(gdata); + if (err) { + errno = err; + return false; + } + return true; +} + +static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + GList *perms_list = NULL; + int err; + + /* mkdir does this */ + err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data); + if (err == ENOENT) { + err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data); + } + if (err) { + errno = err; + return false; + } + + perms_list = g_list_append(perms_list, + xs_perm_as_string(XS_PERM_NONE, owner)); + perms_list = g_list_append(perms_list, + xs_perm_as_string(perms, domid)); + + err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list); + g_list_free_full(perms_list, g_free); + if (err) { + errno = err; + return false; + } + return true; +} + +static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path) +{ + int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path); + if (err) { + errno = err; + return false; + } + return true; +} + +static void be_watch_bh(void *_h) +{ + struct qemu_xs_handle *h = _h; + GList *l; + + for (l = h->watches; l; l = l->next) { + struct qemu_xs_watch *w = l->data; + + while (w->events) { + struct watch_event *ev = w->events->data; + + w->fn(w->opaque, ev->path); + + w->events = g_list_remove(w->events, ev); + free_watch_event(ev); + } + } +} + +static void xs_be_watch_cb(void *opaque, const char 
*path, const char *token) +{ + struct watch_event *ev = g_new0(struct watch_event, 1); + struct qemu_xs_watch *w = opaque; + + /* We don't care about the token */ + ev->path = g_strdup(path); + w->events = g_list_append(w->events, ev); + + qemu_bh_schedule(w->h->watch_bh); +} + +/* Register fn as a watch on path for handle h; opaque is handed back to fn on delivery. Returns the new watch, or NULL with errno set if the implementation rejects it. */ static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h, + const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1); + int err; + + w->h = h; + w->fn = fn; + w->opaque = opaque; + + err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w); + if (err) { + errno = err; + g_free(w); + return NULL; + } + + w->path = g_strdup(path); + h->watches = g_list_append(h->watches, w); + return w; +} + +/* Unregister w from the implementation, unlink it from h, discard any events still queued on it and free it. */ static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w) +{ + xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w); + + h->watches = g_list_remove(h->watches, w); + g_list_free_full(w->events, (GDestroyNotify)free_watch_event); + g_free(w->path); + g_free(w); +} + +/* Start a transaction as DOMID_QEMU; returns its id, or XBT_NULL with errno set on failure. */ static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h) +{ + unsigned int new_tx = XBT_NULL; + int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx); + if (err) { + errno = err; + return XBT_NULL; + } + return new_tx; +} + +/* End transaction t; the implementation is passed !abort (presumably its "commit" flag; confirm against xs_impl_transaction_end()). Returns false with errno set on failure. */ static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t, + bool abort) +{ + int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort); + if (err) { + errno = err; + return false; + } + return true; +} + +/* Open a handle on the emulated store. Returns NULL with errno = ENOSYS when the Xenstore device singleton or its implementation does not exist; otherwise returns a new handle with its own bottom half for watch delivery. */ static struct qemu_xs_handle *xs_be_open(void) +{ + XenXenstoreState *s = xen_xenstore_singleton; + struct qemu_xs_handle *h; + + if (!s || !s->impl) { /* test s first so s->impl is never read through NULL */ + errno = ENOSYS; /* errno values are positive, like the codes stored by the other ops */ + return NULL; + } + + h = g_new0(struct qemu_xs_handle, 1); + h->impl = s->impl; + + h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h); + + return h; +} + +/* Tear down a handle: unwatch everything still registered on it, then delete its bottom half and free it. */ static void xs_be_close(struct qemu_xs_handle *h) +{ + while (h->watches) { + struct qemu_xs_watch *w = 
h->watches->data; + xs_be_unwatch(h, w); /* unlinks w from h->watches, so the loop terminates */ + } + + qemu_bh_delete(h->watch_bh); + g_free(h); +} + +/* Backend ops table that routes the generic Xenstore backend interface to the xs_be_* functions of the emulated store; its address is what this patch assigns to xen_xenstore_ops. */ static struct xenstore_backend_ops emu_xenstore_backend_ops = { + .open = xs_be_open, + .close = xs_be_close, + .get_domain_path = xs_be_get_domain_path, + .directory = xs_be_directory, + .read = xs_be_read, + .write = xs_be_write, + .create = xs_be_create, + .destroy = xs_be_destroy, + .watch = xs_be_watch, + .unwatch = xs_be_unwatch, + .transaction_start = xs_be_transaction_start, + .transaction_end = xs_be_transaction_end, +}; -- cgit v1.1 From d05864d23b1aa3263cd645e1dd881b543b0ad447 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 7 Jan 2023 13:54:07 +0000 Subject: hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore We don't actually access the guest's page through the grant, because this isn't real Xen, and we can just use the page we gave it in the first place. Map the grant anyway, mostly for cosmetic purposes so it *looks* like it's in use in the guest-visible grant table. 
Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_xenstore.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'hw') diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index bf466c7..2cadafd 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -21,6 +21,7 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_overlay.h" #include "xen_evtchn.h" #include "xen_xenstore.h" @@ -34,6 +35,7 @@ #include "hw/xen/interface/io/xs_wire.h" #include "hw/xen/interface/event_channel.h" +#include "hw/xen/interface/grant_table.h" #define TYPE_XEN_XENSTORE "xen-xenstore" OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE) @@ -66,6 +68,9 @@ struct XenXenstoreState { uint8_t *impl_state; uint32_t impl_state_size; + + struct xengntdev_handle *gt; + void *granted_xs; }; struct XenXenstoreState *xen_xenstore_singleton; @@ -1453,6 +1458,17 @@ int xen_xenstore_reset(void) } s->be_port = err; + /* + * We don't actually access the guest's page through the grant, because + * this isn't real Xen, and we can just use the page we gave it in the + * first place. Map the grant anyway, mostly for cosmetic purposes so + * it *looks* like it's in use in the guest-visible grant table. + */ + s->gt = qemu_xen_gnttab_open(); + uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE; + s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref, + PROT_READ | PROT_WRITE); + return 0; } -- cgit v1.1 From de26b26197895857631863e6dea575b91e86f6d5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 10 Jan 2023 01:09:04 +0000 Subject: hw/xen: Implement soft reset for emulated gnttab This is only part of it; we will also need to get the PV back end drivers to tear down their own mappings (or do it for them, but they kind of need to stop using the pointers too). 
Some more work on the actual PV back ends and xen-bus code is going to be needed to really make soft reset and migration fully functional, and this part is the basis for that. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_gnttab.c | 26 ++++++++++++++++++++++++-- hw/i386/kvm/xen_gnttab.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 2bf91d3..21c30e3 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -72,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) error_setg(errp, "Xen grant table support is for Xen emulation"); return; } - s->nr_frames = 0; s->max_frames = kvm_xen_get_gnttab_max_frames(); memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table", XEN_PAGE_SIZE * s->max_frames, &error_abort); memory_region_set_enabled(&s->gnt_frames, true); s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames); - memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); /* Create individual page-sizes aliases for overlays */ s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames); @@ -90,8 +88,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) s->gnt_frame_gpas[i] = INVALID_GPA; } + s->nr_frames = 0; + memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); + qemu_mutex_init(&s->gnt_lock); xen_gnttab_singleton = s; @@ -523,3 +524,24 @@ static struct gnttab_backend_ops emu_gnttab_backend_ops = { .unmap = xen_be_gnttab_unmap, }; +int xen_gnttab_reset(void) +{ + XenGnttabState *s = xen_gnttab_singleton; + + if (!s) { + return -ENOTSUP; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + s->nr_frames = 0; + + memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); + + s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; + 
s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); + + memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1); + + return 0; +} diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h index 3bdbe96..ee21523 100644 --- a/hw/i386/kvm/xen_gnttab.h +++ b/hw/i386/kvm/xen_gnttab.h @@ -13,6 +13,7 @@ #define QEMU_XEN_GNTTAB_H void xen_gnttab_create(void); +int xen_gnttab_reset(void); int xen_gnttab_map_page(uint64_t idx, uint64_t gfn); struct gnttab_set_version; -- cgit v1.1 From a78c54c4f92c38d32211990b7b23b417fbfde8d1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 15 Feb 2023 16:10:00 +0100 Subject: i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation Now that all the work is done to enable the PV backends to work without actual Xen, instantiate the bus from pc_basic_device_init() for emulated mode. This allows us finally to launch an emulated Xen guest with PV disk. qemu-system-x86_64 -serial mon:stdio -M q35 -cpu host -display none \ -m 1G -smp 2 -accel kvm,xen-version=0x4000a,kernel-irqchip=split \ -kernel bzImage -append "console=ttyS0 root=/dev/xvda1" \ -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \ -device xen-disk,drive=disk,vdev=xvda If we use -M pc instead of q35, we can even add an IDE disk and boot a guest image normally through grub. But q35 gives us AHCI and that isn't unplugged by the Xen magic, so the guests ends up seeing "both" disks. Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/pc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'hw') diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7bebea5..1489abf 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -102,6 +102,11 @@ #include "trace.h" #include CONFIG_DEVICES +#ifdef CONFIG_XEN_EMU +#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen-bus.h" +#endif + /* * Helper for setting model-id for CPU models that changed model-id * depending on QEMU versions up to QEMU 2.4. 
@@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms, if (pcms->bus) { pci_create_simple(pcms->bus, -1, "xen-platform"); } + xen_bus_init(); + xen_be_init(); } #endif -- cgit v1.1